OSDN Git Service

* Fixed guess next line when multiple file input. [Nkf-dev 46]
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30  * nkf is currently being maintained on SourceForge.
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.140 2007/10/02 08:37:46 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-10-02"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
43 #define MSDOS
44 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
45 #define __WIN32__
46 #endif
47 #endif
48
49 #ifdef PERL_XS
50 #undef OVERWRITE
51 #endif
52
53 #ifndef PERL_XS
54 #include <stdio.h>
55 #endif
56
57 #include <stdlib.h>
58 #include <string.h>
59
60 #if defined(MSDOS) || defined(__OS2__)
61 #include <fcntl.h>
62 #include <io.h>
63 #if defined(_MSC_VER) || defined(__WATCOMC__)
64 #define mktemp _mktemp
65 #endif
66 #endif
67
68 #ifdef MSDOS
69 #ifdef LSI_C
70 #define setbinmode(fp) fsetbin(fp)
71 #elif defined(__DJGPP__)
72 #include <libc/dosio.h>
73 #define setbinmode(fp) djgpp_setbinmode(fp)
74 #else /* Microsoft C, Turbo C */
75 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
76 #endif
77 #else /* UNIX */
78 #define setbinmode(fp)
79 #endif
80
81 #if defined(__DJGPP__)
82 void  djgpp_setbinmode(FILE *fp)
83 {
84     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
85     int fd, m;
86     fd = fileno(fp);
87     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
88     __file_handle_set(fd, m);
89 }
90 #endif
91
92 #ifdef _IOFBF /* SysV and MSDOS, Windows */
93 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
94 #else /* BSD */
95 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
96 #endif
97
98 /*Borland C++ 4.5 EasyWin*/
99 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
100 #define         EASYWIN
101 #ifndef __WIN16__
102 #define __WIN16__
103 #endif
104 #include <windows.h>
105 #endif
106
107 #ifdef OVERWRITE
108 /* added by satoru@isoternet.org */
109 #if defined(__EMX__)
110 #include <sys/types.h>
111 #endif
112 #include <sys/stat.h>
113 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
114 #include <unistd.h>
115 #if defined(__WATCOMC__)
116 #include <sys/utime.h>
117 #else
118 #include <utime.h>
119 #endif
120 #else /* defined(MSDOS) */
121 #ifdef __WIN32__
122 #ifdef __BORLANDC__ /* BCC32 */
123 #include <utime.h>
124 #else /* !defined(__BORLANDC__) */
125 #include <sys/utime.h>
126 #endif /* (__BORLANDC__) */
127 #else /* !defined(__WIN32__) */
128 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
129 #include <sys/utime.h>
130 #elif defined(__TURBOC__) /* BCC */
131 #include <utime.h>
132 #elif defined(LSI_C) /* LSI C */
133 #endif /* (__WIN32__) */
134 #endif
135 #endif
136 #endif
137
138 #define         FALSE   0
139 #define         TRUE    1
140
141 /* state of output_mode and input_mode
142
143    c2           0 means ASCII
144                 X0201
145                 ISO8859_1
146                 X0208
147                 EOF      all termination
148    c1           32bit data
149
150  */
151
152 #define         ASCII           0
153 #define         X0208           1
154 #define         X0201           2
155 #define         ISO8859_1       8
156 #define         NO_X0201        3
157 #define         X0212      0x2844
158 #define         X0213_1    0x284F
159 #define         X0213_2    0x2850
160
161 /* Input Assumption */
162
163 #define         JIS_INPUT       4
164 #define         EUC_INPUT      16
165 #define         SJIS_INPUT      5
166 #define         LATIN1_INPUT    6
167 #define         FIXED_MIME      7
168 #define         STRICT_MIME     8
169
170 /* MIME ENCODE */
171
172 #define         ISO2022JP       9
173 #define         JAPANESE_EUC   10
174 #define         SHIFT_JIS      11
175
176 #define         UTF8           12
177 #define         UTF8_INPUT     13
178 #define         UTF16_INPUT    1015
179 #define         UTF32_INPUT    1017
180
181 /* byte order */
182
183 #define         ENDIAN_BIG      1234
184 #define         ENDIAN_LITTLE   4321
185 #define         ENDIAN_2143     2143
186 #define         ENDIAN_3412     3412
187
188 #define         WISH_TRUE      15
189
/* ASCII CODE */

#define         BS      0x08
#define         TAB     0x09
#define         LF      0x0a
#define         CR      0x0d
#define         ESC     0x1b
#define         SP      0x20
#define         AT      0x40
#define         SSP     0xa0
#define         DEL     0x7f
#define         SI      0x0f
#define         SO      0x0e
#define         SSO     0x8e
#define         SS3     0x8f
#define         CRLF    0x0D0A

/*
 * Character-class helpers.
 *
 * These are plain macros and most of them evaluate their argument more
 * than once, so never pass an expression with side effects (c++, a getc
 * call, ...).  Every use of the argument is parenthesized so that a
 * compound argument such as `a | b` or `x ? y : z` expands with the
 * grouping the caller intended.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')

/* Value of one hexadecimal digit character; any other character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)

/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])

/* True when bits 8..15 of c2 (taken as an unsigned short) equal SS3. */
#define is_eucg3(c2) ((((unsigned short)(c2)) >> 8) == SS3)

#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE

/* True when c2 lies in CP932_TABLE_BEGIN..CP932_TABLE_END inclusive. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
232
233 #define         HOLD_SIZE       1024
234 #if defined(INT_IS_SHORT)
235 #define         IOBUF_SIZE      2048
236 #else
237 #define         IOBUF_SIZE      16384
238 #endif
239
240 #define         DEFAULT_J       'B'
241 #define         DEFAULT_R       'B'
242
243 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
244 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
245
246 #define         RANGE_NUM_MAX   18
247 #define         GETA1   0x22
248 #define         GETA2   0x2e
249
250
251 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
252 #define sizeof_euc_to_utf8_1byte 94
253 #define sizeof_euc_to_utf8_2bytes 94
254 #define sizeof_utf8_to_euc_C2 64
255 #define sizeof_utf8_to_euc_E5B8 64
256 #define sizeof_utf8_to_euc_2bytes 112
257 #define sizeof_utf8_to_euc_3bytes 16
258 #endif
259
260 /* MIME preprocessor */
261
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
264 #endif
265
266 struct input_code{
267     char *name;
268     nkf_char stat;
269     nkf_char score;
270     nkf_char index;
271     nkf_char buf[3];
272     void (*status_func)(struct input_code *, nkf_char);
273     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
274     int _file_stat;
275 };
276
277 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
278
279 #ifndef PERL_XS
280 static const char *CopyRight = COPY_RIGHT;
281 #endif
282 #if !defined(PERL_XS) && !defined(WIN32DLL)
283 static  nkf_char     noconvert(FILE *f);
284 #endif
285 static  void    module_connection(void);
286 static  nkf_char     kanji_convert(FILE *f);
287 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
288 static  nkf_char     push_hold_buf(nkf_char c2);
289 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
290 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
291 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
292 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
293 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
294 /* UCS Mapping
295  * 0: Shift_JIS, eucJP-ascii
296  * 1: eucJP-ms
297  * 2: CP932, CP51932
298  * 3: CP10001
299  */
300 #define UCS_MAP_ASCII   0
301 #define UCS_MAP_MS      1
302 #define UCS_MAP_CP932   2
303 #define UCS_MAP_CP10001 3
304 static int ms_ucs_map_f = UCS_MAP_ASCII;
305 #endif
306 #ifdef UTF8_INPUT_ENABLE
307 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
308 static  int     no_cp932ext_f = FALSE;
309 /* ignore ZERO WIDTH NO-BREAK SPACE */
310 static  int     no_best_fit_chars_f = FALSE;
311 static  int     input_endian = ENDIAN_BIG;
312 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
313 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
314 static  void    encode_fallback_html(nkf_char c);
315 static  void    encode_fallback_xml(nkf_char c);
316 static  void    encode_fallback_java(nkf_char c);
317 static  void    encode_fallback_perl(nkf_char c);
318 static  void    encode_fallback_subchar(nkf_char c);
319 static  void    (*encode_fallback)(nkf_char c) = NULL;
320 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
321 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
322 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
323 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
324 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
325 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
326 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
327 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
328 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
329 static  void    w_status(struct input_code *, nkf_char);
330 #endif
331 #ifdef UTF8_OUTPUT_ENABLE
332 static  int     output_bom_f = FALSE;
333 static  int     output_endian = ENDIAN_BIG;
334 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
335 static  void    w_oconv(nkf_char c2,nkf_char c1);
336 static  void    w_oconv16(nkf_char c2,nkf_char c1);
337 static  void    w_oconv32(nkf_char c2,nkf_char c1);
338 #endif
339 static  void    e_oconv(nkf_char c2,nkf_char c1);
340 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
341 static  void    s_oconv(nkf_char c2,nkf_char c1);
342 static  void    j_oconv(nkf_char c2,nkf_char c1);
343 static  void    fold_conv(nkf_char c2,nkf_char c1);
344 static  void    nl_conv(nkf_char c2,nkf_char c1);
345 static  void    z_conv(nkf_char c2,nkf_char c1);
346 static  void    rot_conv(nkf_char c2,nkf_char c1);
347 static  void    hira_conv(nkf_char c2,nkf_char c1);
348 static  void    base64_conv(nkf_char c2,nkf_char c1);
349 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
350 static  void    no_connection(nkf_char c2,nkf_char c1);
351 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
352
353 static  void    code_score(struct input_code *ptr);
354 static  void    code_status(nkf_char c);
355
356 static  void    std_putc(nkf_char c);
357 static  nkf_char     std_getc(FILE *f);
358 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
359
360 static  nkf_char     broken_getc(FILE *f);
361 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
362
363 static  nkf_char     mime_begin(FILE *f);
364 static  nkf_char     mime_getc(FILE *f);
365 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
366
367 static  void    switch_mime_getc(void);
368 static  void    unswitch_mime_getc(void);
369 static  nkf_char     mime_begin_strict(FILE *f);
370 static  nkf_char     mime_getc_buf(FILE *f);
371 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
372 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
373
374 static  nkf_char     base64decode(nkf_char c);
375 static  void    mime_prechar(nkf_char c2, nkf_char c1);
376 static  void    mime_putc(nkf_char c);
377 static  void    open_mime(nkf_char c);
378 static  void    close_mime(void);
379 static  void    eof_mime(void);
380 static  void    mimeout_addchar(nkf_char c);
381 #ifndef PERL_XS
382 static  void    usage(void);
383 static  void    version(void);
384 #endif
385 static  void    options(unsigned char *c);
386 #if defined(PERL_XS) || defined(WIN32DLL)
387 static  void    reinit(void);
388 #endif
389
390 /* buffers */
391
392 #if !defined(PERL_XS) && !defined(WIN32DLL)
393 static unsigned char   stdibuf[IOBUF_SIZE];
394 static unsigned char   stdobuf[IOBUF_SIZE];
395 #endif
396 static unsigned char   hold_buf[HOLD_SIZE*2];
397 static int             hold_count = 0;
398
399 /* MIME preprocessor fifo */
400
401 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
402 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
403 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
404 static unsigned char           mime_buf[MIME_BUF_SIZE];
405 static unsigned int            mime_top = 0;
406 static unsigned int            mime_last = 0;  /* decoded */
407 static unsigned int            mime_input = 0; /* undecoded */
408 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
409
410 /* flags */
411 static int             unbuf_f = FALSE;
412 static int             estab_f = FALSE;
413 static int             nop_f = FALSE;
414 static int             binmode_f = TRUE;       /* binary mode */
415 static int             rot_f = FALSE;          /* rot14/43 mode */
416 static int             hira_f = FALSE;          /* hira/kata henkan */
417 static int             input_f = FALSE;        /* non fixed input code  */
418 static int             alpha_f = FALSE;        /* convert JIx0208 alphbet to ASCII */
419 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
420 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
421 static int             mimebuf_f = FALSE;      /* MIME buffered input */
422 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
423 static int             iso8859_f = FALSE;      /* ISO8859 through */
424 static int             mimeout_f = FALSE;       /* base64 mode */
425 #if defined(MSDOS) || defined(__OS2__)
426 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
427 #else
428 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
429 #endif
430 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
431
432 #ifdef UNICODE_NORMALIZATION
433 static int nfc_f = FALSE;
434 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
435 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
436 static nkf_char nfc_getc(FILE *f);
437 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
438 #endif
439
440 #ifdef INPUT_OPTION
441 static int cap_f = FALSE;
442 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
443 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
444 static nkf_char cap_getc(FILE *f);
445 static nkf_char cap_ungetc(nkf_char c,FILE *f);
446
447 static int url_f = FALSE;
448 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
449 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
450 static nkf_char url_getc(FILE *f);
451 static nkf_char url_ungetc(nkf_char c,FILE *f);
452 #endif
453
/*
 * NKF_INT32_C(n) spells an integer constant that needs 32 bits: when
 * INT_IS_SHORT is defined an `L` suffix is pasted on, otherwise the
 * constant is used as written.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)

/*
 * The argument is parenthesized so that a compound expression such as
 * `CLASS_UNICODE | val` is masked as a whole; `&` binds tighter than `|`,
 * so without the parentheses only the right-hand operand would be masked.
 */
/* True when the top byte of c (CLASS_MASK) equals CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the low 24 bits of c (VALUE_MASK) do not exceed 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
466
467 #ifdef NUMCHAR_OPTION
468 static int numchar_f = FALSE;
469 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
470 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
471 static nkf_char numchar_getc(FILE *f);
472 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
473 #endif
474
475 #ifdef CHECK_OPTION
476 static int noout_f = FALSE;
477 static void no_putc(nkf_char c);
478 static nkf_char debug_f = FALSE;
479 static void debug(const char *str);
480 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
481 #endif
482
483 static int guess_f = FALSE;
484 #if !defined PERL_XS
485 static  void    print_guessed_code(char *filename);
486 #endif
487 static  void    set_input_codename(char *codename);
488
489 #ifdef EXEC_IO
490 static int exec_f = 0;
491 #endif
492
493 #ifdef SHIFTJIS_CP932
494 /* invert IBM extended characters to others */
495 static int cp51932_f = FALSE;
496
497 /* invert NEC-selected IBM extended characters to IBM extended characters */
498 static int cp932inv_f = TRUE;
499
500 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
501 #endif /* SHIFTJIS_CP932 */
502
503 #ifdef X0212_ENABLE
504 static int x0212_f = FALSE;
505 static nkf_char x0212_shift(nkf_char c);
506 static nkf_char x0212_unshift(nkf_char c);
507 #endif
508 static int x0213_f = FALSE;
509
510 static unsigned char prefix_table[256];
511
512 static void set_code_score(struct input_code *ptr, nkf_char score);
513 static void clr_code_score(struct input_code *ptr, nkf_char score);
514 static void status_disable(struct input_code *ptr);
515 static void status_push_ch(struct input_code *ptr, nkf_char c);
516 static void status_clear(struct input_code *ptr);
517 static void status_reset(struct input_code *ptr);
518 static void status_reinit(struct input_code *ptr);
519 static void status_check(struct input_code *ptr, nkf_char c);
520 static void e_status(struct input_code *, nkf_char);
521 static void s_status(struct input_code *, nkf_char);
522
523 struct input_code input_code_list[] = {
524     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
525     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
526 #ifdef UTF8_INPUT_ENABLE
527     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
528     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
529     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
530 #endif
531     {0}
532 };
533
534 static int              mimeout_mode = 0;
535 static int              base64_count = 0;
536
537 /* X0208 -> ASCII converter */
538
539 /* fold parameter */
540 static int             f_line = 0;    /* chars in line */
541 static int             f_prev = 0;
542 static int             fold_preserve_f = FALSE; /* preserve new lines */
543 static int             fold_f  = FALSE;
544 static int             fold_len  = 0;
545
546 /* options */
547 static unsigned char   kanji_intro = DEFAULT_J;
548 static unsigned char   ascii_intro = DEFAULT_R;
549
550 /* Folding */
551
552 #define FOLD_MARGIN  10
553 #define DEFAULT_FOLD 60
554
555 static int             fold_margin  = FOLD_MARGIN;
556
557 /* converters */
558
559 #ifdef DEFAULT_CODE_JIS
560 #   define  DEFAULT_CONV j_oconv
561 #endif
562 #ifdef DEFAULT_CODE_SJIS
563 #   define  DEFAULT_CONV s_oconv
564 #endif
565 #ifdef DEFAULT_CODE_EUC
566 #   define  DEFAULT_CONV e_oconv
567 #endif
568 #ifdef DEFAULT_CODE_UTF8
569 #   define  DEFAULT_CONV w_oconv
570 #endif
571
572 /* process default */
573 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
574
575 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
576 /* s_iconv or oconv */
577 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
578
579 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
580 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
581 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
582 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
584 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
585 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
586
587 /* static redirections */
588
589 static  void   (*o_putc)(nkf_char c) = std_putc;
590
591 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
592 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
593
594 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
595 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
596
597 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
598
599 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
600 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
601
602 /* for strict mime */
603 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
604 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
605
606 /* Global states */
607 static int output_mode = ASCII,    /* output kanji mode */
608            input_mode =  ASCII,    /* input kanji mode */
609            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
610 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
611
612 /* X0201 / X0208 conversion tables */
613
/*
 * X0201 kana conversion table.
 * Range note kept from the original source: 90-9F A0-DF.
 *
 * The data is laid out as two-byte entries (64 pairs followed by a 0x00,0x00
 * pair); each row below shows four pairs, with the byte offset of the row
 * in brackets.
 * NOTE(review): each pair presumably is a JIS X 0208 code (first byte 0x21
 * or 0x25) -- confirm against the code that indexes this table.
 */
static const unsigned char cv[] = {
    /* [  0] */ 0x21,0x21,  0x21,0x23,  0x21,0x56,  0x21,0x57,
    /* [  8] */ 0x21,0x22,  0x21,0x26,  0x25,0x72,  0x25,0x21,
    /* [ 16] */ 0x25,0x23,  0x25,0x25,  0x25,0x27,  0x25,0x29,
    /* [ 24] */ 0x25,0x63,  0x25,0x65,  0x25,0x67,  0x25,0x43,
    /* [ 32] */ 0x21,0x3c,  0x25,0x22,  0x25,0x24,  0x25,0x26,
    /* [ 40] */ 0x25,0x28,  0x25,0x2a,  0x25,0x2b,  0x25,0x2d,
    /* [ 48] */ 0x25,0x2f,  0x25,0x31,  0x25,0x33,  0x25,0x35,
    /* [ 56] */ 0x25,0x37,  0x25,0x39,  0x25,0x3b,  0x25,0x3d,
    /* [ 64] */ 0x25,0x3f,  0x25,0x41,  0x25,0x44,  0x25,0x46,
    /* [ 72] */ 0x25,0x48,  0x25,0x4a,  0x25,0x4b,  0x25,0x4c,
    /* [ 80] */ 0x25,0x4d,  0x25,0x4e,  0x25,0x4f,  0x25,0x52,
    /* [ 88] */ 0x25,0x55,  0x25,0x58,  0x25,0x5b,  0x25,0x5e,
    /* [ 96] */ 0x25,0x5f,  0x25,0x60,  0x25,0x61,  0x25,0x62,
    /* [104] */ 0x25,0x64,  0x25,0x66,  0x25,0x68,  0x25,0x69,
    /* [112] */ 0x25,0x6a,  0x25,0x6b,  0x25,0x6c,  0x25,0x6d,
    /* [120] */ 0x25,0x6f,  0x25,0x73,  0x21,0x2b,  0x21,0x2c,
    /* [128] */ 0x00,0x00
};
634
635
/*
 * X0201 kana conversion table for dakuten.
 * Range note kept from the original source: 90-9F A0-DF.
 *
 * Same two-byte layout and length as cv; a 0x00,0x00 pair marks a slot
 * with no entry in this table.  Rows show four pairs, byte offset in
 * brackets.
 */
static const unsigned char dv[] = {
    /* [  0] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [  8] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 16] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 24] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 32] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x25,0x74,
    /* [ 40] */ 0x00,0x00,  0x00,0x00,  0x25,0x2c,  0x25,0x2e,
    /* [ 48] */ 0x25,0x30,  0x25,0x32,  0x25,0x34,  0x25,0x36,
    /* [ 56] */ 0x25,0x38,  0x25,0x3a,  0x25,0x3c,  0x25,0x3e,
    /* [ 64] */ 0x25,0x40,  0x25,0x42,  0x25,0x45,  0x25,0x47,
    /* [ 72] */ 0x25,0x49,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 80] */ 0x00,0x00,  0x00,0x00,  0x25,0x50,  0x25,0x53,
    /* [ 88] */ 0x25,0x56,  0x25,0x59,  0x25,0x5c,  0x00,0x00,
    /* [ 96] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [104] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [112] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [120] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [128] */ 0x00,0x00
};
656
/*
 * X0201 kana conversion table for handakuten.
 * Range note kept from the original source: 90-9F A0-DF.
 *
 * Same two-byte layout and length as cv; only five slots (byte offsets
 * 84..93) are populated, every other pair is 0x00,0x00.  Rows show four
 * pairs, byte offset in brackets.
 */
static const unsigned char ev[] = {
    /* [  0] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [  8] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 16] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 24] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 32] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 40] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 48] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 56] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 64] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 72] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [ 80] */ 0x00,0x00,  0x00,0x00,  0x25,0x51,  0x25,0x54,
    /* [ 88] */ 0x25,0x57,  0x25,0x5a,  0x25,0x5d,  0x00,0x00,
    /* [ 96] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [104] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [112] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [120] */ 0x00,0x00,  0x00,0x00,  0x00,0x00,  0x00,0x00,
    /* [128] */ 0x00,0x00
};
677
678
/*
 * X0208 kigou (symbol) conversion table.
 * Range note kept from the original source: 0x8140 - 0x819e.
 *
 * 96 single-byte entries; every non-zero entry is a value in the printable
 * ASCII range, 0x00 marks a slot without an entry.  The bracketed number
 * is the array offset of the first entry in each row.
 */
static const unsigned char fv[] = {
    /* [ 0] */ 0x00, 0x00, 0x00, 0x00, 0x2c, 0x2e, 0x00, 0x3a,
    /* [ 8] */ 0x3b, 0x3f, 0x21, 0x00, 0x00, 0x27, 0x60, 0x00,
    /* [16] */ 0x5e, 0x00, 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* [24] */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x2f,
    /* [32] */ 0x5c, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x60, 0x27,
    /* [40] */ 0x22, 0x22, 0x28, 0x29, 0x00, 0x00, 0x5b, 0x5d,
    /* [48] */ 0x7b, 0x7d, 0x3c, 0x3e, 0x00, 0x00, 0x00, 0x00,
    /* [56] */ 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2d, 0x00, 0x00,
    /* [64] */ 0x00, 0x3d, 0x00, 0x3c, 0x3e, 0x00, 0x00, 0x00,
    /* [72] */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* [80] */ 0x24, 0x00, 0x00, 0x25, 0x23, 0x26, 0x2a, 0x40,
    /* [88] */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
696
697
698
699 static int             file_out_f = FALSE;
700 #ifdef OVERWRITE
701 static int             overwrite_f = FALSE;
702 static int             preserve_time_f = FALSE;
703 static int             backup_f = FALSE;
704 static char            *backup_suffix = "";
705 static char *get_backup_filename(const char *suffix, const char *filename);
706 #endif
707
708 static int nlmode_f = 0;   /* CR, LF, CRLF */
709 static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */
710 static nkf_char prev_cr = 0; /* CR or 0 */
711 #ifdef EASYWIN /*Easy Win */
712 static int             end_check;
713 #endif /*Easy Win */
714
715 #define STD_GC_BUFSIZE (256)
716 nkf_char std_gc_buf[STD_GC_BUFSIZE];
717 nkf_char std_gc_ndx;
718
719 #ifdef WIN32DLL
720 #include "nkf32dll.c"
721 #elif defined(PERL_XS)
722 #else /* WIN32DLL */
723 int main(int argc, char **argv)
724 {
725     FILE  *fin;
726     unsigned char  *cp;
727
728     char *outfname = NULL;
729     char *origfname;
730
731 #ifdef EASYWIN /*Easy Win */
732     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
733 #endif
734
735     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
736         cp = (unsigned char *)*argv;
737         options(cp);
738 #ifdef EXEC_IO
739         if (exec_f){
740             int fds[2], pid;
741             if (pipe(fds) < 0 || (pid = fork()) < 0){
742                 abort();
743             }
744             if (pid == 0){
745                 if (exec_f > 0){
746                     close(fds[0]);
747                     dup2(fds[1], 1);
748                 }else{
749                     close(fds[1]);
750                     dup2(fds[0], 0);
751                 }
752                 execvp(argv[1], &argv[1]);
753             }
754             if (exec_f > 0){
755                 close(fds[1]);
756                 dup2(fds[0], 0);
757             }else{
758                 close(fds[0]);
759                 dup2(fds[1], 1);
760             }
761             argc = 0;
762             break;
763         }
764 #endif
765     }
766     if(x0201_f == WISH_TRUE)
767          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
768
769     if (binmode_f == TRUE)
770 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
771     if (freopen("","wb",stdout) == NULL)
772         return (-1);
773 #else
774     setbinmode(stdout);
775 #endif
776
777     if (unbuf_f)
778       setbuf(stdout, (char *) NULL);
779     else
780       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
781
782     if (argc == 0) {
783       if (binmode_f == TRUE)
784 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
785       if (freopen("","rb",stdin) == NULL) return (-1);
786 #else
787       setbinmode(stdin);
788 #endif
789       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
790       if (nop_f)
791           noconvert(stdin);
792       else {
793           kanji_convert(stdin);
794           if (guess_f) print_guessed_code(NULL);
795       }
796     } else {
797       int nfiles = argc;
798         int is_argument_error = FALSE;
799       while (argc--) {
800             input_codename = NULL;
801             input_nextline = NULL;
802 #ifdef CHECK_OPTION
803             iconv_for_check = 0;
804 #endif
805           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
806               perror(*--argv);
807                 *argv++;
808                 is_argument_error = TRUE;
809                 continue;
810           } else {
811 #ifdef OVERWRITE
812               int fd = 0;
813               int fd_backup = 0;
814 #endif
815
816 /* reopen file for stdout */
817               if (file_out_f == TRUE) {
818 #ifdef OVERWRITE
819                   if (overwrite_f){
820                       outfname = malloc(strlen(origfname)
821                                         + strlen(".nkftmpXXXXXX")
822                                         + 1);
823                       if (!outfname){
824                           perror(origfname);
825                           return -1;
826                       }
827                       strcpy(outfname, origfname);
828 #ifdef MSDOS
829                       {
830                           int i;
831                           for (i = strlen(outfname); i; --i){
832                               if (outfname[i - 1] == '/'
833                                   || outfname[i - 1] == '\\'){
834                                   break;
835                               }
836                           }
837                           outfname[i] = '\0';
838                       }
839                       strcat(outfname, "ntXXXXXX");
840                       mktemp(outfname);
841                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
842                                 S_IREAD | S_IWRITE);
843 #else
844                       strcat(outfname, ".nkftmpXXXXXX");
845                       fd = mkstemp(outfname);
846 #endif
847                       if (fd < 0
848                           || (fd_backup = dup(fileno(stdout))) < 0
849                           || dup2(fd, fileno(stdout)) < 0
850                           ){
851                           perror(origfname);
852                           return -1;
853                       }
854                   }else
855 #endif
856                   if(argc == 1) {
857                       outfname = *argv++;
858                       argc--;
859                   } else {
860                       outfname = "nkf.out";
861                   }
862
863                   if(freopen(outfname, "w", stdout) == NULL) {
864                       perror (outfname);
865                       return (-1);
866                   }
867                   if (binmode_f == TRUE) {
868 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
869                       if (freopen("","wb",stdout) == NULL)
870                            return (-1);
871 #else
872                       setbinmode(stdout);
873 #endif
874                   }
875               }
876               if (binmode_f == TRUE)
877 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
878                  if (freopen("","rb",fin) == NULL)
879                     return (-1);
880 #else
881                  setbinmode(fin);
882 #endif
883               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
884               if (nop_f)
885                   noconvert(fin);
886               else {
887                   char *filename = NULL;
888                   kanji_convert(fin);
889                   if (nfiles > 1) filename = origfname;
890                   if (guess_f) print_guessed_code(filename);
891               }
892               fclose(fin);
893 #ifdef OVERWRITE
894               if (overwrite_f) {
895                   struct stat     sb;
896 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
897                   time_t tb[2];
898 #else
899                   struct utimbuf  tb;
900 #endif
901
902                   fflush(stdout);
903                   close(fd);
904                   if (dup2(fd_backup, fileno(stdout)) < 0){
905                       perror("dup2");
906                   }
907                   if (stat(origfname, &sb)) {
908                       fprintf(stderr, "Can't stat %s\n", origfname);
909                   }
910                   /* restore permissions */
911                   if (chmod(outfname, sb.st_mode)) {
912                       fprintf(stderr, "Can't set permission %s\n", outfname);
913                   }
914
915                   /* restore timestamp */
916                     if(preserve_time_f){
917 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
918                         tb[0] = tb[1] = sb.st_mtime;
919                         if (utime(outfname, tb)) {
920                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
921                         }
922 #else
923                         tb.actime  = sb.st_atime;
924                         tb.modtime = sb.st_mtime;
925                         if (utime(outfname, &tb)) {
926                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
927                         }
928 #endif
929                     }
930                     if(backup_f){
931                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
932 #ifdef MSDOS
933                         unlink(backup_filename);
934 #endif
935                         if (rename(origfname, backup_filename)) {
936                             perror(backup_filename);
937                             fprintf(stderr, "Can't rename %s to %s\n",
938                                     origfname, backup_filename);
939                         }
940                     }else{
941 #ifdef MSDOS
942                         if (unlink(origfname)){
943                             perror(origfname);
944                         }
945 #endif
946                     }
947                   if (rename(outfname, origfname)) {
948                       perror(origfname);
949                       fprintf(stderr, "Can't rename %s to %s\n",
950                               outfname, origfname);
951                   }
952                   free(outfname);
953               }
954 #endif
955           }
956       }
957         if (is_argument_error)
958             return(-1);
959     }
960 #ifdef EASYWIN /*Easy Win */
961     if (file_out_f == FALSE)
962         scanf("%d",&end_check);
963     else
964         fclose(stdout);
965 #else /* for Other OS */
966     if (file_out_f == TRUE)
967         fclose(stdout);
968 #endif /*Easy Win */
969     return (0);
970 }
971 #endif /* WIN32DLL */
972
973 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from the pattern `suffix`.
 *
 * If `suffix` contains one or more '*' characters, every '*' is replaced by
 * `filename` and all other characters are copied unchanged
 * (e.g. suffix "*.bak", filename "foo" -> "foo.bak").
 * If `suffix` contains no '*', it is appended to `filename`
 * (e.g. suffix "~", filename "foo" -> "foo~").
 *
 * Returns a newly malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL when the allocation fails (the failure is reported with
 * perror()).
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t total_length;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* each '*' disappears and one copy of filename takes its place */
        total_length = (suffix_length - asterisk_count)
                       + asterisk_count * filename_length;
    } else {
        total_length = filename_length + suffix_length;
    }

    /* +1 for the terminating NUL */
    backup_filename = malloc(total_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
    } else {
        memcpy(backup_filename, filename, filename_length);
        memcpy(backup_filename + filename_length, suffix, suffix_length);
    }
    backup_filename[total_length] = '\0';
    return backup_filename;
}
1012 #endif
1013
/*
 * Table of long ("--name") options consulted by options().
 *
 *   name  - option text that follows "--".  options() stops comparing at a
 *           '=' in the name; for such entries the text after the matched
 *           part of the argument is used as the option's value.
 *   alias - when non-empty, options() goes on to parse this string as short
 *           option letters; when empty, options() handles the entry itself
 *           by comparing `name` with strcmp().
 *
 * options() scans the table from the first entry and stops at the first
 * entry that matches, so the order of the entries is significant.
 * NOTE(review): the match test in options() also accepts an argument that
 * is followed by a space after matching only a leading part of a name --
 * confirm the effect on lookup before reordering or inserting entries.
 */
1014 static const struct {
1015     const char *name;
1016     const char *alias;
1017 } long_option[] = {
1018     {"ic=", ""},
1019     {"oc=", ""},
1020     {"base64","jMB"},
1021     {"euc","e"},
1022     {"euc-input","E"},
1023     {"fj","jm"},
1024     {"help","v"},
1025     {"jis","j"},
1026     {"jis-input","J"},
1027     {"mac","sLm"},
1028     {"mime","jM"},
1029     {"mime-input","m"},
1030     {"msdos","sLw"},
1031     {"sjis","s"},
1032     {"sjis-input","S"},
1033     {"unix","eLu"},
1034     {"version","V"},
1035     {"windows","sLw"},
1036     {"hiragana","h1"},
1037     {"katakana","h2"},
1038     {"katakana-hiragana","h3"},
1039     {"guess", "g"},
1040     {"cp932", ""},
1041     {"no-cp932", ""},
1042 #ifdef X0212_ENABLE
1043     {"x0212", ""},
1044 #endif
1045 #ifdef UTF8_OUTPUT_ENABLE
1046     {"utf8", "w"},
1047     {"utf16", "w16"},
1048     {"ms-ucs-map", ""},
1049     {"fb-skip", ""},
1050     {"fb-html", ""},
1051     {"fb-xml", ""},
1052     {"fb-perl", ""},
1053     {"fb-java", ""},
1054     {"fb-subchar", ""},
1055     {"fb-subchar=", ""},
1056 #endif
1057 #ifdef UTF8_INPUT_ENABLE
1058     {"utf8-input", "W"},
1059     {"utf16-input", "W16"},
1060     {"no-cp932ext", ""},
1061     {"no-best-fit-chars",""},
1062 #endif
1063 #ifdef UNICODE_NORMALIZATION
1064     {"utf8mac-input", ""},
1065 #endif
1066 #ifdef OVERWRITE
1067     {"overwrite", ""},
1068     {"overwrite=", ""},
1069     {"in-place", ""},
1070     {"in-place=", ""},
1071 #endif
1072 #ifdef INPUT_OPTION
1073     {"cap-input", ""},
1074     {"url-input", ""},
1075 #endif
1076 #ifdef NUMCHAR_OPTION
1077     {"numchar-input", ""},
1078 #endif
1079 #ifdef CHECK_OPTION
1080     {"no-output", ""},
1081     {"debug", ""},
1082 #endif
1083 #ifdef SHIFTJIS_CP932
1084     {"cp932inv", ""},
1085 #endif
1086 #ifdef EXEC_IO
1087     {"exec-in", ""},
1088     {"exec-out", ""},
1089 #endif
1090     {"prefix=", ""},
1091 };
1092
/* Set to 1 by options() when it meets a bare "--" (followed by end of string
 * or a space); once set, options() returns immediately on every later call. */
1093 static int option_mode = 0;
1094
1095 void options(unsigned char *cp)
1096 {
1097     nkf_char i, j;
1098     unsigned char *p;
1099     unsigned char *cp_back = NULL;
1100     char codeset[32];
1101
1102     if (option_mode==1)
1103         return;
1104     while(*cp && *cp++!='-');
1105     while (*cp || cp_back) {
1106         if(!*cp){
1107             cp = cp_back;
1108             cp_back = NULL;
1109             continue;
1110         }
1111         p = 0;
1112         switch (*cp++) {
1113         case '-':  /* literal options */
1114             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1115                 option_mode = 1;
1116                 return;
1117             }
1118             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1119                 p = (unsigned char *)long_option[i].name;
1120                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1121                 if (*p == cp[j] || cp[j] == SP){
1122                     p = &cp[j] + 1;
1123                     break;
1124                 }
1125                 p = 0;
1126             }
1127             if (p == 0) return;
1128             while(*cp && *cp != SP && cp++);
1129             if (long_option[i].alias[0]){
1130                 cp_back = cp;
1131                 cp = (unsigned char *)long_option[i].alias;
1132             }else{
1133                 if (strcmp(long_option[i].name, "ic=") == 0){
1134                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1135                         codeset[i] = nkf_toupper(p[i]);
1136                     }
1137                     codeset[i] = 0;
1138                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1139                         input_f = JIS_INPUT;
1140                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1141                       strcmp(codeset, "CP50220") == 0 ||
1142                       strcmp(codeset, "CP50221") == 0 ||
1143                       strcmp(codeset, "CP50222") == 0){
1144                         input_f = JIS_INPUT;
1145 #ifdef SHIFTJIS_CP932
1146                         cp51932_f = TRUE;
1147 #endif
1148 #ifdef UTF8_OUTPUT_ENABLE
1149                         ms_ucs_map_f = UCS_MAP_CP932;
1150 #endif
1151                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1152                         input_f = JIS_INPUT;
1153 #ifdef X0212_ENABLE
1154                         x0212_f = TRUE;
1155 #endif
1156                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1157                         input_f = JIS_INPUT;
1158 #ifdef X0212_ENABLE
1159                         x0212_f = TRUE;
1160 #endif
1161                         x0213_f = TRUE;
1162                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1163                         input_f = SJIS_INPUT;
1164                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1165                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1166                              strcmp(codeset, "CP932") == 0 ||
1167                              strcmp(codeset, "MS932") == 0){
1168                         input_f = SJIS_INPUT;
1169 #ifdef SHIFTJIS_CP932
1170                         cp51932_f = TRUE;
1171 #endif
1172 #ifdef UTF8_OUTPUT_ENABLE
1173                         ms_ucs_map_f = UCS_MAP_CP932;
1174 #endif
1175                     }else if(strcmp(codeset, "CP10001") == 0){
1176                         input_f = SJIS_INPUT;
1177 #ifdef SHIFTJIS_CP932
1178                         cp51932_f = TRUE;
1179 #endif
1180 #ifdef UTF8_OUTPUT_ENABLE
1181                         ms_ucs_map_f = UCS_MAP_CP10001;
1182 #endif
1183                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1184                              strcmp(codeset, "EUC-JP") == 0){
1185                         input_f = EUC_INPUT;
1186                     }else if(strcmp(codeset, "CP51932") == 0){
1187                         input_f = EUC_INPUT;
1188 #ifdef SHIFTJIS_CP932
1189                         cp51932_f = TRUE;
1190 #endif
1191 #ifdef UTF8_OUTPUT_ENABLE
1192                         ms_ucs_map_f = UCS_MAP_CP932;
1193 #endif
1194                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1195                              strcmp(codeset, "EUCJP-MS") == 0 ||
1196                              strcmp(codeset, "EUCJPMS") == 0){
1197                         input_f = EUC_INPUT;
1198 #ifdef SHIFTJIS_CP932
1199                         cp51932_f = FALSE;
1200 #endif
1201 #ifdef UTF8_OUTPUT_ENABLE
1202                         ms_ucs_map_f = UCS_MAP_MS;
1203 #endif
1204                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1205                              strcmp(codeset, "EUCJP-ASCII") == 0){
1206                         input_f = EUC_INPUT;
1207 #ifdef SHIFTJIS_CP932
1208                         cp51932_f = FALSE;
1209 #endif
1210 #ifdef UTF8_OUTPUT_ENABLE
1211                         ms_ucs_map_f = UCS_MAP_ASCII;
1212 #endif
1213                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1214                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1215                         input_f = SJIS_INPUT;
1216                         x0213_f = TRUE;
1217 #ifdef SHIFTJIS_CP932
1218                         cp51932_f = FALSE;
1219 #endif
1220                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1221                              strcmp(codeset, "EUC-JIS-2004") == 0){
1222                         input_f = EUC_INPUT;
1223                         x0213_f = TRUE;
1224 #ifdef SHIFTJIS_CP932
1225                         cp51932_f = FALSE;
1226 #endif
1227 #ifdef UTF8_INPUT_ENABLE
1228                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1229                              strcmp(codeset, "UTF-8N") == 0 ||
1230                              strcmp(codeset, "UTF-8-BOM") == 0){
1231                         input_f = UTF8_INPUT;
1232 #ifdef UNICODE_NORMALIZATION
1233                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1234                              strcmp(codeset, "UTF-8-MAC") == 0){
1235                         input_f = UTF8_INPUT;
1236                         nfc_f = TRUE;
1237 #endif
1238                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1239                              strcmp(codeset, "UTF-16BE") == 0 ||
1240                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1241                         input_f = UTF16_INPUT;
1242                         input_endian = ENDIAN_BIG;
1243                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1244                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1245                         input_f = UTF16_INPUT;
1246                         input_endian = ENDIAN_LITTLE;
1247                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1248                              strcmp(codeset, "UTF-32BE") == 0 ||
1249                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1250                         input_f = UTF32_INPUT;
1251                         input_endian = ENDIAN_BIG;
1252                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1253                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1254                         input_f = UTF32_INPUT;
1255                         input_endian = ENDIAN_LITTLE;
1256 #endif
1257                     }
1258                     continue;
1259                 }
1260                 if (strcmp(long_option[i].name, "oc=") == 0){
1261                     x0201_f = FALSE;
1262                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1263                         codeset[i] = nkf_toupper(p[i]);
1264                     }
1265                     codeset[i] = 0;
1266                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1267                         output_conv = j_oconv;
1268                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1269                         output_conv = j_oconv;
1270                         no_cp932ext_f = TRUE;
1271 #ifdef SHIFTJIS_CP932
1272                         cp932inv_f = FALSE;
1273 #endif
1274 #ifdef UTF8_OUTPUT_ENABLE
1275                         ms_ucs_map_f = UCS_MAP_CP932;
1276 #endif
1277                     }else if(strcmp(codeset, "CP50220") == 0){
1278                         output_conv = j_oconv;
1279                         x0201_f = TRUE;
1280 #ifdef SHIFTJIS_CP932
1281                         cp932inv_f = FALSE;
1282 #endif
1283 #ifdef UTF8_OUTPUT_ENABLE
1284                         ms_ucs_map_f = UCS_MAP_CP932;
1285 #endif
1286                     }else if(strcmp(codeset, "CP50221") == 0){
1287                         output_conv = j_oconv;
1288 #ifdef SHIFTJIS_CP932
1289                         cp932inv_f = FALSE;
1290 #endif
1291 #ifdef UTF8_OUTPUT_ENABLE
1292                         ms_ucs_map_f = UCS_MAP_CP932;
1293 #endif
1294                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1295                         output_conv = j_oconv;
1296 #ifdef X0212_ENABLE
1297                         x0212_f = TRUE;
1298 #endif
1299 #ifdef SHIFTJIS_CP932
1300                         cp932inv_f = FALSE;
1301 #endif
1302                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1303                         output_conv = j_oconv;
1304 #ifdef X0212_ENABLE
1305                         x0212_f = TRUE;
1306 #endif
1307                         x0213_f = TRUE;
1308 #ifdef SHIFTJIS_CP932
1309                         cp932inv_f = FALSE;
1310 #endif
1311                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1312                         output_conv = s_oconv;
1313                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1314                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1315                              strcmp(codeset, "CP932") == 0 ||
1316                              strcmp(codeset, "MS932") == 0){
1317                         output_conv = s_oconv;
1318 #ifdef UTF8_OUTPUT_ENABLE
1319                         ms_ucs_map_f = UCS_MAP_CP932;
1320 #endif
1321                     }else if(strcmp(codeset, "CP10001") == 0){
1322                         output_conv = s_oconv;
1323 #ifdef UTF8_OUTPUT_ENABLE
1324                         ms_ucs_map_f = UCS_MAP_CP10001;
1325 #endif
1326                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1327                              strcmp(codeset, "EUC-JP") == 0){
1328                         output_conv = e_oconv;
1329                     }else if(strcmp(codeset, "CP51932") == 0){
1330                         output_conv = e_oconv;
1331 #ifdef SHIFTJIS_CP932
1332                         cp932inv_f = FALSE;
1333 #endif
1334 #ifdef UTF8_OUTPUT_ENABLE
1335                         ms_ucs_map_f = UCS_MAP_CP932;
1336 #endif
1337                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1338                              strcmp(codeset, "EUCJP-MS") == 0 ||
1339                              strcmp(codeset, "EUCJPMS") == 0){
1340                         output_conv = e_oconv;
1341 #ifdef X0212_ENABLE
1342                         x0212_f = TRUE;
1343 #endif
1344 #ifdef UTF8_OUTPUT_ENABLE
1345                         ms_ucs_map_f = UCS_MAP_MS;
1346 #endif
1347                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1348                              strcmp(codeset, "EUCJP-ASCII") == 0){
1349                         output_conv = e_oconv;
1350 #ifdef X0212_ENABLE
1351                         x0212_f = TRUE;
1352 #endif
1353 #ifdef UTF8_OUTPUT_ENABLE
1354                         ms_ucs_map_f = UCS_MAP_ASCII;
1355 #endif
1356                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1357                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1358                         output_conv = s_oconv;
1359                         x0213_f = TRUE;
1360 #ifdef SHIFTJIS_CP932
1361                         cp932inv_f = FALSE;
1362 #endif
1363                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1364                              strcmp(codeset, "EUC-JIS-2004") == 0){
1365                         output_conv = e_oconv;
1366 #ifdef X0212_ENABLE
1367                         x0212_f = TRUE;
1368 #endif
1369                         x0213_f = TRUE;
1370 #ifdef SHIFTJIS_CP932
1371                         cp932inv_f = FALSE;
1372 #endif
1373 #ifdef UTF8_OUTPUT_ENABLE
1374                     }else if(strcmp(codeset, "UTF-8") == 0){
1375                         output_conv = w_oconv;
1376                     }else if(strcmp(codeset, "UTF-8N") == 0){
1377                         output_conv = w_oconv;
1378                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1379                         output_conv = w_oconv;
1380                         output_bom_f = TRUE;
1381                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1382                         output_conv = w_oconv16;
1383                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1384                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1385                         output_conv = w_oconv16;
1386                         output_bom_f = TRUE;
1387                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1388                         output_conv = w_oconv16;
1389                         output_endian = ENDIAN_LITTLE;
1390                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1391                         output_conv = w_oconv16;
1392                         output_endian = ENDIAN_LITTLE;
1393                         output_bom_f = TRUE;
1394                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1395                              strcmp(codeset, "UTF-32BE") == 0){
1396                         output_conv = w_oconv32;
1397                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1398                         output_conv = w_oconv32;
1399                         output_bom_f = TRUE;
1400                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1401                         output_conv = w_oconv32;
1402                         output_endian = ENDIAN_LITTLE;
1403                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1404                         output_conv = w_oconv32;
1405                         output_endian = ENDIAN_LITTLE;
1406                         output_bom_f = TRUE;
1407 #endif
1408                     }
1409                     continue;
1410                 }
1411 #ifdef OVERWRITE
1412                 if (strcmp(long_option[i].name, "overwrite") == 0){
1413                     file_out_f = TRUE;
1414                     overwrite_f = TRUE;
1415                     preserve_time_f = TRUE;
1416                     continue;
1417                 }
1418                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1419                     file_out_f = TRUE;
1420                     overwrite_f = TRUE;
1421                     preserve_time_f = TRUE;
1422                     backup_f = TRUE;
1423                     backup_suffix = malloc(strlen((char *) p) + 1);
1424                     strcpy(backup_suffix, (char *) p);
1425                     continue;
1426                 }
1427                 if (strcmp(long_option[i].name, "in-place") == 0){
1428                     file_out_f = TRUE;
1429                     overwrite_f = TRUE;
1430                     preserve_time_f = FALSE;
1431                     continue;
1432                 }
1433                 if (strcmp(long_option[i].name, "in-place=") == 0){
1434                     file_out_f = TRUE;
1435                     overwrite_f = TRUE;
1436                     preserve_time_f = FALSE;
1437                     backup_f = TRUE;
1438                     backup_suffix = malloc(strlen((char *) p) + 1);
1439                     strcpy(backup_suffix, (char *) p);
1440                     continue;
1441                 }
1442 #endif
1443 #ifdef INPUT_OPTION
1444                 if (strcmp(long_option[i].name, "cap-input") == 0){
1445                     cap_f = TRUE;
1446                     continue;
1447                 }
1448                 if (strcmp(long_option[i].name, "url-input") == 0){
1449                     url_f = TRUE;
1450                     continue;
1451                 }
1452 #endif
1453 #ifdef NUMCHAR_OPTION
1454                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1455                     numchar_f = TRUE;
1456                     continue;
1457                 }
1458 #endif
1459 #ifdef CHECK_OPTION
1460                 if (strcmp(long_option[i].name, "no-output") == 0){
1461                     noout_f = TRUE;
1462                     continue;
1463                 }
1464                 if (strcmp(long_option[i].name, "debug") == 0){
1465                     debug_f = TRUE;
1466                     continue;
1467                 }
1468 #endif
1469                 if (strcmp(long_option[i].name, "cp932") == 0){
1470 #ifdef SHIFTJIS_CP932
1471                     cp51932_f = TRUE;
1472                     cp932inv_f = TRUE;
1473 #endif
1474 #ifdef UTF8_OUTPUT_ENABLE
1475                     ms_ucs_map_f = UCS_MAP_CP932;
1476 #endif
1477                     continue;
1478                 }
1479                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1480 #ifdef SHIFTJIS_CP932
1481                     cp51932_f = FALSE;
1482                     cp932inv_f = FALSE;
1483 #endif
1484 #ifdef UTF8_OUTPUT_ENABLE
1485                     ms_ucs_map_f = UCS_MAP_ASCII;
1486 #endif
1487                     continue;
1488                 }
1489 #ifdef SHIFTJIS_CP932
1490                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1491                     cp932inv_f = TRUE;
1492                     continue;
1493                 }
1494 #endif
1495
1496 #ifdef X0212_ENABLE
1497                 if (strcmp(long_option[i].name, "x0212") == 0){
1498                     x0212_f = TRUE;
1499                     continue;
1500                 }
1501 #endif
1502
1503 #ifdef EXEC_IO
1504                   if (strcmp(long_option[i].name, "exec-in") == 0){
1505                       exec_f = 1;
1506                       return;
1507                   }
1508                   if (strcmp(long_option[i].name, "exec-out") == 0){
1509                       exec_f = -1;
1510                       return;
1511                   }
1512 #endif
1513 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1514                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1515                     no_cp932ext_f = TRUE;
1516                     continue;
1517                 }
1518                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1519                     no_best_fit_chars_f = TRUE;
1520                     continue;
1521                 }
1522                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1523                     encode_fallback = NULL;
1524                     continue;
1525                 }
1526                 if (strcmp(long_option[i].name, "fb-html") == 0){
1527                     encode_fallback = encode_fallback_html;
1528                     continue;
1529                 }
1530                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1531                     encode_fallback = encode_fallback_xml;
1532                     continue;
1533                 }
1534                 if (strcmp(long_option[i].name, "fb-java") == 0){
1535                     encode_fallback = encode_fallback_java;
1536                     continue;
1537                 }
1538                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1539                     encode_fallback = encode_fallback_perl;
1540                     continue;
1541                 }
1542                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1543                     encode_fallback = encode_fallback_subchar;
1544                     continue;
1545                 }
1546                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1547                     encode_fallback = encode_fallback_subchar;
1548                     unicode_subchar = 0;
1549                     if (p[0] != '0'){
1550                         /* decimal number */
1551                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1552                             unicode_subchar *= 10;
1553                             unicode_subchar += hex2bin(p[i]);
1554                         }
1555                     }else if(p[1] == 'x' || p[1] == 'X'){
1556                         /* hexadecimal number */
1557                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1558                             unicode_subchar <<= 4;
1559                             unicode_subchar |= hex2bin(p[i]);
1560                         }
1561                     }else{
1562                         /* octal number */
1563                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1564                             unicode_subchar *= 8;
1565                             unicode_subchar += hex2bin(p[i]);
1566                         }
1567                     }
1568                     w16e_conv(unicode_subchar, &i, &j);
1569                     unicode_subchar = i<<8 | j;
1570                     continue;
1571                 }
1572 #endif
1573 #ifdef UTF8_OUTPUT_ENABLE
1574                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1575                     ms_ucs_map_f = UCS_MAP_MS;
1576                     continue;
1577                 }
1578 #endif
1579 #ifdef UNICODE_NORMALIZATION
1580                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1581                     input_f = UTF8_INPUT;
1582                     nfc_f = TRUE;
1583                     continue;
1584                 }
1585 #endif
1586                 if (strcmp(long_option[i].name, "prefix=") == 0){
1587                     if (nkf_isgraph(p[0])){
1588                         for (i = 1; nkf_isgraph(p[i]); i++){
1589                             prefix_table[p[i]] = p[0];
1590                         }
1591                     }
1592                     continue;
1593                 }
1594             }
1595             continue;
1596         case 'b':           /* buffered mode */
1597             unbuf_f = FALSE;
1598             continue;
1599         case 'u':           /* non bufferd mode */
1600             unbuf_f = TRUE;
1601             continue;
1602         case 't':           /* transparent mode */
1603             if (*cp=='1') {
1604                 /* alias of -t */
1605                 nop_f = TRUE;
1606                 *cp++;
1607             } else if (*cp=='2') {
1608                 /*
1609                  * -t with put/get
1610                  *
1611                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1612                  *
1613                  */
1614                 nop_f = 2;
1615                 *cp++;
1616             } else
1617                 nop_f = TRUE;
1618             continue;
1619         case 'j':           /* JIS output */
1620         case 'n':
1621             output_conv = j_oconv;
1622             continue;
1623         case 'e':           /* AT&T EUC output */
1624             output_conv = e_oconv;
1625             cp932inv_f = FALSE;
1626             continue;
1627         case 's':           /* SJIS output */
1628             output_conv = s_oconv;
1629             continue;
1630         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1631             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1632             input_f = LATIN1_INPUT;
1633             continue;
1634         case 'i':           /* Kanji IN ESC-$-@/B */
1635             if (*cp=='@'||*cp=='B')
1636                 kanji_intro = *cp++;
1637             continue;
1638         case 'o':           /* ASCII IN ESC-(-J/B */
1639             if (*cp=='J'||*cp=='B'||*cp=='H')
1640                 ascii_intro = *cp++;
1641             continue;
1642         case 'h':
1643             /*
1644                 bit:1   katakana->hiragana
1645                 bit:2   hiragana->katakana
1646             */
1647             if ('9'>= *cp && *cp>='0')
1648                 hira_f |= (*cp++ -'0');
1649             else
1650                 hira_f |= 1;
1651             continue;
1652         case 'r':
1653             rot_f = TRUE;
1654             continue;
1655 #if defined(MSDOS) || defined(__OS2__)
1656         case 'T':
1657             binmode_f = FALSE;
1658             continue;
1659 #endif
1660 #ifndef PERL_XS
1661         case 'V':
1662             version();
1663             exit(1);
1664             break;
1665         case 'v':
1666             usage();
1667             exit(1);
1668             break;
1669 #endif
1670 #ifdef UTF8_OUTPUT_ENABLE
1671         case 'w':           /* UTF-8 output */
1672             if (cp[0] == '8') {
1673                 output_conv = w_oconv; cp++;
1674                 if (cp[0] == '0'){
1675                     cp++;
1676                 } else {
1677                     output_bom_f = TRUE;
1678                 }
1679             } else {
1680                 if ('1'== cp[0] && '6'==cp[1]) {
1681                     output_conv = w_oconv16; cp+=2;
1682                 } else if ('3'== cp[0] && '2'==cp[1]) {
1683                     output_conv = w_oconv32; cp+=2;
1684                 } else {
1685                     output_conv = w_oconv;
1686                     continue;
1687                 }
1688                 if (cp[0]=='L') {
1689                     cp++;
1690                     output_endian = ENDIAN_LITTLE;
1691                 } else if (cp[0] == 'B') {
1692                     cp++;
1693                 } else {
1694                     continue;
1695                 }
1696                 if (cp[0] == '0'){
1697                     cp++;
1698                 } else {
1699                     output_bom_f = TRUE;
1700                 }
1701             }
1702             continue;
1703 #endif
1704 #ifdef UTF8_INPUT_ENABLE
1705         case 'W':           /* UTF input */
1706             if (cp[0] == '8') {
1707                 cp++;
1708                 input_f = UTF8_INPUT;
1709             }else{
1710                 if ('1'== cp[0] && '6'==cp[1]) {
1711                     cp += 2;
1712                     input_f = UTF16_INPUT;
1713                     input_endian = ENDIAN_BIG;
1714                 } else if ('3'== cp[0] && '2'==cp[1]) {
1715                     cp += 2;
1716                     input_f = UTF32_INPUT;
1717                     input_endian = ENDIAN_BIG;
1718                 } else {
1719                     input_f = UTF8_INPUT;
1720                     continue;
1721                 }
1722                 if (cp[0]=='L') {
1723                     cp++;
1724                     input_endian = ENDIAN_LITTLE;
1725                 } else if (cp[0] == 'B') {
1726                     cp++;
1727                 }
1728             }
1729             continue;
1730 #endif
1731         /* Input code assumption */
1732         case 'J':   /* JIS input */
1733             input_f = JIS_INPUT;
1734             continue;
1735         case 'E':   /* AT&T EUC input */
1736             input_f = EUC_INPUT;
1737             continue;
1738         case 'S':   /* MS Kanji input */
1739             input_f = SJIS_INPUT;
1740             if (x0201_f==NO_X0201) x0201_f=TRUE;
1741             continue;
1742         case 'Z':   /* Convert X0208 alphabet to asii */
1743             /* alpha_f
1744                bit:0   Convert JIS X 0208 Alphabet to ASCII
1745                bit:1   Convert Kankaku to one space
1746                bit:2   Convert Kankaku to two spaces
1747                bit:3   Convert HTML Entity
1748                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1749             */
1750             while ('0'<= *cp && *cp <='9') {
1751                 alpha_f |= 1 << (*cp++ - '0');
1752             }
1753             if (!alpha_f) alpha_f = 1;
1754             continue;
1755         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1756             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1757             /* accept  X0201
1758                     ESC-(-I     in JIS, EUC, MS Kanji
1759                     SI/SO       in JIS, EUC, MS Kanji
1760                     SSO         in EUC, JIS, not in MS Kanji
1761                     MS Kanji (0xa0-0xdf)
1762                output  X0201
1763                     ESC-(-I     in JIS (0x20-0x5f)
1764                     SSO         in EUC (0xa0-0xdf)
1765                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1766             */
1767             continue;
1768         case 'X':   /* Assume X0201 kana */
1769             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1770             x0201_f = TRUE;
1771             continue;
1772         case 'F':   /* prserve new lines */
1773             fold_preserve_f = TRUE;
1774         case 'f':   /* folding -f60 or -f */
1775             fold_f = TRUE;
1776             fold_len = 0;
1777             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1778                 fold_len *= 10;
1779                 fold_len += *cp++ - '0';
1780             }
1781             if (!(0<fold_len && fold_len<BUFSIZ))
1782                 fold_len = DEFAULT_FOLD;
1783             if (*cp=='-') {
1784                 fold_margin = 0;
1785                 cp++;
1786                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1787                     fold_margin *= 10;
1788                     fold_margin += *cp++ - '0';
1789                 }
1790             }
1791             continue;
1792         case 'm':   /* MIME support */
1793             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1794             if (*cp=='B'||*cp=='Q') {
1795                 mime_decode_mode = *cp++;
1796                 mimebuf_f = FIXED_MIME;
1797             } else if (*cp=='N') {
1798                 mime_f = TRUE; cp++;
1799             } else if (*cp=='S') {
1800                 mime_f = STRICT_MIME; cp++;
1801             } else if (*cp=='0') {
1802                 mime_decode_f = FALSE;
1803                 mime_f = FALSE; cp++;
1804             }
1805             continue;
1806         case 'M':   /* MIME output */
1807             if (*cp=='B') {
1808                 mimeout_mode = 'B';
1809                 mimeout_f = FIXED_MIME; cp++;
1810             } else if (*cp=='Q') {
1811                 mimeout_mode = 'Q';
1812                 mimeout_f = FIXED_MIME; cp++;
1813             } else {
1814                 mimeout_f = TRUE;
1815             }
1816             continue;
1817         case 'B':   /* Broken JIS support */
1818             /*  bit:0   no ESC JIS
1819                 bit:1   allow any x on ESC-(-x or ESC-$-x
1820                 bit:2   reset to ascii on NL
1821             */
1822             if ('9'>= *cp && *cp>='0')
1823                 broken_f |= 1<<(*cp++ -'0');
1824             else
1825                 broken_f |= TRUE;
1826             continue;
1827 #ifndef PERL_XS
1828         case 'O':/* for Output file */
1829             file_out_f = TRUE;
1830             continue;
1831 #endif
1832         case 'c':/* add cr code */
1833             nlmode_f = CRLF;
1834             continue;
1835         case 'd':/* delete cr code */
1836             nlmode_f = LF;
1837             continue;
1838         case 'I':   /* ISO-2022-JP output */
1839             iso2022jp_f = TRUE;
1840             continue;
1841         case 'L':  /* line mode */
1842             if (*cp=='u') {         /* unix */
1843                 nlmode_f = LF; cp++;
1844             } else if (*cp=='m') { /* mac */
1845                 nlmode_f = CR; cp++;
1846             } else if (*cp=='w') { /* windows */
1847                 nlmode_f = CRLF; cp++;
1848             } else if (*cp=='0') { /* no conversion  */
1849                 nlmode_f = 0; cp++;
1850             }
1851             continue;
1852         case 'g':
1853 #ifndef PERL_XS
1854             guess_f = TRUE;
1855 #endif
1856             continue;
1857         case SP:
1858         /* module muliple options in a string are allowed for Perl moudle  */
1859             while(*cp && *cp++!='-');
1860             continue;
1861         default:
1862             /* bogus option but ignored */
1863             continue;
1864         }
1865     }
1866 }
1867
1868 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1869 {
1870     if (iconv_func){
1871         struct input_code *p = input_code_list;
1872         while (p->name){
1873             if (iconv_func == p->iconv_func){
1874                 return p;
1875             }
1876             p++;
1877         }
1878     }
1879     return 0;
1880 }
1881
1882 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1883 {
1884 #ifdef INPUT_CODE_FIX
1885     if (f || !input_f)
1886 #endif
1887         if (estab_f != f){
1888             estab_f = f;
1889         }
1890
1891     if (iconv_func
1892 #ifdef INPUT_CODE_FIX
1893         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1894 #endif
1895         ){
1896         iconv = iconv_func;
1897     }
1898 #ifdef CHECK_OPTION
1899     if (estab_f && iconv_for_check != iconv){
1900         struct input_code *p = find_inputcode_byfunc(iconv);
1901         if (p){
1902             set_input_codename(p->name);
1903             debug(p->name);
1904         }
1905         iconv_for_check = iconv;
1906     }
1907 #endif
1908 }
1909
/*
 * Flag bits that set_code_score() ORs into input_code.score.
 * Each flag is the previous one shifted left by one bit; SCORE_CP932 only
 * exists when SHIFTJIS_CP932 is defined, which moves the later flags up.
 */
#define SCORE_L2       (1 << 0)              /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* remapped through CP932 */
#define SCORE_NO_EXIST (SCORE_CP932 << 1)    /* character does not exist */
#else
#define SCORE_NO_EXIST (SCORE_DEPEND << 1)   /* character does not exist */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* encoding specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score assigned by status_reset(). */
#define SCORE_INIT (SCORE_iMIME)

/* Looked up by code_score() with (c2 & 0x0f) when (c2 & 0x70) == 0x20. */
static const char score_table_A0[] = {
    /* 0x0 - 0x3 */ 0,            0,              0,              0,
    /* 0x4 - 0x7 */ 0,            0,              0,              0,
    /* 0x8 - 0xb */ 0,            SCORE_DEPEND,   SCORE_DEPEND,   SCORE_DEPEND,
    /* 0xc - 0xf */ SCORE_DEPEND, SCORE_DEPEND,   SCORE_DEPEND,   SCORE_NO_EXIST,
};

/* Looked up by code_score() with (c2 & 0x0f) when (c2 & 0x70) == 0x70. */
static const char score_table_F0[] = {
    /* 0x0 - 0x3 */ SCORE_L2,     SCORE_L2,       SCORE_L2,       SCORE_L2,
    /* 0x4 - 0x7 */ SCORE_L2,     SCORE_DEPEND,   SCORE_NO_EXIST, SCORE_NO_EXIST,
    /* 0x8 - 0xb */ SCORE_DEPEND, SCORE_DEPEND,   SCORE_DEPEND,   SCORE_DEPEND,
    /* 0xc - 0xf */ SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
1937
1938 void set_code_score(struct input_code *ptr, nkf_char score)
1939 {
1940     if (ptr){
1941         ptr->score |= score;
1942     }
1943 }
1944
1945 void clr_code_score(struct input_code *ptr, nkf_char score)
1946 {
1947     if (ptr){
1948         ptr->score &= ~score;
1949     }
1950 }
1951
1952 void code_score(struct input_code *ptr)
1953 {
1954     nkf_char c2 = ptr->buf[0];
1955 #ifdef UTF8_OUTPUT_ENABLE
1956     nkf_char c1 = ptr->buf[1];
1957 #endif
1958     if (c2 < 0){
1959         set_code_score(ptr, SCORE_ERROR);
1960     }else if (c2 == SSO){
1961         set_code_score(ptr, SCORE_KANA);
1962 #ifdef UTF8_OUTPUT_ENABLE
1963     }else if (!e2w_conv(c2, c1)){
1964         set_code_score(ptr, SCORE_NO_EXIST);
1965 #endif
1966     }else if ((c2 & 0x70) == 0x20){
1967         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1968     }else if ((c2 & 0x70) == 0x70){
1969         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1970     }else if ((c2 & 0x70) >= 0x50){
1971         set_code_score(ptr, SCORE_L2);
1972     }
1973 }
1974
1975 void status_disable(struct input_code *ptr)
1976 {
1977     ptr->stat = -1;
1978     ptr->buf[0] = -1;
1979     code_score(ptr);
1980     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1981 }
1982
1983 void status_push_ch(struct input_code *ptr, nkf_char c)
1984 {
1985     ptr->buf[ptr->index++] = c;
1986 }
1987
1988 void status_clear(struct input_code *ptr)
1989 {
1990     ptr->stat = 0;
1991     ptr->index = 0;
1992 }
1993
1994 void status_reset(struct input_code *ptr)
1995 {
1996     status_clear(ptr);
1997     ptr->score = SCORE_INIT;
1998 }
1999
2000 void status_reinit(struct input_code *ptr)
2001 {
2002     status_reset(ptr);
2003     ptr->_file_stat = 0;
2004 }
2005
2006 void status_check(struct input_code *ptr, nkf_char c)
2007 {
2008     if (c <= DEL && estab_f){
2009         status_reset(ptr);
2010     }
2011 }
2012
2013 void s_status(struct input_code *ptr, nkf_char c)
2014 {
2015     switch(ptr->stat){
2016       case -1:
2017           status_check(ptr, c);
2018           break;
2019       case 0:
2020           if (c <= DEL){
2021               break;
2022 #ifdef NUMCHAR_OPTION
2023           }else if (is_unicode_capsule(c)){
2024               break;
2025 #endif
2026           }else if (0xa1 <= c && c <= 0xdf){
2027               status_push_ch(ptr, SSO);
2028               status_push_ch(ptr, c);
2029               code_score(ptr);
2030               status_clear(ptr);
2031           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2032               ptr->stat = 1;
2033               status_push_ch(ptr, c);
2034 #ifdef SHIFTJIS_CP932
2035           }else if (cp51932_f
2036                     && is_ibmext_in_sjis(c)){
2037               ptr->stat = 2;
2038               status_push_ch(ptr, c);
2039 #endif /* SHIFTJIS_CP932 */
2040 #ifdef X0212_ENABLE
2041           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2042               ptr->stat = 1;
2043               status_push_ch(ptr, c);
2044 #endif /* X0212_ENABLE */
2045           }else{
2046               status_disable(ptr);
2047           }
2048           break;
2049       case 1:
2050           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2051               status_push_ch(ptr, c);
2052               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2053               code_score(ptr);
2054               status_clear(ptr);
2055           }else{
2056               status_disable(ptr);
2057           }
2058           break;
2059       case 2:
2060 #ifdef SHIFTJIS_CP932
2061           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2062               status_push_ch(ptr, c);
2063               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2064                   set_code_score(ptr, SCORE_CP932);
2065                   status_clear(ptr);
2066                   break;
2067               }
2068           }
2069 #endif /* SHIFTJIS_CP932 */
2070 #ifndef X0212_ENABLE
2071           status_disable(ptr);
2072 #endif
2073           break;
2074     }
2075 }
2076
2077 void e_status(struct input_code *ptr, nkf_char c)
2078 {
2079     switch (ptr->stat){
2080       case -1:
2081           status_check(ptr, c);
2082           break;
2083       case 0:
2084           if (c <= DEL){
2085               break;
2086 #ifdef NUMCHAR_OPTION
2087           }else if (is_unicode_capsule(c)){
2088               break;
2089 #endif
2090           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2091               ptr->stat = 1;
2092               status_push_ch(ptr, c);
2093 #ifdef X0212_ENABLE
2094           }else if (0x8f == c){
2095               ptr->stat = 2;
2096               status_push_ch(ptr, c);
2097 #endif /* X0212_ENABLE */
2098           }else{
2099               status_disable(ptr);
2100           }
2101           break;
2102       case 1:
2103           if (0xa1 <= c && c <= 0xfe){
2104               status_push_ch(ptr, c);
2105               code_score(ptr);
2106               status_clear(ptr);
2107           }else{
2108               status_disable(ptr);
2109           }
2110           break;
2111 #ifdef X0212_ENABLE
2112       case 2:
2113           if (0xa1 <= c && c <= 0xfe){
2114               ptr->stat = 1;
2115               status_push_ch(ptr, c);
2116           }else{
2117               status_disable(ptr);
2118           }
2119 #endif /* X0212_ENABLE */
2120     }
2121 }
2122
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 candidate: feed one input byte c into the detector state machine.
 *
 * In state 0 the lead byte selects how many continuation bytes follow and
 * that count is stored in ptr->stat (1, 2 or 3).  For one- and two-byte
 * tails the finished sequence is passed through w2e_conv() and scored,
 * except that the sequence EF BB BF is not scored.  Sequences with three
 * continuation bytes are only consumed.  Any byte outside 0x80..0xbf where
 * a continuation byte is expected disables the candidate.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    int is_trail = (0x80 <= c && c <= 0xbf);
    int pending;

    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;

      case 0:
          if (c <= DEL){
              break;
          }
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c)){
              break;
          }
#endif
          if (0xc0 <= c && c <= 0xdf){
              pending = 1;
          }else if (0xe0 <= c && c <= 0xef){
              pending = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              pending = 3;
          }else{
              status_disable(ptr);
              break;
          }
          ptr->stat = pending;
          status_push_ch(ptr, c);
          break;

      case 1:
      case 2:
          if (!is_trail){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index > ptr->stat){
              /* sequence complete: lead byte plus stat continuation bytes */
              int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                         && ptr->buf[2] == 0xbf);
              w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                       &ptr->buf[0], &ptr->buf[1]);
              if (!bom){
                  code_score(ptr);
              }
              status_clear(ptr);
          }
          break;

      case 3:
          if (!is_trail){
              status_disable(ptr);
          }else if (ptr->index < ptr->stat){
              status_push_ch(ptr, c);
          }else{
              /* third continuation byte: sequence finished, nothing is scored */
              status_clear(ptr);
          }
          break;
    }
}
#endif
2182
2183 void code_status(nkf_char c)
2184 {
2185     int action_flag = 1;
2186     struct input_code *result = 0;
2187     struct input_code *p = input_code_list;
2188     while (p->name){
2189         if (!p->status_func) {
2190             ++p;
2191             continue;
2192         }
2193         if (!p->status_func)
2194             continue;
2195         (p->status_func)(p, c);
2196         if (p->stat > 0){
2197             action_flag = 0;
2198         }else if(p->stat == 0){
2199             if (result){
2200                 action_flag = 0;
2201             }else{
2202                 result = p;
2203             }
2204         }
2205         ++p;
2206     }
2207
2208     if (action_flag){
2209         if (result && !estab_f){
2210             set_iconv(TRUE, result->iconv_func);
2211         }else if (c <= DEL){
2212             struct input_code *ptr = input_code_list;
2213             while (ptr->name){
2214                 status_reset(ptr);
2215                 ++ptr;
2216             }
2217         }
2218     }
2219 }
2220
2221 #ifndef WIN32DLL
2222 nkf_char std_getc(FILE *f)
2223 {
2224     if (std_gc_ndx){
2225         return std_gc_buf[--std_gc_ndx];
2226     }
2227     return getc(f);
2228 }
2229 #endif /*WIN32DLL*/
2230
2231 nkf_char std_ungetc(nkf_char c, FILE *f)
2232 {
2233     if (std_gc_ndx == STD_GC_BUFSIZE){
2234         return EOF;
2235     }
2236     std_gc_buf[std_gc_ndx++] = c;
2237     return c;
2238 }
2239
2240 #ifndef WIN32DLL
2241 void std_putc(nkf_char c)
2242 {
2243     if(c!=EOF)
2244       putchar(c);
2245 }
2246 #endif /*WIN32DLL*/
2247
2248 #if !defined(PERL_XS) && !defined(WIN32DLL)
2249 nkf_char noconvert(FILE *f)
2250 {
2251     nkf_char    c;
2252
2253     if (nop_f == 2)
2254         module_connection();
2255     while ((c = (*i_getc)(f)) != EOF)
2256       (*o_putc)(c);
2257     (*o_putc)(EOF);
2258     return 1;
2259 }
2260 #endif
2261
2262 void module_connection(void)
2263 {
2264     oconv = output_conv;
2265     o_putc = std_putc;
2266
2267     /* replace continucation module, from output side */
2268
2269     /* output redicrection */
2270 #ifdef CHECK_OPTION
2271     if (noout_f || guess_f){
2272         o_putc = no_putc;
2273     }
2274 #endif
2275     if (mimeout_f) {
2276         o_mputc = o_putc;
2277         o_putc = mime_putc;
2278         if (mimeout_f == TRUE) {
2279             o_base64conv = oconv; oconv = base64_conv;
2280         }
2281         /* base64_count = 0; */
2282     }
2283
2284     if (nlmode_f || guess_f) {
2285         o_nlconv = oconv; oconv = nl_conv;
2286     }
2287     if (rot_f) {
2288         o_rot_conv = oconv; oconv = rot_conv;
2289     }
2290     if (iso2022jp_f) {
2291         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2292     }
2293     if (hira_f) {
2294         o_hira_conv = oconv; oconv = hira_conv;
2295     }
2296     if (fold_f) {
2297         o_fconv = oconv; oconv = fold_conv;
2298         f_line = 0;
2299     }
2300     if (alpha_f || x0201_f) {
2301         o_zconv = oconv; oconv = z_conv;
2302     }
2303
2304     i_getc = std_getc;
2305     i_ungetc = std_ungetc;
2306     /* input redicrection */
2307 #ifdef INPUT_OPTION
2308     if (cap_f){
2309         i_cgetc = i_getc; i_getc = cap_getc;
2310         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2311     }
2312     if (url_f){
2313         i_ugetc = i_getc; i_getc = url_getc;
2314         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2315     }
2316 #endif
2317 #ifdef NUMCHAR_OPTION
2318     if (numchar_f){
2319         i_ngetc = i_getc; i_getc = numchar_getc;
2320         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2321     }
2322 #endif
2323 #ifdef UNICODE_NORMALIZATION
2324     if (nfc_f && input_f == UTF8_INPUT){
2325         i_nfc_getc = i_getc; i_getc = nfc_getc;
2326         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2327     }
2328 #endif
2329     if (mime_f && mimebuf_f==FIXED_MIME) {
2330         i_mgetc = i_getc; i_getc = mime_getc;
2331         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2332     }
2333     if (broken_f & 1) {
2334         i_bgetc = i_getc; i_getc = broken_getc;
2335         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2336     }
2337     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2338         set_iconv(-TRUE, e_iconv);
2339     } else if (input_f == SJIS_INPUT) {
2340         set_iconv(-TRUE, s_iconv);
2341 #ifdef UTF8_INPUT_ENABLE
2342     } else if (input_f == UTF8_INPUT) {
2343         set_iconv(-TRUE, w_iconv);
2344     } else if (input_f == UTF16_INPUT) {
2345         set_iconv(-TRUE, w_iconv16);
2346     } else if (input_f == UTF32_INPUT) {
2347         set_iconv(-TRUE, w_iconv32);
2348 #endif
2349     } else {
2350         set_iconv(FALSE, e_iconv);
2351     }
2352
2353     {
2354         struct input_code *p = input_code_list;
2355         while (p->name){
2356             status_reinit(p++);
2357         }
2358     }
2359 }
2360
2361 /*
2362  * Check and Ignore BOM
2363  */
2364 void check_bom(FILE *f)
2365 {
2366     int c2;
2367     switch(c2 = (*i_getc)(f)){
2368     case 0x00:
2369         if((c2 = (*i_getc)(f)) == 0x00){
2370             if((c2 = (*i_getc)(f)) == 0xFE){
2371                 if((c2 = (*i_getc)(f)) == 0xFF){
2372                     if(!input_f){
2373                         set_iconv(TRUE, w_iconv32);
2374                     }
2375                     if (iconv == w_iconv32) {
2376                         input_endian = ENDIAN_BIG;
2377                         return;
2378                     }
2379                     (*i_ungetc)(0xFF,f);
2380                 }else (*i_ungetc)(c2,f);
2381                 (*i_ungetc)(0xFE,f);
2382             }else if(c2 == 0xFF){
2383                 if((c2 = (*i_getc)(f)) == 0xFE){
2384                     if(!input_f){
2385                         set_iconv(TRUE, w_iconv32);
2386                     }
2387                     if (iconv == w_iconv32) {
2388                         input_endian = ENDIAN_2143;
2389                         return;
2390                     }
2391                     (*i_ungetc)(0xFF,f);
2392                 }else (*i_ungetc)(c2,f);
2393                 (*i_ungetc)(0xFF,f);
2394             }else (*i_ungetc)(c2,f);
2395             (*i_ungetc)(0x00,f);
2396         }else (*i_ungetc)(c2,f);
2397         (*i_ungetc)(0x00,f);
2398         break;
2399     case 0xEF:
2400         if((c2 = (*i_getc)(f)) == 0xBB){
2401             if((c2 = (*i_getc)(f)) == 0xBF){
2402                 if(!input_f){
2403                     set_iconv(TRUE, w_iconv);
2404                 }
2405                 if (iconv == w_iconv) {
2406                     return;
2407                 }
2408                 (*i_ungetc)(0xBF,f);
2409             }else (*i_ungetc)(c2,f);
2410             (*i_ungetc)(0xBB,f);
2411         }else (*i_ungetc)(c2,f);
2412         (*i_ungetc)(0xEF,f);
2413         break;
2414     case 0xFE:
2415         if((c2 = (*i_getc)(f)) == 0xFF){
2416             if((c2 = (*i_getc)(f)) == 0x00){
2417                 if((c2 = (*i_getc)(f)) == 0x00){
2418                     if(!input_f){
2419                         set_iconv(TRUE, w_iconv32);
2420                     }
2421                     if (iconv == w_iconv32) {
2422                         input_endian = ENDIAN_3412;
2423                         return;
2424                     }
2425                     (*i_ungetc)(0x00,f);
2426                 }else (*i_ungetc)(c2,f);
2427                 (*i_ungetc)(0x00,f);
2428             }else (*i_ungetc)(c2,f);
2429             if(!input_f){
2430                 set_iconv(TRUE, w_iconv16);
2431             }
2432             if (iconv == w_iconv16) {
2433                 input_endian = ENDIAN_BIG;
2434                 return;
2435             }
2436             (*i_ungetc)(0xFF,f);
2437         }else (*i_ungetc)(c2,f);
2438         (*i_ungetc)(0xFE,f);
2439         break;
2440     case 0xFF:
2441         if((c2 = (*i_getc)(f)) == 0xFE){
2442             if((c2 = (*i_getc)(f)) == 0x00){
2443                 if((c2 = (*i_getc)(f)) == 0x00){
2444                     if(!input_f){
2445                         set_iconv(TRUE, w_iconv32);
2446                     }
2447                     if (iconv == w_iconv32) {
2448                         input_endian = ENDIAN_LITTLE;
2449                         return;
2450                     }
2451                     (*i_ungetc)(0x00,f);
2452                 }else (*i_ungetc)(c2,f);
2453                 (*i_ungetc)(0x00,f);
2454             }else (*i_ungetc)(c2,f);
2455             if(!input_f){
2456                 set_iconv(TRUE, w_iconv16);
2457             }
2458             if (iconv == w_iconv16) {
2459                 input_endian = ENDIAN_LITTLE;
2460                 return;
2461             }
2462             (*i_ungetc)(0xFE,f);
2463         }else (*i_ungetc)(c2,f);
2464         (*i_ungetc)(0xFF,f);
2465         break;
2466     default:
2467         (*i_ungetc)(c2,f);
2468         break;
2469     }
2470 }
2471
2472 /*
2473    Conversion main loop. Code detection only.
2474  */
2475
2476 nkf_char kanji_convert(FILE *f)
2477 {
2478     nkf_char    c3, c2=0, c1, c0=0;
2479     int is_8bit = FALSE;
2480
2481     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2482 #ifdef UTF8_INPUT_ENABLE
2483        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2484 #endif
2485       ){
2486         is_8bit = TRUE;
2487     }
2488
2489     input_mode = ASCII;
2490     output_mode = ASCII;
2491     shift_mode = FALSE;
2492
2493 #define NEXT continue      /* no output, get next */
2494 #define SEND ;             /* output c1 and c2, get next */
2495 #define LAST break         /* end of loop, go closing  */
2496
2497     module_connection();
2498     check_bom(f);
2499
2500     while ((c1 = (*i_getc)(f)) != EOF) {
2501 #ifdef INPUT_CODE_FIX
2502         if (!input_f)
2503 #endif
2504             code_status(c1);
2505         if (c2) {
2506             /* second byte */
2507             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2508                 /* in case of 8th bit is on */
2509                 if (!estab_f&&!mime_decode_mode) {
2510                     /* in case of not established yet */
2511                     /* It is still ambiguious */
2512                     if (h_conv(f, c2, c1)==EOF)
2513                         LAST;
2514                     else
2515                         c2 = 0;
2516                     NEXT;
2517                 } else {
2518                     /* in case of already established */
2519                     if (c1 < AT) {
2520                         /* ignore bogus code and not CP5022x UCD */
2521                         c2 = 0;
2522                         NEXT;
2523                     } else {
2524                         SEND;
2525                     }
2526                 }
2527             } else
2528                 /* second byte, 7 bit code */
2529                 /* it might be kanji shitfted */
2530                 if ((c1 == DEL) || (c1 <= SP)) {
2531                     /* ignore bogus first code */
2532                     c2 = 0;
2533                     NEXT;
2534                 } else
2535                     SEND;
2536         } else {
2537             /* first byte */
2538 #ifdef UTF8_INPUT_ENABLE
2539             if (iconv == w_iconv16) {
2540                 if (input_endian == ENDIAN_BIG) {
2541                     c2 = c1;
2542                     if ((c1 = (*i_getc)(f)) != EOF) {
2543                         if (0xD8 <= c2 && c2 <= 0xDB) {
2544                             if ((c0 = (*i_getc)(f)) != EOF) {
2545                                 c0 <<= 8;
2546                                 if ((c3 = (*i_getc)(f)) != EOF) {
2547                                     c0 |= c3;
2548                                 } else c2 = EOF;
2549                             } else c2 = EOF;
2550                         }
2551                     } else c2 = EOF;
2552                 } else {
2553                     if ((c2 = (*i_getc)(f)) != EOF) {
2554                         if (0xD8 <= c2 && c2 <= 0xDB) {
2555                             if ((c3 = (*i_getc)(f)) != EOF) {
2556                                 if ((c0 = (*i_getc)(f)) != EOF) {
2557                                     c0 <<= 8;
2558                                     c0 |= c3;
2559                                 } else c2 = EOF;
2560                             } else c2 = EOF;
2561                         }
2562                     } else c2 = EOF;
2563                 }
2564                 SEND;
2565             } else if(iconv == w_iconv32){
2566                 int c3 = c1;
2567                 if((c2 = (*i_getc)(f)) != EOF &&
2568                    (c1 = (*i_getc)(f)) != EOF &&
2569                    (c0 = (*i_getc)(f)) != EOF){
2570                     switch(input_endian){
2571                     case ENDIAN_BIG:
2572                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2573                         break;
2574                     case ENDIAN_LITTLE:
2575                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2576                         break;
2577                     case ENDIAN_2143:
2578                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2579                         break;
2580                     case ENDIAN_3412:
2581                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2582                         break;
2583                     }
2584                     c2 = 0;
2585                 }else{
2586                     c2 = EOF;
2587                 }
2588                 SEND;
2589             } else
2590 #endif
2591 #ifdef NUMCHAR_OPTION
2592             if (is_unicode_capsule(c1)){
2593                 SEND;
2594             } else
2595 #endif
2596             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2597                 /* 8 bit code */
2598                 if (!estab_f && !iso8859_f) {
2599                     /* not established yet */
2600                     c2 = c1;
2601                     NEXT;
2602                 } else { /* estab_f==TRUE */
2603                     if (iso8859_f) {
2604                         c2 = ISO8859_1;
2605                         c1 &= 0x7f;
2606                         SEND;
2607                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2608                         /* SJIS X0201 Case... */
2609                         if(iso2022jp_f && x0201_f==NO_X0201) {
2610                             (*oconv)(GETA1, GETA2);
2611                             NEXT;
2612                         } else {
2613                             c2 = X0201;
2614                             c1 &= 0x7f;
2615                             SEND;
2616                         }
2617                     } else if (c1==SSO && iconv != s_iconv) {
2618                         /* EUC X0201 Case */
2619                         c1 = (*i_getc)(f);  /* skip SSO */
2620                         code_status(c1);
2621                         if (SSP<=c1 && c1<0xe0) {
2622                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2623                                 (*oconv)(GETA1, GETA2);
2624                                 NEXT;
2625                             } else {
2626                                 c2 = X0201;
2627                                 c1 &= 0x7f;
2628                                 SEND;
2629                             }
2630                         } else  { /* bogus code, skip SSO and one byte */
2631                             NEXT;
2632                         }
2633                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2634                                (c1 == 0xFD || c1 == 0xFE)) {
2635                         /* CP10001 */
2636                         c2 = X0201;
2637                         c1 &= 0x7f;
2638                         SEND;
2639                     } else {
2640                        /* already established */
2641                        c2 = c1;
2642                        NEXT;
2643                     }
2644                 }
2645             } else if ((c1 > SP) && (c1 != DEL)) {
2646                 /* in case of Roman characters */
2647                 if (shift_mode) {
2648                     /* output 1 shifted byte */
2649                     if (iso8859_f) {
2650                         c2 = ISO8859_1;
2651                         SEND;
2652                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2653                       /* output 1 shifted byte */
2654                         if(iso2022jp_f && x0201_f==NO_X0201) {
2655                             (*oconv)(GETA1, GETA2);
2656                             NEXT;
2657                         } else {
2658                             c2 = X0201;
2659                             SEND;
2660                         }
2661                     } else {
2662                         /* look like bogus code */
2663                         NEXT;
2664                     }
2665                 } else if (input_mode == X0208 || input_mode == X0212 ||
2666                            input_mode == X0213_1 || input_mode == X0213_2) {
2667                     /* in case of Kanji shifted */
2668                     c2 = c1;
2669                     NEXT;
2670                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2671                     /* Check MIME code */
2672                     if ((c1 = (*i_getc)(f)) == EOF) {
2673                         (*oconv)(0, '=');
2674                         LAST;
2675                     } else if (c1 == '?') {
2676                         /* =? is mime conversion start sequence */
2677                         if(mime_f == STRICT_MIME) {
2678                             /* check in real detail */
2679                             if (mime_begin_strict(f) == EOF)
2680                                 LAST;
2681                             else
2682                                 NEXT;
2683                         } else if (mime_begin(f) == EOF)
2684                             LAST;
2685                         else
2686                             NEXT;
2687                     } else {
2688                         (*oconv)(0, '=');
2689                         (*i_ungetc)(c1,f);
2690                         NEXT;
2691                     }
2692                 } else {
2693                     /* normal ASCII code */
2694                     SEND;
2695                 }
2696             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2697                 shift_mode = FALSE;
2698                 NEXT;
2699             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2700                 shift_mode = TRUE;
2701                 NEXT;
2702             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2703                 if ((c1 = (*i_getc)(f)) == EOF) {
2704                     /*  (*oconv)(0, ESC); don't send bogus code */
2705                     LAST;
2706                 } else if (c1 == '$') {
2707                     if ((c1 = (*i_getc)(f)) == EOF) {
2708                         /*
2709                         (*oconv)(0, ESC); don't send bogus code
2710                         (*oconv)(0, '$'); */
2711                         LAST;
2712                     } else if (c1 == '@'|| c1 == 'B') {
2713                         /* This is kanji introduction */
2714                         input_mode = X0208;
2715                         shift_mode = FALSE;
2716                         set_input_codename("ISO-2022-JP");
2717 #ifdef CHECK_OPTION
2718                         debug("ISO-2022-JP");
2719 #endif
2720                         NEXT;
2721                     } else if (c1 == '(') {
2722                         if ((c1 = (*i_getc)(f)) == EOF) {
2723                             /* don't send bogus code
2724                             (*oconv)(0, ESC);
2725                             (*oconv)(0, '$');
2726                             (*oconv)(0, '(');
2727                                 */
2728                             LAST;
2729                         } else if (c1 == '@'|| c1 == 'B') {
2730                             /* This is kanji introduction */
2731                             input_mode = X0208;
2732                             shift_mode = FALSE;
2733                             NEXT;
2734 #ifdef X0212_ENABLE
2735                         } else if (c1 == 'D'){
2736                             input_mode = X0212;
2737                             shift_mode = FALSE;
2738                             NEXT;
2739 #endif /* X0212_ENABLE */
2740                         } else if (c1 == (X0213_1&0x7F)){
2741                             input_mode = X0213_1;
2742                             shift_mode = FALSE;
2743                             NEXT;
2744                         } else if (c1 == (X0213_2&0x7F)){
2745                             input_mode = X0213_2;
2746                             shift_mode = FALSE;
2747                             NEXT;
2748                         } else {
2749                             /* could be some special code */
2750                             (*oconv)(0, ESC);
2751                             (*oconv)(0, '$');
2752                             (*oconv)(0, '(');
2753                             (*oconv)(0, c1);
2754                             NEXT;
2755                         }
2756                     } else if (broken_f&0x2) {
2757                         /* accept any ESC-(-x as broken code ... */
2758                         input_mode = X0208;
2759                         shift_mode = FALSE;
2760                         NEXT;
2761                     } else {
2762                         (*oconv)(0, ESC);
2763                         (*oconv)(0, '$');
2764                         (*oconv)(0, c1);
2765                         NEXT;
2766                     }
2767                 } else if (c1 == '(') {
2768                     if ((c1 = (*i_getc)(f)) == EOF) {
2769                         /* don't send bogus code
2770                         (*oconv)(0, ESC);
2771                         (*oconv)(0, '('); */
2772                         LAST;
2773                     } else {
2774                         if (c1 == 'I') {
2775                             /* This is X0201 kana introduction */
2776                             input_mode = X0201; shift_mode = X0201;
2777                             NEXT;
2778                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2779                             /* This is X0208 kanji introduction */
2780                             input_mode = ASCII; shift_mode = FALSE;
2781                             NEXT;
2782                         } else if (broken_f&0x2) {
2783                             input_mode = ASCII; shift_mode = FALSE;
2784                             NEXT;
2785                         } else {
2786                             (*oconv)(0, ESC);
2787                             (*oconv)(0, '(');
2788                             /* maintain various input_mode here */
2789                             SEND;
2790                         }
2791                     }
2792                } else if ( c1 == 'N' || c1 == 'n'){
2793                    /* SS2 */
2794                    c3 = (*i_getc)(f);  /* skip SS2 */
2795                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2796                        c1 = c3;
2797                        c2 = X0201;
2798                        SEND;
2799                    }else{
2800                        (*i_ungetc)(c3, f);
2801                        /* lonely ESC  */
2802                        (*oconv)(0, ESC);
2803                        SEND;
2804                    }
2805                 } else {
2806                     /* lonely ESC  */
2807                     (*oconv)(0, ESC);
2808                     SEND;
2809                 }
2810             } else if (c1 == ESC && iconv == s_iconv) {
2811                 /* ESC in Shift_JIS */
2812                 if ((c1 = (*i_getc)(f)) == EOF) {
2813                     /*  (*oconv)(0, ESC); don't send bogus code */
2814                     LAST;
2815                 } else if (c1 == '$') {
2816                     /* J-PHONE emoji */
2817                     if ((c1 = (*i_getc)(f)) == EOF) {
2818                         /*
2819                            (*oconv)(0, ESC); don't send bogus code
2820                            (*oconv)(0, '$'); */
2821                         LAST;
2822                     } else {
2823                         if (('E' <= c1 && c1 <= 'G') ||
2824                             ('O' <= c1 && c1 <= 'Q')) {
2825                             /*
2826                                NUM : 0 1 2 3 4 5
2827                                BYTE: G E F O P Q
2828                                C%7 : 1 6 0 2 3 4
2829                                C%7 : 0 1 2 3 4 5 6
2830                                NUM : 2 0 3 4 5 X 1
2831                              */
2832                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2833                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2834                             while ((c1 = (*i_getc)(f)) != EOF) {
2835                                 if (SP <= c1 && c1 <= 'z') {
2836                                     (*oconv)(0, c1 + c0);
2837                                 } else break; /* c1 == SO */
2838                             }
2839                         }
2840                     }
2841                     if (c1 == EOF) LAST;
2842                     NEXT;
2843                 } else {
2844                     /* lonely ESC  */
2845                     (*oconv)(0, ESC);
2846                     SEND;
2847                 }
2848             } else if (c1 == LF || c1 == CR) {
2849                 if (broken_f&4) {
2850                     input_mode = ASCII; set_iconv(FALSE, 0);
2851                     SEND;
2852                 } else if (mime_decode_f && !mime_decode_mode){
2853                     if (c1 == LF) {
2854                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2855                             i_ungetc(SP,f);
2856                             continue;
2857                         } else {
2858                             i_ungetc(c1,f);
2859                         }
2860                         c1 = LF;
2861                         SEND;
2862                     } else  { /* if (c1 == CR)*/
2863                         if ((c1=(*i_getc)(f))!=EOF) {
2864                             if (c1==SP) {
2865                                 i_ungetc(SP,f);
2866                                 continue;
2867                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2868                                 i_ungetc(SP,f);
2869                                 continue;
2870                             } else {
2871                                 i_ungetc(c1,f);
2872                             }
2873                             i_ungetc(LF,f);
2874                         } else {
2875                             i_ungetc(c1,f);
2876                         }
2877                         c1 = CR;
2878                         SEND;
2879                     }
2880                 }
2881             } else if (c1 == DEL && input_mode == X0208) {
2882                 /* CP5022x */
2883                 c2 = c1;
2884                 NEXT;
2885             } else
2886                 SEND;
2887         }
2888         /* send: */
2889         switch(input_mode){
2890         case ASCII:
2891             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2892             case -2:
2893                 /* 4 bytes UTF-8 */
2894                 if ((c0 = (*i_getc)(f)) != EOF) {
2895                     code_status(c0);
2896                     c0 <<= 8;
2897                     if ((c3 = (*i_getc)(f)) != EOF) {
2898                         code_status(c3);
2899                         (*iconv)(c2, c1, c0|c3);
2900                     }
2901                 }
2902                 break;
2903             case -1:
2904                 /* 3 bytes EUC or UTF-8 */
2905                 if ((c0 = (*i_getc)(f)) != EOF) {
2906                     code_status(c0);
2907                     (*iconv)(c2, c1, c0);
2908                 }
2909                 break;
2910             }
2911             break;
2912         case X0208:
2913         case X0213_1:
2914             if (ms_ucs_map_f &&
2915                 0x7F <= c2 && c2 <= 0x92 &&
2916                 0x21 <= c1 && c1 <= 0x7E) {
2917                 /* CP932 UDC */
2918                 if(c1 == 0x7F) return 0;
2919                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2920                 c2 = 0;
2921             }
2922             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2923             break;
2924 #ifdef X0212_ENABLE
2925         case X0212:
2926             (*oconv)(PREFIX_EUCG3 | c2, c1);
2927             break;
2928 #endif /* X0212_ENABLE */
2929         case X0213_2:
2930             (*oconv)(PREFIX_EUCG3 | c2, c1);
2931             break;
2932         default:
2933             (*oconv)(input_mode, c1);  /* other special case */
2934         }
2935
2936         c2 = 0;
2937         c0 = 0;
2938         continue;
2939         /* goto next_word */
2940     }
2941
2942     /* epilogue */
2943     (*iconv)(EOF, 0, 0);
2944     if (!input_codename)
2945     {
2946         if (is_8bit) {
2947             struct input_code *p = input_code_list;
2948             struct input_code *result = p;
2949             while (p->name){
2950                 if (p->score < result->score) result = p;
2951                 ++p;
2952             }
2953             set_input_codename(result->name);
2954 #ifdef CHECK_OPTION
2955             debug(result->name);
2956 #endif
2957         }
2958     }
2959     return 1;
2960 }
2961
2962 nkf_char
2963 h_conv(FILE *f, nkf_char c2, nkf_char c1)
2964 {
2965     nkf_char ret, c3, c0;
2966     int hold_index;
2967
2968
2969     /** it must NOT be in the kanji shifte sequence      */
2970     /** it must NOT be written in JIS7                   */
2971     /** and it must be after 2 byte 8bit code            */
2972
2973     hold_count = 0;
2974     push_hold_buf(c2);
2975     push_hold_buf(c1);
2976
2977     while ((c1 = (*i_getc)(f)) != EOF) {
2978         if (c1 == ESC){
2979             (*i_ungetc)(c1,f);
2980             break;
2981         }
2982         code_status(c1);
2983         if (push_hold_buf(c1) == EOF || estab_f){
2984             break;
2985         }
2986     }
2987
2988     if (!estab_f){
2989         struct input_code *p = input_code_list;
2990         struct input_code *result = p;
2991         if (c1 == EOF){
2992             code_status(c1);
2993         }
2994         while (p->name){
2995             if (p->status_func && p->score < result->score){
2996                 result = p;
2997             }
2998             ++p;
2999         }
3000         set_iconv(TRUE, result->iconv_func);
3001     }
3002
3003
3004     /** now,
3005      ** 1) EOF is detected, or
3006      ** 2) Code is established, or
3007      ** 3) Buffer is FULL (but last word is pushed)
3008      **
3009      ** in 1) and 3) cases, we continue to use
3010      ** Kanji codes by oconv and leave estab_f unchanged.
3011      **/
3012
3013     ret = c1;
3014     hold_index = 0;
3015     while (hold_index < hold_count){
3016         c2 = hold_buf[hold_index++];
3017         if (c2 <= DEL
3018 #ifdef NUMCHAR_OPTION
3019             || is_unicode_capsule(c2)
3020 #endif
3021             ){
3022             (*iconv)(0, c2, 0);
3023             continue;
3024         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3025             (*iconv)(X0201, c2, 0);
3026             continue;
3027         }
3028         if (hold_index < hold_count){
3029             c1 = hold_buf[hold_index++];
3030         }else{
3031             c1 = (*i_getc)(f);
3032             if (c1 == EOF){
3033                 c3 = EOF;
3034                 break;
3035             }
3036             code_status(c1);
3037         }
3038         c0 = 0;
3039         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3040         case -2:
3041             /* 4 bytes UTF-8 */
3042             if (hold_index < hold_count){
3043                 c0 = hold_buf[hold_index++];
3044             } else if ((c0 = (*i_getc)(f)) == EOF) {
3045                 ret = EOF;
3046                 break;
3047             } else {
3048                 code_status(c0);
3049                 c0 <<= 8;
3050                 if (hold_index < hold_count){
3051                     c3 = hold_buf[hold_index++];
3052                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3053                     c0 = ret = EOF;
3054                     break;
3055                 } else {
3056                     code_status(c3);
3057                     (*iconv)(c2, c1, c0|c3);
3058                 }
3059             }
3060             break;
3061         case -1:
3062             /* 3 bytes EUC or UTF-8 */
3063             if (hold_index < hold_count){
3064                 c0 = hold_buf[hold_index++];
3065             } else if ((c0 = (*i_getc)(f)) == EOF) {
3066                 ret = EOF;
3067                 break;
3068             } else {
3069                 code_status(c0);
3070             }
3071             (*iconv)(c2, c1, c0);
3072             break;
3073         }
3074         if (c0 == EOF) break;
3075     }
3076     return ret;
3077 }
3078
3079 nkf_char push_hold_buf(nkf_char c2)
3080 {
3081     if (hold_count >= HOLD_SIZE*2)
3082         return (EOF);
3083     hold_buf[hold_count++] = (unsigned char)c2;
3084     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3085 }
3086
3087 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3088 {
3089 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3090     nkf_char val;
3091 #endif
3092     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3093 #ifdef SHIFTJIS_CP932
3094     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3095         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3096         if (val){
3097             c2 = val >> 8;
3098             c1 = val & 0xff;
3099         }
3100     }
3101     if (cp932inv_f
3102         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3103         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3104         if (c){
3105             c2 = c >> 8;
3106             c1 = c & 0xff;
3107         }
3108     }
3109 #endif /* SHIFTJIS_CP932 */
3110 #ifdef X0212_ENABLE
3111     if (!x0213_f && is_ibmext_in_sjis(c2)){
3112         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3113         if (val){
3114             if (val > 0x7FFF){
3115                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3116                 c1 = val & 0xff;
3117             }else{
3118                 c2 = val >> 8;
3119                 c1 = val & 0xff;
3120             }
3121             if (p2) *p2 = c2;
3122             if (p1) *p1 = c1;
3123             return 0;
3124         }
3125     }
3126 #endif
3127     if(c2 >= 0x80){
3128         if(x0213_f && c2 >= 0xF0){
3129             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3130                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3131             }else{ /* 78<=k<=94 */
3132                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3133                 if (0x9E < c1) c2++;
3134             }
3135         }else{
3136             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3137             if (0x9E < c1) c2++;
3138         }
3139         if (c1 < 0x9F)
3140             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3141         else {
3142             c1 = c1 - 0x7E;
3143         }
3144     }
3145
3146 #ifdef X0212_ENABLE
3147     c2 = x0212_unshift(c2);
3148 #endif
3149     if (p2) *p2 = c2;
3150     if (p1) *p1 = c1;
3151     return 0;
3152 }
3153
3154 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3155 {
3156     if (c2 == X0201) {
3157         c1 &= 0x7f;
3158     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3159         /* NOP */
3160     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3161         /* CP932 UDC */
3162         if(c1 == 0x7F) return 0;
3163         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3164         c2 = 0;
3165     } else {
3166         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3167         if (ret) return ret;
3168     }
3169     (*oconv)(c2, c1);
3170     return 0;
3171 }
3172
3173 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3174 {
3175     if (c2 == X0201) {
3176         c1 &= 0x7f;
3177 #ifdef X0212_ENABLE
3178     }else if (c2 == 0x8f){
3179         if (c0 == 0){
3180             return -1;
3181         }
3182         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3183             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3184             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3185             c2 = 0;
3186         } else {
3187             c2 = (c2 << 8) | (c1 & 0x7f);
3188             c1 = c0 & 0x7f;
3189 #ifdef SHIFTJIS_CP932
3190             if (cp51932_f){
3191                 nkf_char s2, s1;
3192                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3193                     s2e_conv(s2, s1, &c2, &c1);
3194                     if (c2 < 0x100){
3195                         c1 &= 0x7f;
3196                         c2 &= 0x7f;
3197                     }
3198                 }
3199             }
3200 #endif /* SHIFTJIS_CP932 */
3201         }
3202 #endif /* X0212_ENABLE */
3203     } else if (c2 == SSO){
3204         c2 = X0201;
3205         c1 &= 0x7f;
3206     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3207         /* NOP */
3208     } else {
3209         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3210             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3211             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3212             c2 = 0;
3213         } else {
3214             c1 &= 0x7f;
3215             c2 &= 0x7f;
3216 #ifdef SHIFTJIS_CP932
3217             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3218                 nkf_char s2, s1;
3219                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3220                     s2e_conv(s2, s1, &c2, &c1);
3221                     if (c2 < 0x100){
3222                         c1 &= 0x7f;
3223                         c2 &= 0x7f;
3224                     }
3225                 }
3226             }
3227 #endif /* SHIFTJIS_CP932 */
3228         }
3229     }
3230     (*oconv)(c2, c1);
3231     return 0;
3232 }
3233
3234 #ifdef UTF8_INPUT_ENABLE
3235 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3236 {
3237     nkf_char ret = 0;
3238
3239     if (!c1){
3240         *p2 = 0;
3241         *p1 = c2;
3242     }else if (0xc0 <= c2 && c2 <= 0xef) {
3243         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3244 #ifdef NUMCHAR_OPTION
3245         if (ret > 0){
3246             if (p2) *p2 = 0;
3247             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3248             ret = 0;
3249         }
3250 #endif
3251     }
3252     return ret;
3253 }
3254
3255 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3256 {
3257     nkf_char ret = 0;
3258     static const char w_iconv_utf8_1st_byte[] =
3259     { /* 0xC0 - 0xFF */
3260         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3261         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3262         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3263         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3264
3265     if (c2 < 0 || 0xff < c2) {
3266     }else if (c2 == 0) { /* 0 : 1 byte*/
3267         c0 = 0;
3268     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3269         return 0;
3270     } else{
3271         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3272         case 21:
3273             if (c1 < 0x80 || 0xBF < c1) return 0;
3274             break;
3275         case 30:
3276             if (c0 == 0) return -1;
3277             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3278                 return 0;
3279             break;
3280         case 31:
3281         case 33:
3282             if (c0 == 0) return -1;
3283             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3284                 return 0;
3285             break;
3286         case 32:
3287             if (c0 == 0) return -1;
3288             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3289                 return 0;
3290             break;
3291         case 40:
3292             if (c0 == 0) return -2;
3293             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3294                 return 0;
3295             break;
3296         case 41:
3297             if (c0 == 0) return -2;
3298             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3299                 return 0;
3300             break;
3301         case 42:
3302             if (c0 == 0) return -2;
3303             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3304                 return 0;
3305             break;
3306         default:
3307             return 0;
3308             break;
3309         }
3310     }
3311     if (c2 == 0 || c2 == EOF){
3312     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3313         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3314         c2 = 0;
3315     } else {
3316         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3317     }
3318     if (ret == 0){
3319         (*oconv)(c2, c1);
3320     }
3321     return ret;
3322 }
3323 #endif
3324
3325 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3326 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3327 {
3328     val &= VALUE_MASK;
3329     if (val < 0x80){
3330         *p2 = val;
3331         *p1 = 0;
3332         *p0 = 0;
3333     }else if (val < 0x800){
3334         *p2 = 0xc0 | (val >> 6);
3335         *p1 = 0x80 | (val & 0x3f);
3336         *p0 = 0;
3337     } else if (val <= NKF_INT32_C(0xFFFF)) {
3338         *p2 = 0xe0 | (val >> 12);
3339         *p1 = 0x80 | ((val >> 6) & 0x3f);
3340         *p0 = 0x80 | (val        & 0x3f);
3341     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3342         *p2 = 0xe0 |  (val >> 16);
3343         *p1 = 0x80 | ((val >> 12) & 0x3f);
3344         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3345     } else {
3346         *p2 = 0;
3347         *p1 = 0;
3348         *p0 = 0;
3349     }
3350 }
3351 #endif
3352
3353 #ifdef UTF8_INPUT_ENABLE
3354 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3355 {
3356     nkf_char val;
3357     if (c2 >= 0xf8) {
3358         val = -1;
3359     } else if (c2 >= 0xf0){
3360         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3361         val = (c2 & 0x0f) << 18;
3362         val |= (c1 & 0x3f) << 12;
3363         val |= (c0 & 0x3f00) >> 2;
3364         val |= (c0 & 0x3f);
3365     }else if (c2 >= 0xe0){
3366         val = (c2 & 0x0f) << 12;
3367         val |= (c1 & 0x3f) << 6;
3368         val |= (c0 & 0x3f);
3369     }else if (c2 >= 0xc0){
3370         val = (c2 & 0x1f) << 6;
3371         val |= (c1 & 0x3f);
3372     }else{
3373         val = c2;
3374     }
3375     return val;
3376 }
3377
3378 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3379 {
3380     nkf_char c2, c1, c0;
3381     nkf_char ret = 0;
3382     val &= VALUE_MASK;
3383     if (val < 0x80){
3384         *p2 = 0;
3385         *p1 = val;
3386     }else{
3387         w16w_conv(val, &c2, &c1, &c0);
3388         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3389 #ifdef NUMCHAR_OPTION
3390         if (ret > 0){
3391             *p2 = 0;
3392             *p1 = CLASS_UNICODE | val;
3393             ret = 0;
3394         }
3395 #endif
3396     }
3397     return ret;
3398 }
3399 #endif
3400
3401 #ifdef UTF8_INPUT_ENABLE
3402 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3403 {
3404     nkf_char ret = 0;
3405     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3406         (*oconv)(c2, c1);
3407         return 0;
3408     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3409         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3410             return -2;
3411         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3412         c2 = 0;
3413     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3414         /*
3415            return 2;
3416         */
3417         return 1;
3418     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3419     if (ret) return ret;
3420     (*oconv)(c2, c1);
3421     return 0;
3422 }
3423
3424 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3425 {
3426     int ret = 0;
3427
3428     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3429     } else if (is_unicode_bmp(c1)) {
3430         ret = w16e_conv(c1, &c2, &c1);
3431     } else {
3432         c2 = 0;
3433         c1 =  CLASS_UNICODE | c1;
3434     }
3435     if (ret) return ret;
3436     (*oconv)(c2, c1);
3437     return 0;
3438 }
3439
3440 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3441 {
3442     const unsigned short *const *pp;
3443     const unsigned short *const *const *ppp;
3444     static const char no_best_fit_chars_table_C2[] =
3445     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3446         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3447         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3448         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3449     static const char no_best_fit_chars_table_C2_ms[] =
3450     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3451         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3452         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3453         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3454     static const char no_best_fit_chars_table_932_C2[] =
3455     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3456         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3457         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3458         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3459     static const char no_best_fit_chars_table_932_C3[] =
3460     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3461         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3462         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3463         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3464     nkf_char ret = 0;
3465
3466     if(c2 < 0x80){
3467         *p2 = 0;
3468         *p1 = c2;
3469     }else if(c2 < 0xe0){
3470         if(no_best_fit_chars_f){
3471             if(ms_ucs_map_f == UCS_MAP_CP932){
3472                 switch(c2){
3473                 case 0xC2:
3474                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3475                     break;
3476                 case 0xC3:
3477                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3478                     break;
3479                 }
3480             }else if(!cp932inv_f){
3481                 switch(c2){
3482                 case 0xC2:
3483                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3484                     break;
3485                 case 0xC3:
3486                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3487                     break;
3488                 }
3489             }else if(ms_ucs_map_f == UCS_MAP_MS){
3490                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3491             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3492                 switch(c2){
3493                 case 0xC2:
3494                     switch(c1){
3495                     case 0xA2:
3496                     case 0xA3:
3497                     case 0xA5:
3498                     case 0xA6:
3499                     case 0xAC:
3500                     case 0xAF:
3501                     case 0xB8:
3502                         return 1;
3503                     }
3504                     break;
3505                 }
3506             }
3507         }
3508         pp =
3509             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3510             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3511             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3512             utf8_to_euc_2bytes;
3513         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3514     }else if(c0 < 0xF0){
3515         if(no_best_fit_chars_f){
3516             if(ms_ucs_map_f == UCS_MAP_CP932){
3517                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3518             }else if(ms_ucs_map_f == UCS_MAP_MS){
3519                 switch(c2){
3520                 case 0xE2:
3521                     switch(c1){
3522                     case 0x80:
3523                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3524                         break;
3525                     case 0x88:
3526                         if(c0 == 0x92) return 1;
3527                         break;
3528                     }
3529                     break;
3530                 case 0xE3:
3531                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3532                     break;
3533                 }
3534             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3535                 switch(c2){
3536                 case 0xE3:
3537                     switch(c1){
3538                     case 0x82:
3539                             if(c0 == 0x94) return 1;
3540                         break;
3541                     case 0x83:
3542                             if(c0 == 0xBB) return 1;
3543                         break;
3544                     }
3545                     break;
3546                 }
3547             }else{
3548                 switch(c2){
3549                 case 0xE2:
3550                     switch(c1){
3551                     case 0x80:
3552                         if(c0 == 0x95) return 1;
3553                         break;
3554                     case 0x88:
3555                         if(c0 == 0xA5) return 1;
3556                         break;
3557                     }
3558                     break;
3559                 case 0xEF:
3560                     switch(c1){
3561                     case 0xBC:
3562                         if(c0 == 0x8D) return 1;
3563                         break;
3564                     case 0xBD:
3565                         if(c0 == 0x9E && !cp932inv_f) return 1;
3566                         break;
3567                     case 0xBF:
3568                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3569                         break;
3570                     }
3571                     break;
3572                 }
3573             }
3574         }
3575         ppp =
3576             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3577             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3578             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3579             utf8_to_euc_3bytes;
3580         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3581     }else return -1;
3582 #ifdef SHIFTJIS_CP932
3583     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3584         nkf_char s2, s1;
3585         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3586             s2e_conv(s2, s1, p2, p1);
3587         }else{
3588             ret = 1;
3589         }
3590     }
3591 #endif
3592     return ret;
3593 }
3594
3595 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3596 {
3597     nkf_char c2;
3598     const unsigned short *p;
3599     unsigned short val;
3600
3601     if (pp == 0) return 1;
3602
3603     c1 -= 0x80;
3604     if (c1 < 0 || psize <= c1) return 1;
3605     p = pp[c1];
3606     if (p == 0)  return 1;
3607
3608     c0 -= 0x80;
3609     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3610     val = p[c0];
3611     if (val == 0) return 1;
3612     if (no_cp932ext_f && (
3613         (val>>8) == 0x2D || /* NEC special characters */
3614         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3615         )) return 1;
3616
3617     c2 = val >> 8;
3618    if (val > 0x7FFF){
3619         c2 &= 0x7f;
3620         c2 |= PREFIX_EUCG3;
3621     }
3622     if (c2 == SO) c2 = X0201;
3623     c1 = val & 0x7f;
3624     if (p2) *p2 = c2;
3625     if (p1) *p1 = c1;
3626     return 0;
3627 }
3628
3629 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3630 {
3631     int shift = 20;
3632     c &= VALUE_MASK;
3633     while(shift >= 0){
3634         if(c >= 1<<shift){
3635             while(shift >= 0){
3636                 (*f)(0, bin2hex(c>>shift));
3637                 shift -= 4;
3638             }
3639         }else{
3640             shift -= 4;
3641         }
3642     }
3643     return;
3644 }
3645
3646 void encode_fallback_html(nkf_char c)
3647 {
3648     (*oconv)(0, '&');
3649     (*oconv)(0, '#');
3650     c &= VALUE_MASK;
3651     if(c >= NKF_INT32_C(1000000))
3652         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3653     if(c >= NKF_INT32_C(100000))
3654         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3655     if(c >= 10000)
3656         (*oconv)(0, 0x30+(c/10000  )%10);
3657     if(c >= 1000)
3658         (*oconv)(0, 0x30+(c/1000   )%10);
3659     if(c >= 100)
3660         (*oconv)(0, 0x30+(c/100    )%10);
3661     if(c >= 10)
3662         (*oconv)(0, 0x30+(c/10     )%10);
3663     if(c >= 0)
3664         (*oconv)(0, 0x30+ c         %10);
3665     (*oconv)(0, ';');
3666     return;
3667 }
3668
3669 void encode_fallback_xml(nkf_char c)
3670 {
3671     (*oconv)(0, '&');
3672     (*oconv)(0, '#');
3673     (*oconv)(0, 'x');
3674     nkf_each_char_to_hex(oconv, c);
3675     (*oconv)(0, ';');
3676     return;
3677 }
3678
3679 void encode_fallback_java(nkf_char c)
3680 {
3681     (*oconv)(0, '\\');
3682     c &= VALUE_MASK;
3683     if(!is_unicode_bmp(c)){
3684         (*oconv)(0, 'U');
3685         (*oconv)(0, '0');
3686         (*oconv)(0, '0');
3687         (*oconv)(0, bin2hex(c>>20));
3688         (*oconv)(0, bin2hex(c>>16));
3689     }else{
3690         (*oconv)(0, 'u');
3691     }
3692     (*oconv)(0, bin2hex(c>>12));
3693     (*oconv)(0, bin2hex(c>> 8));
3694     (*oconv)(0, bin2hex(c>> 4));
3695     (*oconv)(0, bin2hex(c    ));
3696     return;
3697 }
3698
3699 void encode_fallback_perl(nkf_char c)
3700 {
3701     (*oconv)(0, '\\');
3702     (*oconv)(0, 'x');
3703     (*oconv)(0, '{');
3704     nkf_each_char_to_hex(oconv, c);
3705     (*oconv)(0, '}');
3706     return;
3707 }
3708
3709 void encode_fallback_subchar(nkf_char c)
3710 {
3711     c = unicode_subchar;
3712     (*oconv)((c>>8)&0xFF, c&0xFF);
3713     return;
3714 }
3715 #endif
3716
3717 #ifdef UTF8_OUTPUT_ENABLE
3718 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3719 {
3720     const unsigned short *p;
3721
3722     if (c2 == X0201) {
3723         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3724             switch (c1) {
3725             case 0x20:
3726                 return 0xA0;
3727             case 0x7D:
3728                 return 0xA9;
3729             }
3730         }
3731         p = euc_to_utf8_1byte;
3732 #ifdef X0212_ENABLE
3733     } else if (is_eucg3(c2)){
3734         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3735             return 0xA6;
3736         }
3737         c2 = (c2&0x7f) - 0x21;
3738         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3739             p = x0212_to_utf8_2bytes[c2];
3740         else
3741             return 0;
3742 #endif
3743     } else {
3744         c2 &= 0x7f;
3745         c2 = (c2&0x7f) - 0x21;
3746         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3747             p =
3748                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3749                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3750                 euc_to_utf8_2bytes_ms[c2];
3751         else
3752             return 0;
3753     }
3754     if (!p) return 0;
3755     c1 = (c1 & 0x7f) - 0x21;
3756     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3757         return p[c1];
3758     return 0;
3759 }
3760
3761 void w_oconv(nkf_char c2, nkf_char c1)
3762 {
3763     nkf_char c0;
3764     nkf_char val;
3765
3766     if (output_bom_f) {
3767         output_bom_f = FALSE;
3768         (*o_putc)('\357');
3769         (*o_putc)('\273');
3770         (*o_putc)('\277');
3771     }
3772
3773     if (c2 == EOF) {
3774         (*o_putc)(EOF);
3775         return;
3776     }
3777
3778 #ifdef NUMCHAR_OPTION
3779     if (c2 == 0 && is_unicode_capsule(c1)){
3780         val = c1 & VALUE_MASK;
3781         if (val < 0x80){
3782             (*o_putc)(val);
3783         }else if (val < 0x800){
3784             (*o_putc)(0xC0 | (val >> 6));
3785             (*o_putc)(0x80 | (val & 0x3f));
3786         } else if (val <= NKF_INT32_C(0xFFFF)) {
3787             (*o_putc)(0xE0 | (val >> 12));
3788             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3789             (*o_putc)(0x80 | (val        & 0x3f));
3790         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3791             (*o_putc)(0xF0 | ( val>>18));
3792             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3793             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3794             (*o_putc)(0x80 | ( val      & 0x3f));
3795         }
3796         return;
3797     }
3798 #endif
3799
3800     if (c2 == 0) {
3801         output_mode = ASCII;
3802         (*o_putc)(c1);
3803     } else if (c2 == ISO8859_1) {
3804         output_mode = ISO8859_1;
3805         (*o_putc)(c1 | 0x080);
3806     } else {
3807         output_mode = UTF8;
3808         val = e2w_conv(c2, c1);
3809         if (val){
3810             w16w_conv(val, &c2, &c1, &c0);
3811             (*o_putc)(c2);
3812             if (c1){
3813                 (*o_putc)(c1);
3814                 if (c0) (*o_putc)(c0);
3815             }
3816         }
3817     }
3818 }
3819
3820 void w_oconv16(nkf_char c2, nkf_char c1)
3821 {
3822     if (output_bom_f) {
3823         output_bom_f = FALSE;
3824         if (output_endian == ENDIAN_LITTLE){
3825             (*o_putc)((unsigned char)'\377');
3826             (*o_putc)('\376');
3827         }else{
3828             (*o_putc)('\376');
3829             (*o_putc)((unsigned char)'\377');
3830         }
3831     }
3832
3833     if (c2 == EOF) {
3834         (*o_putc)(EOF);
3835         return;
3836     }
3837
3838     if (c2 == ISO8859_1) {
3839         c2 = 0;
3840         c1 |= 0x80;
3841 #ifdef NUMCHAR_OPTION
3842     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3843         if (is_unicode_bmp(c1)) {
3844             c2 = (c1 >> 8) & 0xff;
3845             c1 &= 0xff;
3846         } else {
3847             c1 &= VALUE_MASK;
3848             if (c1 <= UNICODE_MAX) {
3849                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3850                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3851                 if (output_endian == ENDIAN_LITTLE){
3852                     (*o_putc)(c2 & 0xff);
3853                     (*o_putc)((c2 >> 8) & 0xff);
3854                     (*o_putc)(c1 & 0xff);
3855                     (*o_putc)((c1 >> 8) & 0xff);
3856                 }else{
3857                     (*o_putc)((c2 >> 8) & 0xff);
3858                     (*o_putc)(c2 & 0xff);
3859                     (*o_putc)((c1 >> 8) & 0xff);
3860                     (*o_putc)(c1 & 0xff);
3861                 }
3862             }
3863             return;
3864         }
3865 #endif
3866     } else if (c2) {
3867         nkf_char val = e2w_conv(c2, c1);
3868         c2 = (val >> 8) & 0xff;
3869         c1 = val & 0xff;
3870         if (!val) return;
3871     }
3872     if (output_endian == ENDIAN_LITTLE){
3873         (*o_putc)(c1);
3874         (*o_putc)(c2);
3875     }else{
3876         (*o_putc)(c2);
3877         (*o_putc)(c1);
3878     }
3879 }
3880
3881 void w_oconv32(nkf_char c2, nkf_char c1)
3882 {
3883     if (output_bom_f) {
3884         output_bom_f = FALSE;
3885         if (output_endian == ENDIAN_LITTLE){
3886             (*o_putc)((unsigned char)'\377');
3887             (*o_putc)('\376');
3888             (*o_putc)('\000');
3889             (*o_putc)('\000');
3890         }else{
3891             (*o_putc)('\000');
3892             (*o_putc)('\000');
3893             (*o_putc)('\376');
3894             (*o_putc)((unsigned char)'\377');
3895         }
3896     }
3897
3898     if (c2 == EOF) {
3899         (*o_putc)(EOF);
3900         return;
3901     }
3902
3903     if (c2 == ISO8859_1) {
3904         c1 |= 0x80;
3905 #ifdef NUMCHAR_OPTION
3906     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3907         c1 &= VALUE_MASK;
3908 #endif
3909     } else if (c2) {
3910         c1 = e2w_conv(c2, c1);
3911         if (!c1) return;
3912     }
3913     if (output_endian == ENDIAN_LITTLE){
3914         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3915         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3916         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3917         (*o_putc)('\000');
3918     }else{
3919         (*o_putc)('\000');
3920         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3921         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3922         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3923     }
3924 }
3925 #endif
3926
3927 void e_oconv(nkf_char c2, nkf_char c1)
3928 {
3929 #ifdef NUMCHAR_OPTION
3930     if (c2 == 0 && is_unicode_capsule(c1)){
3931         w16e_conv(c1, &c2, &c1);
3932         if (c2 == 0 && is_unicode_capsule(c1)){
3933             c2 = c1 & VALUE_MASK;
3934             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
3935                 /* eucJP-ms UDC */
3936                 c1 &= 0xFFF;
3937                 c2 = c1 / 94;
3938                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
3939                 c1 = 0x21 + c1 % 94;
3940                 if (is_eucg3(c2)){
3941                     (*o_putc)(0x8f);
3942                     (*o_putc)((c2 & 0x7f) | 0x080);
3943                     (*o_putc)(c1 | 0x080);
3944                 }else{
3945                     (*o_putc)((c2 & 0x7f) | 0x080);
3946                     (*o_putc)(c1 | 0x080);
3947                 }
3948                 return;
3949             } else {
3950                 if (encode_fallback) (*encode_fallback)(c1);
3951                 return;
3952             }
3953         }
3954     }
3955 #endif
3956     if (c2 == EOF) {
3957         (*o_putc)(EOF);
3958         return;
3959     } else if (c2 == 0) {
3960         output_mode = ASCII;
3961         (*o_putc)(c1);
3962     } else if (c2 == X0201) {
3963         output_mode = JAPANESE_EUC;
3964         (*o_putc)(SSO); (*o_putc)(c1|0x80);
3965     } else if (c2 == ISO8859_1) {
3966         output_mode = ISO8859_1;
3967         (*o_putc)(c1 | 0x080);
3968 #ifdef X0212_ENABLE
3969     } else if (is_eucg3(c2)){
3970         output_mode = JAPANESE_EUC;
3971 #ifdef SHIFTJIS_CP932
3972         if (!cp932inv_f){
3973             nkf_char s2, s1;
3974             if (e2s_conv(c2, c1, &s2, &s1) == 0){
3975                 s2e_conv(s2, s1, &c2, &c1);
3976             }
3977         }
3978 #endif
3979         if (c2 == 0) {
3980             output_mode = ASCII;
3981             (*o_putc)(c1);
3982         }else if (is_eucg3(c2)){
3983             if (x0212_f){
3984                 (*o_putc)(0x8f);
3985                 (*o_putc)((c2 & 0x7f) | 0x080);
3986                 (*o_putc)(c1 | 0x080);
3987             }
3988         }else{
3989             (*o_putc)((c2 & 0x7f) | 0x080);
3990             (*o_putc)(c1 | 0x080);
3991         }
3992 #endif
3993     } else {
3994         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
3995             set_iconv(FALSE, 0);
3996             return; /* too late to rescue this char */
3997         }
3998         output_mode = JAPANESE_EUC;
3999         (*o_putc)(c2 | 0x080);
4000         (*o_putc)(c1 | 0x080);
4001     }
4002 }
4003
4004 #ifdef X0212_ENABLE
4005 nkf_char x0212_shift(nkf_char c)
4006 {
4007     nkf_char ret = c;
4008     c &= 0x7f;
4009     if (is_eucg3(ret)){
4010         if (0x75 <= c && c <= 0x7f){
4011             ret = c + (0x109 - 0x75);
4012         }
4013     }else{
4014         if (0x75 <= c && c <= 0x7f){
4015             ret = c + (0x113 - 0x75);
4016         }
4017     }
4018     return ret;
4019 }
4020
4021
4022 nkf_char x0212_unshift(nkf_char c)
4023 {
4024     nkf_char ret = c;
4025     if (0x7f <= c && c <= 0x88){
4026         ret = c + (0x75 - 0x7f);
4027     }else if (0x89 <= c && c <= 0x92){
4028         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4029     }
4030     return ret;
4031 }
4032 #endif /* X0212_ENABLE */
4033
4034 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4035 {
4036     nkf_char ndx;
4037     if (is_eucg3(c2)){
4038         ndx = c2 & 0x7f;
4039         if (x0213_f){
4040             if((0x21 <= ndx && ndx <= 0x2F)){
4041                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4042                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4043                 return 0;
4044             }else if(0x6E <= ndx && ndx <= 0x7E){
4045                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4046                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4047                 return 0;
4048             }
4049             return 1;
4050         }
4051 #ifdef X0212_ENABLE
4052         else if(nkf_isgraph(ndx)){
4053             nkf_char val = 0;
4054             const unsigned short *ptr;
4055             ptr = x0212_shiftjis[ndx - 0x21];
4056             if (ptr){
4057                 val = ptr[(c1 & 0x7f) - 0x21];
4058             }
4059             if (val){
4060                 c2 = val >> 8;
4061                 c1 = val & 0xff;
4062                 if (p2) *p2 = c2;
4063                 if (p1) *p1 = c1;
4064                 return 0;
4065             }
4066             c2 = x0212_shift(c2);
4067         }
4068 #endif /* X0212_ENABLE */
4069     }
4070     if(0x7F < c2) return 1;
4071     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4072     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4073     return 0;
4074 }
4075
4076 void s_oconv(nkf_char c2, nkf_char c1)
4077 {
4078 #ifdef NUMCHAR_OPTION
4079     if (c2 == 0 && is_unicode_capsule(c1)){
4080         w16e_conv(c1, &c2, &c1);
4081         if (c2 == 0 && is_unicode_capsule(c1)){
4082             c2 = c1 & VALUE_MASK;
4083             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4084                 /* CP932 UDC */
4085                 c1 &= 0xFFF;
4086                 c2 = c1 / 188 + 0xF0;
4087                 c1 = c1 % 188;
4088                 c1 += 0x40 + (c1 > 0x3e);
4089                 (*o_putc)(c2);
4090                 (*o_putc)(c1);
4091                 return;
4092             } else {
4093                 if(encode_fallback)(*encode_fallback)(c1);
4094                 return;
4095             }
4096         }
4097     }
4098 #endif
4099     if (c2 == EOF) {
4100         (*o_putc)(EOF);
4101         return;
4102     } else if (c2 == 0) {
4103         output_mode = ASCII;
4104         (*o_putc)(c1);
4105     } else if (c2 == X0201) {
4106         output_mode = SHIFT_JIS;
4107         (*o_putc)(c1|0x80);
4108     } else if (c2 == ISO8859_1) {
4109         output_mode = ISO8859_1;
4110         (*o_putc)(c1 | 0x080);
4111 #ifdef X0212_ENABLE
4112     } else if (is_eucg3(c2)){
4113         output_mode = SHIFT_JIS;
4114         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4115             (*o_putc)(c2);
4116             (*o_putc)(c1);
4117         }
4118 #endif
4119     } else {
4120         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4121             set_iconv(FALSE, 0);
4122             return; /* too late to rescue this char */
4123         }
4124         output_mode = SHIFT_JIS;
4125         e2s_conv(c2, c1, &c2, &c1);
4126
4127 #ifdef SHIFTJIS_CP932
4128         if (cp932inv_f
4129             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4130             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4131             if (c){
4132                 c2 = c >> 8;
4133                 c1 = c & 0xff;
4134             }
4135         }
4136 #endif /* SHIFTJIS_CP932 */
4137
4138         (*o_putc)(c2);
4139         if (prefix_table[(unsigned char)c1]){
4140             (*o_putc)(prefix_table[(unsigned char)c1]);
4141         }
4142         (*o_putc)(c1);
4143     }
4144 }
4145
4146 void j_oconv(nkf_char c2, nkf_char c1)
4147 {
4148 #ifdef NUMCHAR_OPTION
4149     if (c2 == 0 && is_unicode_capsule(c1)){
4150         w16e_conv(c1, &c2, &c1);
4151         if (c2 == 0 && is_unicode_capsule(c1)){
4152             c2 = c1 & VALUE_MASK;
4153             if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
4154                 /* CP5022x UDC */
4155                 c1 &= 0xFFF;
4156                 c2 = 0x7F + c1 / 94;
4157                 c1 = 0x21 + c1 % 94;
4158             } else {
4159                 if (encode_fallback) (*encode_fallback)(c1);
4160                 return;
4161             }
4162         }
4163     }
4164 #endif
4165     if (c2 == EOF) {
4166         if (output_mode !=ASCII && output_mode!=ISO8859_1) {
4167             (*o_putc)(ESC);
4168             (*o_putc)('(');
4169             (*o_putc)(ascii_intro);
4170             output_mode = ASCII;
4171         }
4172         (*o_putc)(EOF);
4173 #ifdef X0212_ENABLE
4174     } else if (is_eucg3(c2)){
4175         if(x0213_f){
4176             if(output_mode!=X0213_2){
4177                 output_mode = X0213_2;
4178                 (*o_putc)(ESC);
4179                 (*o_putc)('$');
4180                 (*o_putc)('(');
4181                 (*o_putc)(X0213_2&0x7F);
4182             }
4183         }else{
4184             if(output_mode!=X0212){
4185                 output_mode = X0212;
4186                 (*o_putc)(ESC);
4187                 (*o_putc)('$');
4188                 (*o_putc)('(');
4189                 (*o_putc)(X0212&0x7F);
4190             }
4191         }
4192         (*o_putc)(c2 & 0x7f);
4193         (*o_putc)(c1);
4194 #endif
4195     } else if (c2==X0201) {
4196         if (output_mode!=X0201) {
4197             output_mode = X0201;
4198             (*o_putc)(ESC);
4199             (*o_putc)('(');
4200             (*o_putc)('I');
4201         }
4202         (*o_putc)(c1);
4203     } else if (c2==ISO8859_1) {
4204             /* iso8859 introduction, or 8th bit on */<