OSDN Git Service

2cdc7fbfdeff6c3e8074edd9ff4471dbad4d380a
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
 * nkf is currently maintained on SourceForge.
31  * http://sourceforge.jp/projects/nkf/
32 ***********************************************************************/
33 /* $Id: nkf.c,v 1.147 2007/11/03 08:02:49 naruse Exp $ */
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2007-11-03"
36 #define COPY_RIGHT \
37     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
39
40 #include "config.h"
41 #include "utf8tbl.h"
42 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
43 #define MSDOS
44 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
45 #define __WIN32__
46 #endif
47 #endif
48
49 #ifdef PERL_XS
50 #undef OVERWRITE
51 #endif
52
53 #ifndef PERL_XS
54 #include <stdio.h>
55 #endif
56
57 #include <stdlib.h>
58 #include <string.h>
59
60 #if defined(MSDOS) || defined(__OS2__)
61 #include <fcntl.h>
62 #include <io.h>
63 #if defined(_MSC_VER) || defined(__WATCOMC__)
64 #define mktemp _mktemp
65 #endif
66 #endif
67
68 #ifdef MSDOS
69 #ifdef LSI_C
70 #define setbinmode(fp) fsetbin(fp)
71 #elif defined(__DJGPP__)
72 #include <libc/dosio.h>
73 #define setbinmode(fp) djgpp_setbinmode(fp)
74 #else /* Microsoft C, Turbo C */
75 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
76 #endif
77 #else /* UNIX */
78 #define setbinmode(fp)
79 #endif
80
81 #if defined(__DJGPP__)
82 void  djgpp_setbinmode(FILE *fp)
83 {
84     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
85     int fd, m;
86     fd = fileno(fp);
87     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
88     __file_handle_set(fd, m);
89 }
90 #endif
91
92 #ifdef _IOFBF /* SysV and MSDOS, Windows */
93 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
94 #else /* BSD */
95 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
96 #endif
97
98 /*Borland C++ 4.5 EasyWin*/
99 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
100 #define         EASYWIN
101 #ifndef __WIN16__
102 #define __WIN16__
103 #endif
104 #include <windows.h>
105 #endif
106
/*
 * Headers needed by the in-place overwrite feature: <sys/stat.h> everywhere,
 * plus whichever header the toolchain uses for utime().
 */
#ifdef OVERWRITE
/* added by satoru@isoternet.org */
#if defined(__EMX__)
#include <sys/types.h>
#endif
#include <sys/stat.h>
#if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
#include <unistd.h>
#if defined(__WATCOMC__)
#include <sys/utime.h>
#else
#include <utime.h>
#endif
#else /* defined(MSDOS) */
#ifdef __WIN32__
#ifdef __BORLANDC__ /* BCC32 */
#include <utime.h>
#else /* !defined(__BORLANDC__) */
#include <sys/utime.h>
#endif /* (__BORLANDC__) */
#else /* !defined(__WIN32__) */
#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
#include <sys/utime.h>
#elif defined(__TURBOC__) /* BCC */
#include <utime.h>
#elif defined(LSI_C) /* LSI C: no utime header is included */
#endif /* 16-bit compiler selection */
#endif /* (__WIN32__) */
#endif /* !defined(MSDOS) || defined(__DJGPP__) */
#endif /* OVERWRITE */
137
138 #define         FALSE   0
139 #define         TRUE    1
140
141 /* state of output_mode and input_mode
142
143    c2           0 means ASCII
144                 X0201
145                 ISO8859_1
146                 X0208
147                 EOF      all termination
148    c1           32bit data
149
150  */
151
152 #define         ASCII           0
153 #define         X0208           1
154 #define         X0201           2
155 #define         ISO8859_1       8
156 #define         NO_X0201        3
157 #define         X0212      0x2844
158 #define         X0213_1    0x284F
159 #define         X0213_2    0x2850
160
161 /* Input Assumption */
162
163 #define         JIS_INPUT       4
164 #define         EUC_INPUT      16
165 #define         SJIS_INPUT      5
166 #define         LATIN1_INPUT    6
167 #define         FIXED_MIME      7
168 #define         STRICT_MIME     8
169
170 /* MIME ENCODE */
171
172 #define         ISO2022JP       9
173 #define         JAPANESE_EUC   10
174 #define         SHIFT_JIS      11
175
176 #define         UTF8           12
177 #define         UTF8_INPUT     13
178 #define         UTF16_INPUT    1015
179 #define         UTF32_INPUT    1017
180
181 /* byte order */
182
183 #define         ENDIAN_BIG      1234
184 #define         ENDIAN_LITTLE   4321
185 #define         ENDIAN_2143     2143
186 #define         ENDIAN_3412     3412
187
188 #define         WISH_TRUE      15
189
/* ASCII CODE */

#define         BS      0x08
#define         TAB     0x09
#define         LF      0x0a
#define         CR      0x0d
#define         ESC     0x1b
#define         SP      0x20
#define         AT      0x40
#define         SSP     0xa0
#define         DEL     0x7f
#define         SI      0x0f
#define         SO      0x0e
#define         SSO     0x8e
#define         SS3     0x8f
#define         CRLF    0x0D0A

/*
 * Character classification / conversion helpers.
 *
 * Every use of a macro parameter is parenthesized so that an argument which
 * is itself an expression (e.g. `x & 0xff`, `hi | lo`) is classified as a
 * whole instead of being split by operator precedence.
 * The parameter is still evaluated more than once: do not pass expressions
 * with side effects (`*p++`, `getc(f)`, ...).
 */

/* non-zero when c is an ASCII letter or digit */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* value of a hexadecimal digit character; 0 for anything that is not one */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* upper-case hexadecimal digit for the low 4 bits of c */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* non-zero when the second-lowest byte of c2 (taken as unsigned short) is SS3 */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* CR, LF, or a printable ASCII character other than '?', '=', '_', '.', '"' */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '.') && ((c) != 0x22)))
229
/* Lead-byte ranges covered by the CP932 and inverse-CP932 tables (inclusive). */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
/*
 * Non-zero when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END].
 * The parameter is parenthesized so expression arguments are compared as a
 * whole; it is evaluated twice, so avoid side effects.
 */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
235
236 #define         HOLD_SIZE       1024
237 #if defined(INT_IS_SHORT)
238 #define         IOBUF_SIZE      2048
239 #else
240 #define         IOBUF_SIZE      16384
241 #endif
242
243 #define         DEFAULT_J       'B'
244 #define         DEFAULT_R       'B'
245
246 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
247 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
248
249 #define         RANGE_NUM_MAX   18
250 #define         GETA1   0x22
251 #define         GETA2   0x2e
252
253
254 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
255 #define sizeof_euc_to_utf8_1byte 94
256 #define sizeof_euc_to_utf8_2bytes 94
257 #define sizeof_utf8_to_euc_C2 64
258 #define sizeof_utf8_to_euc_E5B8 64
259 #define sizeof_utf8_to_euc_2bytes 112
260 #define sizeof_utf8_to_euc_3bytes 16
261 #endif
262
263 /* MIME preprocessor */
264
265 #ifdef EASYWIN /*Easy Win */
266 extern POINT _BufferSize;
267 #endif
268
/*
 * Descriptor for one candidate input encoding; instances are listed in
 * input_code_list.
 * NOTE(review): the exact meaning of stat/score/index/buf is defined by the
 * status_* helpers, which are not visible in this part of the file.
 */
struct input_code{
    char *name;         /* encoding label, e.g. "EUC-JP", "Shift_JIS" */
    nkf_char stat;      /* presumably the current detector state -- confirm in *_status() */
    nkf_char score;     /* presumably plausibility score bits (see set_code_score/clr_code_score) */
    nkf_char index;     /* presumably the number of bytes currently held in buf */
    nkf_char buf[3];    /* presumably the pending bytes of a partially read character */
    /* per-byte detector callback; NULL in the UTF-16/UTF-32 table entries */
    void (*status_func)(struct input_code *, nkf_char);
    /* input converter associated with this encoding */
    nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
    int _file_stat;     /* purpose not visible here; initialised to 0 in input_code_list */
};
279
280 static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
281
282 #ifndef PERL_XS
283 static const char *CopyRight = COPY_RIGHT;
284 #endif
285 #if !defined(PERL_XS) && !defined(WIN32DLL)
286 static  nkf_char     noconvert(FILE *f);
287 #endif
288 static  void    module_connection(void);
289 static  nkf_char     kanji_convert(FILE *f);
290 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
291 static  nkf_char     push_hold_buf(nkf_char c2);
292 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
293 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
294 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
295 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
296 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
297 /* UCS Mapping
298  * 0: Shift_JIS, eucJP-ascii
299  * 1: eucJP-ms
300  * 2: CP932, CP51932
301  * 3: CP10001
302  */
303 #define UCS_MAP_ASCII   0
304 #define UCS_MAP_MS      1
305 #define UCS_MAP_CP932   2
306 #define UCS_MAP_CP10001 3
307 static int ms_ucs_map_f = UCS_MAP_ASCII;
308 #endif
309 #ifdef UTF8_INPUT_ENABLE
310 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
311 static  int     no_cp932ext_f = FALSE;
312 /* ignore ZERO WIDTH NO-BREAK SPACE */
313 static  int     no_best_fit_chars_f = FALSE;
314 static  int     input_endian = ENDIAN_BIG;
315 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
316 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
317 static  void    encode_fallback_html(nkf_char c);
318 static  void    encode_fallback_xml(nkf_char c);
319 static  void    encode_fallback_java(nkf_char c);
320 static  void    encode_fallback_perl(nkf_char c);
321 static  void    encode_fallback_subchar(nkf_char c);
322 static  void    (*encode_fallback)(nkf_char c) = NULL;
323 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
324 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
325 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
326 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
327 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
328 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
329 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
330 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
331 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
332 static  void    w_status(struct input_code *, nkf_char);
333 #endif
334 #ifdef UTF8_OUTPUT_ENABLE
335 static  int     output_bom_f = FALSE;
336 static  int     output_endian = ENDIAN_BIG;
337 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
338 static  void    w_oconv(nkf_char c2,nkf_char c1);
339 static  void    w_oconv16(nkf_char c2,nkf_char c1);
340 static  void    w_oconv32(nkf_char c2,nkf_char c1);
341 #endif
342 static  void    e_oconv(nkf_char c2,nkf_char c1);
343 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
344 static  void    s_oconv(nkf_char c2,nkf_char c1);
345 static  void    j_oconv(nkf_char c2,nkf_char c1);
346 static  void    fold_conv(nkf_char c2,nkf_char c1);
347 static  void    nl_conv(nkf_char c2,nkf_char c1);
348 static  void    z_conv(nkf_char c2,nkf_char c1);
349 static  void    rot_conv(nkf_char c2,nkf_char c1);
350 static  void    hira_conv(nkf_char c2,nkf_char c1);
351 static  void    base64_conv(nkf_char c2,nkf_char c1);
352 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
353 static  void    no_connection(nkf_char c2,nkf_char c1);
354 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
355
356 static  void    code_score(struct input_code *ptr);
357 static  void    code_status(nkf_char c);
358
359 static  void    std_putc(nkf_char c);
360 static  nkf_char     std_getc(FILE *f);
361 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
362
363 static  nkf_char     broken_getc(FILE *f);
364 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
365
366 static  nkf_char     mime_begin(FILE *f);
367 static  nkf_char     mime_getc(FILE *f);
368 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
369
370 static  void    switch_mime_getc(void);
371 static  void    unswitch_mime_getc(void);
372 static  nkf_char     mime_begin_strict(FILE *f);
373 static  nkf_char     mime_getc_buf(FILE *f);
374 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
375 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
376
377 static  nkf_char     base64decode(nkf_char c);
378 static  void    mime_prechar(nkf_char c2, nkf_char c1);
379 static  void    mime_putc(nkf_char c);
380 static  void    open_mime(nkf_char c);
381 static  void    close_mime(void);
382 static  void    eof_mime(void);
383 static  void    mimeout_addchar(nkf_char c);
384 #ifndef PERL_XS
385 static  void    usage(void);
386 static  void    version(void);
387 #endif
388 static  void    options(unsigned char *c);
389 static  void    reinit(void);
390
391 /* buffers */
392
393 #if !defined(PERL_XS) && !defined(WIN32DLL)
394 static unsigned char   stdibuf[IOBUF_SIZE];
395 static unsigned char   stdobuf[IOBUF_SIZE];
396 #endif
397 static unsigned char   hold_buf[HOLD_SIZE*2];
398 static int             hold_count = 0;
399
400 /* MIME preprocessor fifo */
401
402 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
403 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
404 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
405 static unsigned char           mime_buf[MIME_BUF_SIZE];
406 static unsigned int            mime_top = 0;
407 static unsigned int            mime_last = 0;  /* decoded */
408 static unsigned int            mime_input = 0; /* undecoded */
409 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
410
/* flags */
static int             unbuf_f = FALSE;        /* when set, main() makes stdout unbuffered */
static int             estab_f = FALSE;
static int             nop_f = FALSE;          /* when set, main() calls noconvert() instead of kanji_convert() */
static int             binmode_f = TRUE;       /* binary mode */
static int             rot_f = FALSE;          /* rot mode (written "rot14/43" originally; presumably ROT13/47 -- TODO confirm) */
static int             hira_f = FALSE;          /* hiragana/katakana conversion */
static int             input_f = FALSE;        /* non fixed input code  */
static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
static int             mimebuf_f = FALSE;      /* MIME buffered input */
static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
static int             iso8859_f = FALSE;      /* ISO8859 through */
static int             mimeout_f = FALSE;       /* base64 mode */
#if defined(MSDOS) || defined(__OS2__)
static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
#else
static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
#endif
static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
432
/* Unicode normalization input stage: enable flag and its getc/ungetc hooks. */
#ifdef UNICODE_NORMALIZATION
static int nfc_f = FALSE;
static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc (presumably; comment previously said "ugetc") */
static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
static nkf_char nfc_getc(FILE *f);
static nkf_char nfc_ungetc(nkf_char c,FILE *f);
#endif
440
441 #ifdef INPUT_OPTION
442 static int cap_f = FALSE;
443 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
444 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
445 static nkf_char cap_getc(FILE *f);
446 static nkf_char cap_ungetc(nkf_char c,FILE *f);
447
448 static int url_f = FALSE;
449 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
450 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
451 static nkf_char url_getc(FILE *f);
452 static nkf_char url_ungetc(nkf_char c,FILE *f);
453 #endif
454
/*
 * 32-bit constants.  When int is only 16 bits wide (INT_IS_SHORT) the
 * literal gets an L suffix so that it is a long constant.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/*
 * is_unicode_capsule(c): the top byte of c (CLASS_MASK) equals CLASS_UNICODE.
 * is_unicode_bmp(c):     the low 24 bits of c (VALUE_MASK) are <= 0xFFFF.
 * The parameter is parenthesized so that arguments such as `a | b` are
 * masked as a whole rather than only their last operand.
 */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
467
/* Numeric character reference input stage: enable flag and its getc/ungetc hooks. */
#ifdef NUMCHAR_OPTION
static int numchar_f = FALSE;
static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of numchar_getc (presumably; comment previously said "ugetc") */
static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
static nkf_char numchar_getc(FILE *f);
static nkf_char numchar_ungetc(nkf_char c,FILE *f);
#endif
475
476 #ifdef CHECK_OPTION
477 static int noout_f = FALSE;
478 static void no_putc(nkf_char c);
479 static int debug_f = FALSE;
480 static void debug(const char *str);
481 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
482 #endif
483
484 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
485 #if !defined PERL_XS
486 static  void    print_guessed_code(char *filename);
487 #endif
488 static  void    set_input_codename(char *codename);
489
490 #ifdef EXEC_IO
491 static int exec_f = 0;
492 #endif
493
494 #ifdef SHIFTJIS_CP932
495 /* invert IBM extended characters to others */
496 static int cp51932_f = FALSE;
497
498 /* invert NEC-selected IBM extended characters to IBM extended characters */
499 static int cp932inv_f = TRUE;
500
501 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
502 #endif /* SHIFTJIS_CP932 */
503
504 #ifdef X0212_ENABLE
505 static int x0212_f = FALSE;
506 static nkf_char x0212_shift(nkf_char c);
507 static nkf_char x0212_unshift(nkf_char c);
508 #endif
509 static int x0213_f = FALSE;
510
511 static unsigned char prefix_table[256];
512
513 static void set_code_score(struct input_code *ptr, nkf_char score);
514 static void clr_code_score(struct input_code *ptr, nkf_char score);
515 static void status_disable(struct input_code *ptr);
516 static void status_push_ch(struct input_code *ptr, nkf_char c);
517 static void status_clear(struct input_code *ptr);
518 static void status_reset(struct input_code *ptr);
519 static void status_reinit(struct input_code *ptr);
520 static void status_check(struct input_code *ptr, nkf_char c);
521 static void e_status(struct input_code *, nkf_char);
522 static void s_status(struct input_code *, nkf_char);
523
/*
 * Table of candidate input encodings, terminated by an all-zero entry
 * (name == NULL).  Field order follows struct input_code:
 * name, stat, score, index, buf, status_func, iconv_func, _file_stat.
 * The UTF-* entries exist only when UTF8_INPUT_ENABLE is defined; UTF-16 and
 * UTF-32 have no status_func (NULL).
 */
struct input_code input_code_list[] = {
    {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
    {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
#ifdef UTF8_INPUT_ENABLE
    {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
    {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
    {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
#endif
    {0}     /* sentinel: end of table */
};
534
535 static int              mimeout_mode = 0;
536 static int              base64_count = 0;
537
538 /* X0208 -> ASCII converter */
539
540 /* fold parameter */
541 static int             f_line = 0;    /* chars in line */
542 static int             f_prev = 0;
543 static int             fold_preserve_f = FALSE; /* preserve new lines */
544 static int             fold_f  = FALSE;
545 static int             fold_len  = 0;
546
547 /* options */
548 static unsigned char   kanji_intro = DEFAULT_J;
549 static unsigned char   ascii_intro = DEFAULT_R;
550
551 /* Folding */
552
553 #define FOLD_MARGIN  10
554 #define DEFAULT_FOLD 60
555
556 static int             fold_margin  = FOLD_MARGIN;
557
558 /* converters */
559
560 #ifdef DEFAULT_CODE_JIS
561 #   define  DEFAULT_CONV j_oconv
562 #endif
563 #ifdef DEFAULT_CODE_SJIS
564 #   define  DEFAULT_CONV s_oconv
565 #endif
566 #ifdef DEFAULT_CODE_EUC
567 #   define  DEFAULT_CONV e_oconv
568 #endif
569 #ifdef DEFAULT_CODE_UTF8
570 #   define  DEFAULT_CONV w_oconv
571 #endif
572
573 /* process default */
574 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
575
576 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
577 /* s_iconv or oconv */
578 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
579
580 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
581 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
582 static void (*o_nlconv)(nkf_char c2,nkf_char c1) = no_connection;
583 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
584 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
585 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
586 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
587
588 /* static redirections */
589
590 static  void   (*o_putc)(nkf_char c) = std_putc;
591
592 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
593 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
594
595 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
596 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
597
598 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
599
600 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
601 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
602
603 /* for strict mime */
604 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
605 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
606
607 /* Global states */
608 static int output_mode = ASCII,    /* output kanji mode */
609            input_mode =  ASCII,    /* input kanji mode */
610            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
611 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
612
613 /* X0201 / X0208 conversion tables */
614
615 /* X0201 kana conversion table */
616 /* 90-9F A0-DF */
617 static const unsigned char cv[]= {
618     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
619     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
620     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
621     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
622     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
623     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
624     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
625     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
626     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
627     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
628     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
629     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
630     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
631     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
632     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
633     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
634     0x00,0x00};
635
636
/* X0201 kana conversion table for dakuten (voiced sound mark) */
638 /* 90-9F A0-DF */
639 static const unsigned char dv[]= {
640     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
645     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
646     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
647     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
648     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
649     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
650     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
651     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
652     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
653     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
656     0x00,0x00};
657
/* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
659 /* 90-9F A0-DF */
660 static const unsigned char ev[]= {
661     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
672     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
673     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
675     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
677     0x00,0x00};
678
679
680 /* X0208 kigou conversion table */
681 /* 0x8140 - 0x819e */
682 static const unsigned char fv[] = {
683
684     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
685     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
686     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
687     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
688     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
689     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
690     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
691     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
692     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
693     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
694     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
695     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
696 } ;
697
698
699
700 static int             file_out_f = FALSE;
701 #ifdef OVERWRITE
702 static int             overwrite_f = FALSE;
703 static int             preserve_time_f = FALSE;
704 static int             backup_f = FALSE;
705 static char            *backup_suffix = "";
706 static char *get_backup_filename(const char *suffix, const char *filename);
707 #endif
708
709 static int nlmode_f = 0;   /* CR, LF, CRLF */
710 static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */
711 static nkf_char prev_cr = 0; /* CR or 0 */
712 #ifdef EASYWIN /*Easy Win */
713 static int             end_check;
714 #endif /*Easy Win */
715
716 #define STD_GC_BUFSIZE (256)
717 nkf_char std_gc_buf[STD_GC_BUFSIZE];
718 nkf_char std_gc_ndx;
719
720 #ifdef WIN32DLL
721 #include "nkf32dll.c"
722 #elif defined(PERL_XS)
723 #else /* WIN32DLL */
724 int main(int argc, char **argv)
725 {
726     FILE  *fin;
727     unsigned char  *cp;
728
729     char *outfname = NULL;
730     char *origfname;
731
732 #ifdef EASYWIN /*Easy Win */
733     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
734 #endif
735
736     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
737         cp = (unsigned char *)*argv;
738         options(cp);
739         if (guess_f) {
740 #ifdef CHECK_OPTION
741             int debug_f_back = debug_f;
742 #endif
743 #ifdef EXEC_IO
744             int exec_f_back = exec_f;
745 #endif
746 #ifdef X0212_ENABLE
747             int x0212_f_back = x0212_f;
748 #endif
749 #ifdef X0212_ENABLE
750             int x0213_f_back = x0213_f;
751 #endif
752             int guess_f_back = guess_f;
753             reinit();
754             guess_f = guess_f_back;
755             mime_f = FALSE;
756 #ifdef CHECK_OPTION
757             debug_f = debug_f_back;
758 #endif
759 #ifdef EXEC_IO
760             exec_f = exec_f_back;
761 #endif
762 #ifdef X0212_ENABLE
763             x0212_f = x0212_f_back;
764 #endif
765 #ifdef X0213_ENABLE
766             x0213_f = x0213_f_back;
767 #endif
768     }
769 #ifdef EXEC_IO
770         if (exec_f){
771             int fds[2], pid;
772             if (pipe(fds) < 0 || (pid = fork()) < 0){
773                 abort();
774             }
775             if (pid == 0){
776                 if (exec_f > 0){
777                     close(fds[0]);
778                     dup2(fds[1], 1);
779                 }else{
780                     close(fds[1]);
781                     dup2(fds[0], 0);
782                 }
783                 execvp(argv[1], &argv[1]);
784             }
785             if (exec_f > 0){
786                 close(fds[1]);
787                 dup2(fds[0], 0);
788             }else{
789                 close(fds[0]);
790                 dup2(fds[1], 1);
791             }
792             argc = 0;
793             break;
794         }
795 #endif
796     }
797     if(x0201_f == WISH_TRUE)
798          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
799
800     if (binmode_f == TRUE)
801 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
802     if (freopen("","wb",stdout) == NULL)
803         return (-1);
804 #else
805     setbinmode(stdout);
806 #endif
807
808     if (unbuf_f)
809       setbuf(stdout, (char *) NULL);
810     else
811       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
812
813     if (argc == 0) {
814       if (binmode_f == TRUE)
815 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
816       if (freopen("","rb",stdin) == NULL) return (-1);
817 #else
818       setbinmode(stdin);
819 #endif
820       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
821       if (nop_f)
822           noconvert(stdin);
823       else {
824           kanji_convert(stdin);
825           if (guess_f) print_guessed_code(NULL);
826       }
827     } else {
828       int nfiles = argc;
829         int is_argument_error = FALSE;
830       while (argc--) {
831             input_codename = NULL;
832             input_nextline = 0;
833 #ifdef CHECK_OPTION
834             iconv_for_check = 0;
835 #endif
836           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
837               perror(*--argv);
838                 *argv++;
839                 is_argument_error = TRUE;
840                 continue;
841           } else {
842 #ifdef OVERWRITE
843               int fd = 0;
844               int fd_backup = 0;
845 #endif
846
847 /* reopen file for stdout */
848               if (file_out_f == TRUE) {
849 #ifdef OVERWRITE
850                   if (overwrite_f){
851                       outfname = malloc(strlen(origfname)
852                                         + strlen(".nkftmpXXXXXX")
853                                         + 1);
854                       if (!outfname){
855                           perror(origfname);
856                           return -1;
857                       }
858                       strcpy(outfname, origfname);
859 #ifdef MSDOS
860                       {
861                           int i;
862                           for (i = strlen(outfname); i; --i){
863                               if (outfname[i - 1] == '/'
864                                   || outfname[i - 1] == '\\'){
865                                   break;
866                               }
867                           }
868                           outfname[i] = '\0';
869                       }
870                       strcat(outfname, "ntXXXXXX");
871                       mktemp(outfname);
872                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
873                                 S_IREAD | S_IWRITE);
874 #else
875                       strcat(outfname, ".nkftmpXXXXXX");
876                       fd = mkstemp(outfname);
877 #endif
878                       if (fd < 0
879                           || (fd_backup = dup(fileno(stdout))) < 0
880                           || dup2(fd, fileno(stdout)) < 0
881                           ){
882                           perror(origfname);
883                           return -1;
884                       }
885                   }else
886 #endif
887                   if(argc == 1) {
888                       outfname = *argv++;
889                       argc--;
890                   } else {
891                       outfname = "nkf.out";
892                   }
893
894                   if(freopen(outfname, "w", stdout) == NULL) {
895                       perror (outfname);
896                       return (-1);
897                   }
898                   if (binmode_f == TRUE) {
899 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
900                       if (freopen("","wb",stdout) == NULL)
901                            return (-1);
902 #else
903                       setbinmode(stdout);
904 #endif
905                   }
906               }
907               if (binmode_f == TRUE)
908 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
909                  if (freopen("","rb",fin) == NULL)
910                     return (-1);
911 #else
912                  setbinmode(fin);
913 #endif
914               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
915               if (nop_f)
916                   noconvert(fin);
917               else {
918                   char *filename = NULL;
919                   kanji_convert(fin);
920                   if (nfiles > 1) filename = origfname;
921                   if (guess_f) print_guessed_code(filename);
922               }
923               fclose(fin);
924 #ifdef OVERWRITE
925               if (overwrite_f) {
926                   struct stat     sb;
927 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
928                   time_t tb[2];
929 #else
930                   struct utimbuf  tb;
931 #endif
932
933                   fflush(stdout);
934                   close(fd);
935                   if (dup2(fd_backup, fileno(stdout)) < 0){
936                       perror("dup2");
937                   }
938                   if (stat(origfname, &sb)) {
939                       fprintf(stderr, "Can't stat %s\n", origfname);
940                   }
941                   /* restore the original file's permissions */
942                   if (chmod(outfname, sb.st_mode)) {
943                       fprintf(stderr, "Can't set permission %s\n", outfname);
944                   }
945
946                   /* restore the original file's timestamps */
947                     if(preserve_time_f){
948 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
949                         tb[0] = tb[1] = sb.st_mtime;
950                         if (utime(outfname, tb)) {
951                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
952                         }
953 #else
954                         tb.actime  = sb.st_atime;
955                         tb.modtime = sb.st_mtime;
956                         if (utime(outfname, &tb)) {
957                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
958                         }
959 #endif
960                     }
961                     if(backup_f){
962                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
963 #ifdef MSDOS
964                         unlink(backup_filename);
965 #endif
966                         if (rename(origfname, backup_filename)) {
967                             perror(backup_filename);
968                             fprintf(stderr, "Can't rename %s to %s\n",
969                                     origfname, backup_filename);
970                         }
971                     }else{
972 #ifdef MSDOS
973                         if (unlink(origfname)){
974                             perror(origfname);
975                         }
976 #endif
977                     }
978                   if (rename(outfname, origfname)) {
979                       perror(origfname);
980                       fprintf(stderr, "Can't rename %s to %s\n",
981                               outfname, origfname);
982                   }
983                   free(outfname);
984               }
985 #endif
986           }
987       }
988         if (is_argument_error)
989             return(-1);
990     }
991 #ifdef EASYWIN /*Easy Win */
992     if (file_out_f == FALSE)
993         scanf("%d",&end_check);
994     else
995         fclose(stdout);
996 #else /* for Other OS */
997     if (file_out_f == TRUE)
998         fclose(stdout);
999 #endif /*Easy Win */
1000     return (0);
1001 }
1002 #endif /* WIN32DLL */
1003
1004 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * When `suffix` contains one or more '*' characters, every '*' is replaced
 * by the whole of `filename` and all other characters of `suffix` are
 * copied verbatim ("*.orig" -> "<filename>.orig", "old_*" -> "old_<filename>").
 * When `suffix` contains no '*', it is simply appended to `filename`.
 *
 * Returns a NUL-terminated string obtained from malloc(); the caller owns
 * it and must free() it.  Returns NULL, after reporting through perror(),
 * when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t result_length;
    size_t i, j;

    for(i = 0; i < suffix_length; i++){
        if(suffix[i] == '*') asterisk_count++;
    }

    if(asterisk_count){
        /* each '*' leaves the suffix and is replaced by one copy of filename;
           asterisk_count <= suffix_length, so the subtraction cannot wrap */
        result_length = (suffix_length - asterisk_count)
                        + asterisk_count * filename_length;
    }else{
        result_length = filename_length + suffix_length;
    }

    /* one allocation, one failure check, for both naming schemes */
    backup_filename = malloc(result_length + 1);
    if (!backup_filename){
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if(asterisk_count){
        for(i = 0, j = 0; i < suffix_length; i++){
            if(suffix[i] == '*'){
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            }else{
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    }else{
        memcpy(backup_filename, filename, filename_length);
        /* suffix_length + 1 also copies the terminating NUL */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1043 #endif
1044
/*
 * Long ("--name") options recognised by options().
 *
 *   name   spelling that follows the leading "--"; a trailing '=' marks an
 *          option that is followed by a value (e.g. "--ic=...").
 *   alias  short option letters that options() parses in place of the long
 *          option; when empty, options() handles the option itself by
 *          comparing `name`.
 *
 * options() scans this table from the top and stops at the first match.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    /* input / output encoding given by name */
    { "ic=",                "" },
    { "oc=",                "" },
    /* plain aliases for short option strings */
    { "base64",             "jMB" },
    { "euc",                "e" },
    { "euc-input",          "E" },
    { "fj",                 "jm" },
    { "help",               "v" },
    { "jis",                "j" },
    { "jis-input",          "J" },
    { "mac",                "sLm" },
    { "mime",               "jM" },
    { "mime-input",         "m" },
    { "msdos",              "sLw" },
    { "sjis",               "s" },
    { "sjis-input",         "S" },
    { "unix",               "eLu" },
    { "version",            "V" },
    { "windows",            "sLw" },
    { "hiragana",           "h1" },
    { "katakana",           "h2" },
    { "katakana-hiragana",  "h3" },
    { "guess=",             "" },
    { "guess",              "g" },
    { "cp932",              "" },
    { "no-cp932",           "" },
#ifdef X0212_ENABLE
    { "x0212",              "" },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",               "w" },
    { "utf16",              "w16" },
    { "ms-ucs-map",         "" },
    { "fb-skip",            "" },
    { "fb-html",            "" },
    { "fb-xml",             "" },
    { "fb-perl",            "" },
    { "fb-java",            "" },
    { "fb-subchar",         "" },
    { "fb-subchar=",        "" },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",         "W" },
    { "utf16-input",        "W16" },
    { "no-cp932ext",        "" },
    { "no-best-fit-chars",  "" },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",      "" },
#endif
#ifdef OVERWRITE
    { "overwrite",          "" },
    { "overwrite=",         "" },
    { "in-place",           "" },
    { "in-place=",          "" },
#endif
#ifdef INPUT_OPTION
    { "cap-input",          "" },
    { "url-input",          "" },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",      "" },
#endif
#ifdef CHECK_OPTION
    { "no-output",          "" },
    { "debug",              "" },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",           "" },
#endif
#ifdef EXEC_IO
    { "exec-in",            "" },
    { "exec-out",           "" },
#endif
    { "prefix=",            "" },
};

/*
 * Set to 1 by options() when it meets a bare "--"; while it is 1,
 * options() returns immediately without parsing its argument.
 */
static int option_mode = 0;
1126
1127 void options(unsigned char *cp)
1128 {
1129     nkf_char i, j;
1130     unsigned char *p;
1131     unsigned char *cp_back = NULL;
1132     char codeset[32];
1133
1134     if (option_mode==1)
1135         return;
1136     while(*cp && *cp++!='-');
1137     while (*cp || cp_back) {
1138         if(!*cp){
1139             cp = cp_back;
1140             cp_back = NULL;
1141             continue;
1142         }
1143         p = 0;
1144         switch (*cp++) {
1145         case '-':  /* literal options */
1146             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
1147                 option_mode = 1;
1148                 return;
1149             }
1150             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1151                 p = (unsigned char *)long_option[i].name;
1152                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1153                 if (*p == cp[j] || cp[j] == SP){
1154                     p = &cp[j] + 1;
1155                     break;
1156                 }
1157                 p = 0;
1158             }
1159             if (p == 0) {
1160                 fprintf(stderr, "unknown long option: --%s\n", cp);
1161                 return;
1162             }
1163             while(*cp && *cp != SP && cp++);
1164             if (long_option[i].alias[0]){
1165                 cp_back = cp;
1166                 cp = (unsigned char *)long_option[i].alias;
1167             }else{
1168                 if (strcmp(long_option[i].name, "ic=") == 0){
1169                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1170                         codeset[i] = nkf_toupper(p[i]);
1171                     }
1172                     codeset[i] = 0;
1173                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1174                         input_f = JIS_INPUT;
1175                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1176                       strcmp(codeset, "CP50220") == 0 ||
1177                       strcmp(codeset, "CP50221") == 0 ||
1178                       strcmp(codeset, "CP50222") == 0){
1179                         input_f = JIS_INPUT;
1180 #ifdef SHIFTJIS_CP932
1181                         cp51932_f = TRUE;
1182 #endif
1183 #ifdef UTF8_OUTPUT_ENABLE
1184                         ms_ucs_map_f = UCS_MAP_CP932;
1185 #endif
1186                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1187                         input_f = JIS_INPUT;
1188 #ifdef X0212_ENABLE
1189                         x0212_f = TRUE;
1190 #endif
1191                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1192                         input_f = JIS_INPUT;
1193 #ifdef X0212_ENABLE
1194                         x0212_f = TRUE;
1195 #endif
1196                         x0213_f = TRUE;
1197                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1198                         input_f = SJIS_INPUT;
1199                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1200                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1201                              strcmp(codeset, "CP932") == 0 ||
1202                              strcmp(codeset, "MS932") == 0){
1203                         input_f = SJIS_INPUT;
1204 #ifdef SHIFTJIS_CP932
1205                         cp51932_f = TRUE;
1206 #endif
1207 #ifdef UTF8_OUTPUT_ENABLE
1208                         ms_ucs_map_f = UCS_MAP_CP932;
1209 #endif
1210                     }else if(strcmp(codeset, "CP10001") == 0){
1211                         input_f = SJIS_INPUT;
1212 #ifdef SHIFTJIS_CP932
1213                         cp51932_f = TRUE;
1214 #endif
1215 #ifdef UTF8_OUTPUT_ENABLE
1216                         ms_ucs_map_f = UCS_MAP_CP10001;
1217 #endif
1218                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1219                              strcmp(codeset, "EUC-JP") == 0){
1220                         input_f = EUC_INPUT;
1221                     }else if(strcmp(codeset, "CP51932") == 0){
1222                         input_f = EUC_INPUT;
1223 #ifdef SHIFTJIS_CP932
1224                         cp51932_f = TRUE;
1225 #endif
1226 #ifdef UTF8_OUTPUT_ENABLE
1227                         ms_ucs_map_f = UCS_MAP_CP932;
1228 #endif
1229                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1230                              strcmp(codeset, "EUCJP-MS") == 0 ||
1231                              strcmp(codeset, "EUCJPMS") == 0){
1232                         input_f = EUC_INPUT;
1233 #ifdef SHIFTJIS_CP932
1234                         cp51932_f = FALSE;
1235 #endif
1236 #ifdef UTF8_OUTPUT_ENABLE
1237                         ms_ucs_map_f = UCS_MAP_MS;
1238 #endif
1239                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1240                              strcmp(codeset, "EUCJP-ASCII") == 0){
1241                         input_f = EUC_INPUT;
1242 #ifdef SHIFTJIS_CP932
1243                         cp51932_f = FALSE;
1244 #endif
1245 #ifdef UTF8_OUTPUT_ENABLE
1246                         ms_ucs_map_f = UCS_MAP_ASCII;
1247 #endif
1248                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1249                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1250                         input_f = SJIS_INPUT;
1251                         x0213_f = TRUE;
1252 #ifdef SHIFTJIS_CP932
1253                         cp51932_f = FALSE;
1254 #endif
1255                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1256                              strcmp(codeset, "EUC-JIS-2004") == 0){
1257                         input_f = EUC_INPUT;
1258                         x0213_f = TRUE;
1259 #ifdef SHIFTJIS_CP932
1260                         cp51932_f = FALSE;
1261 #endif
1262 #ifdef UTF8_INPUT_ENABLE
1263                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1264                              strcmp(codeset, "UTF-8N") == 0 ||
1265                              strcmp(codeset, "UTF-8-BOM") == 0){
1266                         input_f = UTF8_INPUT;
1267 #ifdef UNICODE_NORMALIZATION
1268                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1269                              strcmp(codeset, "UTF-8-MAC") == 0){
1270                         input_f = UTF8_INPUT;
1271                         nfc_f = TRUE;
1272 #endif
1273                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1274                              strcmp(codeset, "UTF-16BE") == 0 ||
1275                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1276                         input_f = UTF16_INPUT;
1277                         input_endian = ENDIAN_BIG;
1278                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1279                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1280                         input_f = UTF16_INPUT;
1281                         input_endian = ENDIAN_LITTLE;
1282                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1283                              strcmp(codeset, "UTF-32BE") == 0 ||
1284                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1285                         input_f = UTF32_INPUT;
1286                         input_endian = ENDIAN_BIG;
1287                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1288                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1289                         input_f = UTF32_INPUT;
1290                         input_endian = ENDIAN_LITTLE;
1291 #endif
1292                     } else {
1293                         fprintf(stderr, "unknown input encoding: %s\n", codeset);
1294                     }
1295                     continue;
1296                 }
1297                 if (strcmp(long_option[i].name, "oc=") == 0){
1298                     x0201_f = FALSE;
1299                     for (i=0; i < 16 && SP < p[i] && p[i] < DEL; i++){
1300                         codeset[i] = nkf_toupper(p[i]);
1301                     }
1302                     codeset[i] = 0;
1303                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1304                         output_conv = j_oconv;
1305                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1306                         output_conv = j_oconv;
1307                         no_cp932ext_f = TRUE;
1308 #ifdef SHIFTJIS_CP932
1309                         cp932inv_f = FALSE;
1310 #endif
1311 #ifdef UTF8_OUTPUT_ENABLE
1312                         ms_ucs_map_f = UCS_MAP_CP932;
1313 #endif
1314                     }else if(strcmp(codeset, "CP50220") == 0){
1315                         output_conv = j_oconv;
1316                         x0201_f = TRUE;
1317 #ifdef SHIFTJIS_CP932
1318                         cp932inv_f = FALSE;
1319 #endif
1320 #ifdef UTF8_OUTPUT_ENABLE
1321                         ms_ucs_map_f = UCS_MAP_CP932;
1322 #endif
1323                     }else if(strcmp(codeset, "CP50221") == 0){
1324                         output_conv = j_oconv;
1325 #ifdef SHIFTJIS_CP932
1326                         cp932inv_f = FALSE;
1327 #endif
1328 #ifdef UTF8_OUTPUT_ENABLE
1329                         ms_ucs_map_f = UCS_MAP_CP932;
1330 #endif
1331                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1332                         output_conv = j_oconv;
1333 #ifdef X0212_ENABLE
1334                         x0212_f = TRUE;
1335 #endif
1336 #ifdef SHIFTJIS_CP932
1337                         cp932inv_f = FALSE;
1338 #endif
1339                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1340                         output_conv = j_oconv;
1341 #ifdef X0212_ENABLE
1342                         x0212_f = TRUE;
1343 #endif
1344                         x0213_f = TRUE;
1345 #ifdef SHIFTJIS_CP932
1346                         cp932inv_f = FALSE;
1347 #endif
1348                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1349                         output_conv = s_oconv;
1350                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1351                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1352                              strcmp(codeset, "CP932") == 0 ||
1353                              strcmp(codeset, "MS932") == 0){
1354                         output_conv = s_oconv;
1355 #ifdef UTF8_OUTPUT_ENABLE
1356                         ms_ucs_map_f = UCS_MAP_CP932;
1357 #endif
1358                     }else if(strcmp(codeset, "CP10001") == 0){
1359                         output_conv = s_oconv;
1360 #ifdef UTF8_OUTPUT_ENABLE
1361                         ms_ucs_map_f = UCS_MAP_CP10001;
1362 #endif
1363                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1364                              strcmp(codeset, "EUC-JP") == 0){
1365                         output_conv = e_oconv;
1366                     }else if(strcmp(codeset, "CP51932") == 0){
1367                         output_conv = e_oconv;
1368 #ifdef SHIFTJIS_CP932
1369                         cp932inv_f = FALSE;
1370 #endif
1371 #ifdef UTF8_OUTPUT_ENABLE
1372                         ms_ucs_map_f = UCS_MAP_CP932;
1373 #endif
1374                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1375                              strcmp(codeset, "EUCJP-MS") == 0 ||
1376                              strcmp(codeset, "EUCJPMS") == 0){
1377                         output_conv = e_oconv;
1378 #ifdef X0212_ENABLE
1379                         x0212_f = TRUE;
1380 #endif
1381 #ifdef UTF8_OUTPUT_ENABLE
1382                         ms_ucs_map_f = UCS_MAP_MS;
1383 #endif
1384                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1385                              strcmp(codeset, "EUCJP-ASCII") == 0){
1386                         output_conv = e_oconv;
1387 #ifdef X0212_ENABLE
1388                         x0212_f = TRUE;
1389 #endif
1390 #ifdef UTF8_OUTPUT_ENABLE
1391                         ms_ucs_map_f = UCS_MAP_ASCII;
1392 #endif
1393                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1394                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1395                         output_conv = s_oconv;
1396                         x0213_f = TRUE;
1397 #ifdef SHIFTJIS_CP932
1398                         cp932inv_f = FALSE;
1399 #endif
1400                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1401                              strcmp(codeset, "EUC-JIS-2004") == 0){
1402                         output_conv = e_oconv;
1403 #ifdef X0212_ENABLE
1404                         x0212_f = TRUE;
1405 #endif
1406                         x0213_f = TRUE;
1407 #ifdef SHIFTJIS_CP932
1408                         cp932inv_f = FALSE;
1409 #endif
1410 #ifdef UTF8_OUTPUT_ENABLE
1411                     }else if(strcmp(codeset, "UTF-8") == 0){
1412                         output_conv = w_oconv;
1413                     }else if(strcmp(codeset, "UTF-8N") == 0){
1414                         output_conv = w_oconv;
1415                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1416                         output_conv = w_oconv;
1417                         output_bom_f = TRUE;
1418                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1419                         output_conv = w_oconv16;
1420                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1421                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1422                         output_conv = w_oconv16;
1423                         output_bom_f = TRUE;
1424                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1425                         output_conv = w_oconv16;
1426                         output_endian = ENDIAN_LITTLE;
1427                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1428                         output_conv = w_oconv16;
1429                         output_endian = ENDIAN_LITTLE;
1430                         output_bom_f = TRUE;
1431                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1432                              strcmp(codeset, "UTF-32BE") == 0){
1433                         output_conv = w_oconv32;
1434                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1435                         output_conv = w_oconv32;
1436                         output_bom_f = TRUE;
1437                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1438                         output_conv = w_oconv32;
1439                         output_endian = ENDIAN_LITTLE;
1440                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1441                         output_conv = w_oconv32;
1442                         output_endian = ENDIAN_LITTLE;
1443                         output_bom_f = TRUE;
1444 #endif
1445                     } else {
1446                         fprintf(stderr, "unknown output encoding: %s\n", codeset);
1447                     }
1448                     continue;
1449                 }
1450                 if (strcmp(long_option[i].name, "guess=") == 0){
1451                     if (p[0] == '1') {
1452                         guess_f = 2;
1453                     } else {
1454                         guess_f = 1;
1455                     }
1456                     continue;
1457                 }
1458 #ifdef OVERWRITE
1459                 if (strcmp(long_option[i].name, "overwrite") == 0){
1460                     file_out_f = TRUE;
1461                     overwrite_f = TRUE;
1462                     preserve_time_f = TRUE;
1463                     continue;
1464                 }
1465                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1466                     file_out_f = TRUE;
1467                     overwrite_f = TRUE;
1468                     preserve_time_f = TRUE;
1469                     backup_f = TRUE;
1470                     backup_suffix = malloc(strlen((char *) p) + 1);
1471                     strcpy(backup_suffix, (char *) p);
1472                     continue;
1473                 }
1474                 if (strcmp(long_option[i].name, "in-place") == 0){
1475                     file_out_f = TRUE;
1476                     overwrite_f = TRUE;
1477                     preserve_time_f = FALSE;
1478                     continue;
1479                 }
1480                 if (strcmp(long_option[i].name, "in-place=") == 0){
1481                     file_out_f = TRUE;
1482                     overwrite_f = TRUE;
1483                     preserve_time_f = FALSE;
1484                     backup_f = TRUE;
1485                     backup_suffix = malloc(strlen((char *) p) + 1);
1486                     strcpy(backup_suffix, (char *) p);
1487                     continue;
1488                 }
1489 #endif
1490 #ifdef INPUT_OPTION
1491                 if (strcmp(long_option[i].name, "cap-input") == 0){
1492                     cap_f = TRUE;
1493                     continue;
1494                 }
1495                 if (strcmp(long_option[i].name, "url-input") == 0){
1496                     url_f = TRUE;
1497                     continue;
1498                 }
1499 #endif
1500 #ifdef NUMCHAR_OPTION
1501                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1502                     numchar_f = TRUE;
1503                     continue;
1504                 }
1505 #endif
1506 #ifdef CHECK_OPTION
1507                 if (strcmp(long_option[i].name, "no-output") == 0){
1508                     noout_f = TRUE;
1509                     continue;
1510                 }
1511                 if (strcmp(long_option[i].name, "debug") == 0){
1512                     debug_f = TRUE;
1513                     continue;
1514                 }
1515 #endif
1516                 if (strcmp(long_option[i].name, "cp932") == 0){
1517 #ifdef SHIFTJIS_CP932
1518                     cp51932_f = TRUE;
1519                     cp932inv_f = TRUE;
1520 #endif
1521 #ifdef UTF8_OUTPUT_ENABLE
1522                     ms_ucs_map_f = UCS_MAP_CP932;
1523 #endif
1524                     continue;
1525                 }
1526                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1527 #ifdef SHIFTJIS_CP932
1528                     cp51932_f = FALSE;
1529                     cp932inv_f = FALSE;
1530 #endif
1531 #ifdef UTF8_OUTPUT_ENABLE
1532                     ms_ucs_map_f = UCS_MAP_ASCII;
1533 #endif
1534                     continue;
1535                 }
1536 #ifdef SHIFTJIS_CP932
1537                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1538                     cp932inv_f = TRUE;
1539                     continue;
1540                 }
1541 #endif
1542
1543 #ifdef X0212_ENABLE
1544                 if (strcmp(long_option[i].name, "x0212") == 0){
1545                     x0212_f = TRUE;
1546                     continue;
1547                 }
1548 #endif
1549
1550 #ifdef EXEC_IO
1551                   if (strcmp(long_option[i].name, "exec-in") == 0){
1552                       exec_f = 1;
1553                       return;
1554                   }
1555                   if (strcmp(long_option[i].name, "exec-out") == 0){
1556                       exec_f = -1;
1557                       return;
1558                   }
1559 #endif
1560 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1561                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1562                     no_cp932ext_f = TRUE;
1563                     continue;
1564                 }
1565                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1566                     no_best_fit_chars_f = TRUE;
1567                     continue;
1568                 }
1569                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1570                     encode_fallback = NULL;
1571                     continue;
1572                 }
1573                 if (strcmp(long_option[i].name, "fb-html") == 0){
1574                     encode_fallback = encode_fallback_html;
1575                     continue;
1576                 }
1577                 if (strcmp(long_option[i].name, "fb-xml") == 0){
1578                     encode_fallback = encode_fallback_xml;
1579                     continue;
1580                 }
1581                 if (strcmp(long_option[i].name, "fb-java") == 0){
1582                     encode_fallback = encode_fallback_java;
1583                     continue;
1584                 }
1585                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1586                     encode_fallback = encode_fallback_perl;
1587                     continue;
1588                 }
1589                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1590                     encode_fallback = encode_fallback_subchar;
1591                     continue;
1592                 }
1593                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1594                     encode_fallback = encode_fallback_subchar;
1595                     unicode_subchar = 0;
1596                     if (p[0] != '0'){
1597                         /* decimal number */
1598                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1599                             unicode_subchar *= 10;
1600                             unicode_subchar += hex2bin(p[i]);
1601                         }
1602                     }else if(p[1] == 'x' || p[1] == 'X'){
1603                         /* hexadecimal number */
1604                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1605                             unicode_subchar <<= 4;
1606                             unicode_subchar |= hex2bin(p[i]);
1607                         }
1608                     }else{
1609                         /* octal number */
1610                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1611                             unicode_subchar *= 8;
1612                             unicode_subchar += hex2bin(p[i]);
1613                         }
1614                     }
1615                     w16e_conv(unicode_subchar, &i, &j);
1616                     unicode_subchar = i<<8 | j;
1617                     continue;
1618                 }
1619 #endif
1620 #ifdef UTF8_OUTPUT_ENABLE
1621                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1622                     ms_ucs_map_f = UCS_MAP_MS;
1623                     continue;
1624                 }
1625 #endif
1626 #ifdef UNICODE_NORMALIZATION
1627                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1628                     input_f = UTF8_INPUT;
1629                     nfc_f = TRUE;
1630                     continue;
1631                 }
1632 #endif
1633                 if (strcmp(long_option[i].name, "prefix=") == 0){
1634                     if (nkf_isgraph(p[0])){
1635                         for (i = 1; nkf_isgraph(p[i]); i++){
1636                             prefix_table[p[i]] = p[0];
1637                         }
1638                     }
1639                     continue;
1640                 }
1641             }
1642             continue;
1643         case 'b':           /* buffered mode */
1644             unbuf_f = FALSE;
1645             continue;
1646         case 'u':           /* non bufferd mode */
1647             unbuf_f = TRUE;
1648             continue;
1649         case 't':           /* transparent mode */
1650             if (*cp=='1') {
1651                 /* alias of -t */
1652                 nop_f = TRUE;
1653                 *cp++;
1654             } else if (*cp=='2') {
1655                 /*
1656                  * -t with put/get
1657                  *
1658                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1659                  *
1660                  */
1661                 nop_f = 2;
1662                 *cp++;
1663             } else
1664                 nop_f = TRUE;
1665             continue;
1666         case 'j':           /* JIS output */
1667         case 'n':
1668             output_conv = j_oconv;
1669             continue;
1670         case 'e':           /* AT&T EUC output */
1671             output_conv = e_oconv;
1672             cp932inv_f = FALSE;
1673             continue;
1674         case 's':           /* SJIS output */
1675             output_conv = s_oconv;
1676             continue;
1677         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1678             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1679             input_f = LATIN1_INPUT;
1680             continue;
1681         case 'i':           /* Kanji IN ESC-$-@/B */
1682             if (*cp=='@'||*cp=='B')
1683                 kanji_intro = *cp++;
1684             continue;
1685         case 'o':           /* ASCII IN ESC-(-J/B */
1686             if (*cp=='J'||*cp=='B'||*cp=='H')
1687                 ascii_intro = *cp++;
1688             continue;
1689         case 'h':
1690             /*
1691                 bit:1   katakana->hiragana
1692                 bit:2   hiragana->katakana
1693             */
1694             if ('9'>= *cp && *cp>='0')
1695                 hira_f |= (*cp++ -'0');
1696             else
1697                 hira_f |= 1;
1698             continue;
1699         case 'r':
1700             rot_f = TRUE;
1701             continue;
1702 #if defined(MSDOS) || defined(__OS2__)
1703         case 'T':
1704             binmode_f = FALSE;
1705             continue;
1706 #endif
1707 #ifndef PERL_XS
1708         case 'V':
1709             version();
1710             exit(1);
1711             break;
1712         case 'v':
1713             usage();
1714             exit(1);
1715             break;
1716 #endif
1717 #ifdef UTF8_OUTPUT_ENABLE
1718         case 'w':           /* UTF-8 output */
1719             if (cp[0] == '8') {
1720                 output_conv = w_oconv; cp++;
1721                 if (cp[0] == '0'){
1722                     cp++;
1723                 } else {
1724                     output_bom_f = TRUE;
1725                 }
1726             } else {
1727                 if ('1'== cp[0] && '6'==cp[1]) {
1728                     output_conv = w_oconv16; cp+=2;
1729                 } else if ('3'== cp[0] && '2'==cp[1]) {
1730                     output_conv = w_oconv32; cp+=2;
1731                 } else {
1732                     output_conv = w_oconv;
1733                     continue;
1734                 }
1735                 if (cp[0]=='L') {
1736                     cp++;
1737                     output_endian = ENDIAN_LITTLE;
1738                 } else if (cp[0] == 'B') {
1739                     cp++;
1740                 } else {
1741                     continue;
1742                 }
1743                 if (cp[0] == '0'){
1744                     cp++;
1745                 } else {
1746                     output_bom_f = TRUE;
1747                 }
1748             }
1749             continue;
1750 #endif
1751 #ifdef UTF8_INPUT_ENABLE
1752         case 'W':           /* UTF input */
1753             if (cp[0] == '8') {
1754                 cp++;
1755                 input_f = UTF8_INPUT;
1756             }else{
1757                 if ('1'== cp[0] && '6'==cp[1]) {
1758                     cp += 2;
1759                     input_f = UTF16_INPUT;
1760                     input_endian = ENDIAN_BIG;
1761                 } else if ('3'== cp[0] && '2'==cp[1]) {
1762                     cp += 2;
1763                     input_f = UTF32_INPUT;
1764                     input_endian = ENDIAN_BIG;
1765                 } else {
1766                     input_f = UTF8_INPUT;
1767                     continue;
1768                 }
1769                 if (cp[0]=='L') {
1770                     cp++;
1771                     input_endian = ENDIAN_LITTLE;
1772                 } else if (cp[0] == 'B') {
1773                     cp++;
1774                 }
1775             }
1776             continue;
1777 #endif
1778         /* Input code assumption */
1779         case 'J':   /* JIS input */
1780             input_f = JIS_INPUT;
1781             continue;
1782         case 'E':   /* AT&T EUC input */
1783             input_f = EUC_INPUT;
1784             continue;
1785         case 'S':   /* MS Kanji input */
1786             input_f = SJIS_INPUT;
1787             if (x0201_f==NO_X0201) x0201_f=TRUE;
1788             continue;
1789         case 'Z':   /* Convert X0208 alphabet to asii */
1790             /* alpha_f
1791                bit:0   Convert JIS X 0208 Alphabet to ASCII
1792                bit:1   Convert Kankaku to one space
1793                bit:2   Convert Kankaku to two spaces
1794                bit:3   Convert HTML Entity
1795                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1796             */
1797             while ('0'<= *cp && *cp <='9') {
1798                 alpha_f |= 1 << (*cp++ - '0');
1799             }
1800             if (!alpha_f) alpha_f = 1;
1801             continue;
1802         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1803             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1804             /* accept  X0201
1805                     ESC-(-I     in JIS, EUC, MS Kanji
1806                     SI/SO       in JIS, EUC, MS Kanji
1807                     SSO         in EUC, JIS, not in MS Kanji
1808                     MS Kanji (0xa0-0xdf)
1809                output  X0201
1810                     ESC-(-I     in JIS (0x20-0x5f)
1811                     SSO         in EUC (0xa0-0xdf)
1812                     0xa0-0xd    in MS Kanji (0xa0-0xdf)
1813             */
1814             continue;
1815         case 'X':   /* Assume X0201 kana */
1816             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1817             x0201_f = TRUE;
1818             continue;
1819         case 'F':   /* prserve new lines */
1820             fold_preserve_f = TRUE;
1821         case 'f':   /* folding -f60 or -f */
1822             fold_f = TRUE;
1823             fold_len = 0;
1824             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1825                 fold_len *= 10;
1826                 fold_len += *cp++ - '0';
1827             }
1828             if (!(0<fold_len && fold_len<BUFSIZ))
1829                 fold_len = DEFAULT_FOLD;
1830             if (*cp=='-') {
1831                 fold_margin = 0;
1832                 cp++;
1833                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1834                     fold_margin *= 10;
1835                     fold_margin += *cp++ - '0';
1836                 }
1837             }
1838             continue;
1839         case 'm':   /* MIME support */
1840             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1841             if (*cp=='B'||*cp=='Q') {
1842                 mime_decode_mode = *cp++;
1843                 mimebuf_f = FIXED_MIME;
1844             } else if (*cp=='N') {
1845                 mime_f = TRUE; cp++;
1846             } else if (*cp=='S') {
1847                 mime_f = STRICT_MIME; cp++;
1848             } else if (*cp=='0') {
1849                 mime_decode_f = FALSE;
1850                 mime_f = FALSE; cp++;
1851             }
1852             continue;
1853         case 'M':   /* MIME output */
1854             if (*cp=='B') {
1855                 mimeout_mode = 'B';
1856                 mimeout_f = FIXED_MIME; cp++;
1857             } else if (*cp=='Q') {
1858                 mimeout_mode = 'Q';
1859                 mimeout_f = FIXED_MIME; cp++;
1860             } else {
1861                 mimeout_f = TRUE;
1862             }
1863             continue;
1864         case 'B':   /* Broken JIS support */
1865             /*  bit:0   no ESC JIS
1866                 bit:1   allow any x on ESC-(-x or ESC-$-x
1867                 bit:2   reset to ascii on NL
1868             */
1869             if ('9'>= *cp && *cp>='0')
1870                 broken_f |= 1<<(*cp++ -'0');
1871             else
1872                 broken_f |= TRUE;
1873             continue;
1874 #ifndef PERL_XS
1875         case 'O':/* for Output file */
1876             file_out_f = TRUE;
1877             continue;
1878 #endif
1879         case 'c':/* add cr code */
1880             nlmode_f = CRLF;
1881             continue;
1882         case 'd':/* delete cr code */
1883             nlmode_f = LF;
1884             continue;
1885         case 'I':   /* ISO-2022-JP output */
1886             iso2022jp_f = TRUE;
1887             continue;
1888         case 'L':  /* line mode */
1889             if (*cp=='u') {         /* unix */
1890                 nlmode_f = LF; cp++;
1891             } else if (*cp=='m') { /* mac */
1892                 nlmode_f = CR; cp++;
1893             } else if (*cp=='w') { /* windows */
1894                 nlmode_f = CRLF; cp++;
1895             } else if (*cp=='0') { /* no conversion  */
1896                 nlmode_f = 0; cp++;
1897             }
1898             continue;
1899 #ifndef PERL_XS
1900         case 'g':
1901             if (*cp == '1') {
1902                 guess_f = 2;
1903                 cp++;
1904             } else if (*cp == '0') {
1905                 guess_f = 1;
1906                 cp++;
1907             } else {
1908                 guess_f = 1;
1909             }
1910             continue;
1911 #endif
1912         case SP:
1913         /* module muliple options in a string are allowed for Perl moudle  */
1914             while(*cp && *cp++!='-');
1915             continue;
1916         default:
1917             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
1918             /* bogus option but ignored */
1919             continue;
1920         }
1921     }
1922 }
1923
1924 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1925 {
1926     if (iconv_func){
1927         struct input_code *p = input_code_list;
1928         while (p->name){
1929             if (iconv_func == p->iconv_func){
1930                 return p;
1931             }
1932             p++;
1933         }
1934     }
1935     return 0;
1936 }
1937
1938 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1939 {
1940 #ifdef INPUT_CODE_FIX
1941     if (f || !input_f)
1942 #endif
1943         if (estab_f != f){
1944             estab_f = f;
1945         }
1946
1947     if (iconv_func
1948 #ifdef INPUT_CODE_FIX
1949         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1950 #endif
1951         ){
1952         iconv = iconv_func;
1953     }
1954 #ifdef CHECK_OPTION
1955     if (estab_f && iconv_for_check != iconv){
1956         struct input_code *p = find_inputcode_byfunc(iconv);
1957         if (p){
1958             set_input_codename(p->name);
1959             debug(p->name);
1960         }
1961         iconv_for_check = iconv;
1962     }
1963 #endif
1964 }
1965
/*
 * Score bits that are OR-ed into input_code.score by set_code_score().
 * Every constant is a single distinct bit, doubling from SCORE_L2 upward.
 */
#define SCORE_L2       (1)                   /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)       /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)     /* platform-dependent characters */
#define SCORE_CP932    (SCORE_DEPEND << 1)   /* re-read via CP932 (IBM extended characters) */
#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
#define SCORE_NO_EXIST (SCORE_X0212 << 1)    /* non-existent character */
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

#define SCORE_INIT (SCORE_iMIME)

/*
 * The tables below are unsigned char on purpose: SCORE_ERROR is 128, which
 * does not fit a signed char.  With plain char on a signed-char platform the
 * entry would read back as a negative value and set every high bit of the
 * score when OR-ed in.
 */

/* Scores used by code_score() when (c2 & 0x70) == 0x20, indexed by c2 & 0x0f. */
static const unsigned char score_table_A0[] = {
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};

/* Scores used by code_score() when (c2 & 0x70) == 0x70, indexed by c2 & 0x0f. */
static const unsigned char score_table_F0[] = {
    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
    SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
    SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
};
1990
1991 void set_code_score(struct input_code *ptr, nkf_char score)
1992 {
1993     if (ptr){
1994         ptr->score |= score;
1995     }
1996 }
1997
1998 void clr_code_score(struct input_code *ptr, nkf_char score)
1999 {
2000     if (ptr){
2001         ptr->score &= ~score;
2002     }
2003 }
2004
2005 void code_score(struct input_code *ptr)
2006 {
2007     nkf_char c2 = ptr->buf[0];
2008 #ifdef UTF8_OUTPUT_ENABLE
2009     nkf_char c1 = ptr->buf[1];
2010 #endif
2011     if (c2 < 0){
2012         set_code_score(ptr, SCORE_ERROR);
2013     }else if (c2 == SSO){
2014         set_code_score(ptr, SCORE_KANA);
2015     }else if (c2 == 0x8f){
2016         set_code_score(ptr, SCORE_X0212);
2017 #ifdef UTF8_OUTPUT_ENABLE
2018     }else if (!e2w_conv(c2, c1)){
2019         set_code_score(ptr, SCORE_NO_EXIST);
2020 #endif
2021     }else if ((c2 & 0x70) == 0x20){
2022         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2023     }else if ((c2 & 0x70) == 0x70){
2024         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2025     }else if ((c2 & 0x70) >= 0x50){
2026         set_code_score(ptr, SCORE_L2);
2027     }
2028 }
2029
2030 void status_disable(struct input_code *ptr)
2031 {
2032     ptr->stat = -1;
2033     ptr->buf[0] = -1;
2034     code_score(ptr);
2035     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2036 }
2037
2038 void status_push_ch(struct input_code *ptr, nkf_char c)
2039 {
2040     ptr->buf[ptr->index++] = c;
2041 }
2042
2043 void status_clear(struct input_code *ptr)
2044 {
2045     ptr->stat = 0;
2046     ptr->index = 0;
2047 }
2048
2049 void status_reset(struct input_code *ptr)
2050 {
2051     status_clear(ptr);
2052     ptr->score = SCORE_INIT;
2053 }
2054
2055 void status_reinit(struct input_code *ptr)
2056 {
2057     status_reset(ptr);
2058     ptr->_file_stat = 0;
2059 }
2060
2061 void status_check(struct input_code *ptr, nkf_char c)
2062 {
2063     if (c <= DEL && estab_f){
2064         status_reset(ptr);
2065     }
2066 }
2067
2068 void s_status(struct input_code *ptr, nkf_char c)
2069 {
2070     switch(ptr->stat){
2071       case -1:
2072           status_check(ptr, c);
2073           break;
2074       case 0:
2075           if (c <= DEL){
2076               break;
2077 #ifdef NUMCHAR_OPTION
2078           }else if (is_unicode_capsule(c)){
2079               break;
2080 #endif
2081           }else if (0xa1 <= c && c <= 0xdf){
2082               status_push_ch(ptr, SSO);
2083               status_push_ch(ptr, c);
2084               code_score(ptr);
2085               status_clear(ptr);
2086           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2087               ptr->stat = 1;
2088               status_push_ch(ptr, c);
2089           }else if (0xed <= c && c <= 0xee){
2090               ptr->stat = 3;
2091               status_push_ch(ptr, c);
2092 #ifdef SHIFTJIS_CP932
2093           }else if (is_ibmext_in_sjis(c)){
2094               ptr->stat = 2;
2095               status_push_ch(ptr, c);
2096 #endif /* SHIFTJIS_CP932 */
2097 #ifdef X0212_ENABLE
2098           }else if (0xf0 <= c && c <= 0xfc){
2099               ptr->stat = 1;
2100               status_push_ch(ptr, c);
2101 #endif /* X0212_ENABLE */
2102           }else{
2103               status_disable(ptr);
2104           }
2105           break;
2106       case 1:
2107           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2108               status_push_ch(ptr, c);
2109               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2110               code_score(ptr);
2111               status_clear(ptr);
2112           }else{
2113               status_disable(ptr);
2114           }
2115           break;
2116       case 2:
2117 #ifdef SHIFTJIS_CP932
2118         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2119             status_push_ch(ptr, c);
2120             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2121                 set_code_score(ptr, SCORE_CP932);
2122                 status_clear(ptr);
2123                 break;
2124             }
2125         }
2126 #endif /* SHIFTJIS_CP932 */
2127         status_disable(ptr);
2128           break;
2129       case 3:
2130           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2131               status_push_ch(ptr, c);
2132               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2133             set_code_score(ptr, SCORE_CP932);
2134             status_clear(ptr);
2135           }else{
2136               status_disable(ptr);
2137           }
2138           break;
2139     }
2140 }
2141
2142 void e_status(struct input_code *ptr, nkf_char c)
2143 {
2144     switch (ptr->stat){
2145       case -1:
2146           status_check(ptr, c);
2147           break;
2148       case 0:
2149           if (c <= DEL){
2150               break;
2151 #ifdef NUMCHAR_OPTION
2152           }else if (is_unicode_capsule(c)){
2153               break;
2154 #endif
2155           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2156               ptr->stat = 1;
2157               status_push_ch(ptr, c);
2158 #ifdef X0212_ENABLE
2159           }else if (0x8f == c){
2160               ptr->stat = 2;
2161               status_push_ch(ptr, c);
2162 #endif /* X0212_ENABLE */
2163           }else{
2164               status_disable(ptr);
2165           }
2166           break;
2167       case 1:
2168           if (0xa1 <= c && c <= 0xfe){
2169               status_push_ch(ptr, c);
2170               code_score(ptr);
2171               status_clear(ptr);
2172           }else{
2173               status_disable(ptr);
2174           }
2175           break;
2176 #ifdef X0212_ENABLE
2177       case 2:
2178           if (0xa1 <= c && c <= 0xfe){
2179               ptr->stat = 1;
2180               status_push_ch(ptr, c);
2181           }else{
2182               status_disable(ptr);
2183           }
2184 #endif /* X0212_ENABLE */
2185     }
2186 }
2187
#ifdef UTF8_INPUT_ENABLE
/*
 * Detector state machine fed one input byte at a time (presumably the UTF-8
 * detector, judging by the name and the lead/continuation byte ranges).
 *
 * ptr->stat values:
 *   -1  disabled; only status_check() is consulted
 *    0  idle, waiting for a lead byte
 *    1  lead byte 0xc0..0xdf stored
 *    2  lead byte 0xe0..0xef stored
 *    3  lead byte 0xf0..0xf4 stored
 * In states 1 and 2 the sequence is converted with w2e_conv() and scored
 * once more bytes than ptr->stat have been collected; the sequence
 * EF BB BF is converted but not scored.  In state 3 the bytes are only
 * collected and the state cleared afterwards, without scoring.
 * A byte outside 0x80..0xbf in states 1-3 disables the detector.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    int is_trail = (0x80 <= c && c <= 0xbf);
    int next_stat = 0;

    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;

      case 0:
          if (c <= DEL)
              break;
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c))
              break;
#endif
          if (0xc0 <= c && c <= 0xdf){
              next_stat = 1;
          }else if (0xe0 <= c && c <= 0xef){
              next_stat = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              next_stat = 3;
          }

          if (next_stat){
              ptr->stat = next_stat;
              status_push_ch(ptr, c);
          }else{
              status_disable(ptr);
          }
          break;

      case 1:
      case 2:
          if (!is_trail){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index > ptr->stat){
              /* Sequence complete: remember whether it was EF BB BF. */
              int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                         && ptr->buf[2] == 0xbf);
              w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                       &ptr->buf[0], &ptr->buf[1]);
              if (!bom){
                  code_score(ptr);
              }
              status_clear(ptr);
          }
          break;

      case 3:
          if (!is_trail){
              status_disable(ptr);
          }else if (ptr->index < ptr->stat){
              status_push_ch(ptr, c);
          }else{
              status_clear(ptr);
          }
          break;
    }
}
#endif
2247
2248 void code_status(nkf_char c)
2249 {
2250     int action_flag = 1;
2251     struct input_code *result = 0;
2252     struct input_code *p = input_code_list;
2253     while (p->name){
2254         if (!p->status_func) {
2255             ++p;
2256             continue;
2257         }
2258         if (!p->status_func)
2259             continue;
2260         (p->status_func)(p, c);
2261         if (p->stat > 0){
2262             action_flag = 0;
2263         }else if(p->stat == 0){
2264             if (result){
2265                 action_flag = 0;
2266             }else{
2267                 result = p;
2268             }
2269         }
2270         ++p;
2271     }
2272
2273     if (action_flag){
2274         if (result && !estab_f){
2275             set_iconv(TRUE, result->iconv_func);
2276         }else if (c <= DEL){
2277             struct input_code *ptr = input_code_list;
2278             while (ptr->name){
2279                 status_reset(ptr);
2280                 ++ptr;
2281             }
2282         }
2283     }
2284 }
2285
2286 #ifndef WIN32DLL
2287 nkf_char std_getc(FILE *f)
2288 {
2289     if (std_gc_ndx){
2290         return std_gc_buf[--std_gc_ndx];
2291     }
2292     return getc(f);
2293 }
2294 #endif /*WIN32DLL*/
2295
2296 nkf_char std_ungetc(nkf_char c, FILE *f)
2297 {
2298     if (std_gc_ndx == STD_GC_BUFSIZE){
2299         return EOF;
2300     }
2301     std_gc_buf[std_gc_ndx++] = c;
2302     return c;
2303 }
2304
2305 #ifndef WIN32DLL
2306 void std_putc(nkf_char c)
2307 {
2308     if(c!=EOF)
2309       putchar(c);
2310 }
2311 #endif /*WIN32DLL*/
2312
2313 #if !defined(PERL_XS) && !defined(WIN32DLL)
2314 nkf_char noconvert(FILE *f)
2315 {
2316     nkf_char    c;
2317
2318     if (nop_f == 2)
2319         module_connection();
2320     while ((c = (*i_getc)(f)) != EOF)
2321       (*o_putc)(c);
2322     (*o_putc)(EOF);
2323     return 1;
2324 }
2325 #endif
2326
2327 void module_connection(void)
2328 {
2329     oconv = output_conv;
2330     o_putc = std_putc;
2331
2332     /* replace continucation module, from output side */
2333
2334     /* output redicrection */
2335 #ifdef CHECK_OPTION
2336     if (noout_f || guess_f){
2337         o_putc = no_putc;
2338     }
2339 #endif
2340     if (mimeout_f) {
2341         o_mputc = o_putc;
2342         o_putc = mime_putc;
2343         if (mimeout_f == TRUE) {
2344             o_base64conv = oconv; oconv = base64_conv;
2345         }
2346         /* base64_count = 0; */
2347     }
2348
2349     if (nlmode_f || guess_f) {
2350         o_nlconv = oconv; oconv = nl_conv;
2351     }
2352     if (rot_f) {
2353         o_rot_conv = oconv; oconv = rot_conv;
2354     }
2355     if (iso2022jp_f) {
2356         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2357     }
2358     if (hira_f) {
2359         o_hira_conv = oconv; oconv = hira_conv;
2360     }
2361     if (fold_f) {
2362         o_fconv = oconv; oconv = fold_conv;
2363         f_line = 0;
2364     }
2365     if (alpha_f || x0201_f) {
2366         o_zconv = oconv; oconv = z_conv;
2367     }
2368
2369     i_getc = std_getc;
2370     i_ungetc = std_ungetc;
2371     /* input redicrection */
2372 #ifdef INPUT_OPTION
2373     if (cap_f){
2374         i_cgetc = i_getc; i_getc = cap_getc;
2375         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2376     }
2377     if (url_f){
2378         i_ugetc = i_getc; i_getc = url_getc;
2379         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2380     }
2381 #endif
2382 #ifdef NUMCHAR_OPTION
2383     if (numchar_f){
2384         i_ngetc = i_getc; i_getc = numchar_getc;
2385         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2386     }
2387 #endif
2388 #ifdef UNICODE_NORMALIZATION
2389     if (nfc_f && input_f == UTF8_INPUT){
2390         i_nfc_getc = i_getc; i_getc = nfc_getc;
2391         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2392     }
2393 #endif
2394     if (mime_f && mimebuf_f==FIXED_MIME) {
2395         i_mgetc = i_getc; i_getc = mime_getc;
2396         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2397     }
2398     if (broken_f & 1) {
2399         i_bgetc = i_getc; i_getc = broken_getc;
2400         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2401     }
2402     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2403         set_iconv(-TRUE, e_iconv);
2404     } else if (input_f == SJIS_INPUT) {
2405         set_iconv(-TRUE, s_iconv);
2406 #ifdef UTF8_INPUT_ENABLE
2407     } else if (input_f == UTF8_INPUT) {
2408         set_iconv(-TRUE, w_iconv);
2409     } else if (input_f == UTF16_INPUT) {
2410         set_iconv(-TRUE, w_iconv16);
2411     } else if (input_f == UTF32_INPUT) {
2412         set_iconv(-TRUE, w_iconv32);
2413 #endif
2414     } else {
2415         set_iconv(FALSE, e_iconv);
2416     }
2417
2418     {
2419         struct input_code *p = input_code_list;
2420         while (p->name){
2421             status_reinit(p++);
2422         }
2423     }
2424 }
2425
2426 /*
2427  * Check and Ignore BOM
2428  */
2429 void check_bom(FILE *f)
2430 {
2431     int c2;
2432     switch(c2 = (*i_getc)(f)){
2433     case 0x00:
2434         if((c2 = (*i_getc)(f)) == 0x00){
2435             if((c2 = (*i_getc)(f)) == 0xFE){
2436                 if((c2 = (*i_getc)(f)) == 0xFF){
2437                     if(!input_f){
2438                         set_iconv(TRUE, w_iconv32);
2439                     }
2440                     if (iconv == w_iconv32) {
2441                         input_endian = ENDIAN_BIG;
2442                         return;
2443                     }
2444                     (*i_ungetc)(0xFF,f);
2445                 }else (*i_ungetc)(c2,f);
2446                 (*i_ungetc)(0xFE,f);
2447             }else if(c2 == 0xFF){
2448                 if((c2 = (*i_getc)(f)) == 0xFE){
2449                     if(!input_f){
2450                         set_iconv(TRUE, w_iconv32);
2451                     }
2452                     if (iconv == w_iconv32) {
2453                         input_endian = ENDIAN_2143;
2454                         return;
2455                     }
2456                     (*i_ungetc)(0xFF,f);
2457                 }else (*i_ungetc)(c2,f);
2458                 (*i_ungetc)(0xFF,f);
2459             }else (*i_ungetc)(c2,f);
2460             (*i_ungetc)(0x00,f);
2461         }else (*i_ungetc)(c2,f);
2462         (*i_ungetc)(0x00,f);
2463         break;
2464     case 0xEF:
2465         if((c2 = (*i_getc)(f)) == 0xBB){
2466             if((c2 = (*i_getc)(f)) == 0xBF){
2467                 if(!input_f){
2468                     set_iconv(TRUE, w_iconv);
2469                 }
2470                 if (iconv == w_iconv) {
2471                     return;
2472                 }
2473                 (*i_ungetc)(0xBF,f);
2474             }else (*i_ungetc)(c2,f);
2475             (*i_ungetc)(0xBB,f);
2476         }else (*i_ungetc)(c2,f);
2477         (*i_ungetc)(0xEF,f);
2478         break;
2479     case 0xFE:
2480         if((c2 = (*i_getc)(f)) == 0xFF){
2481             if((c2 = (*i_getc)(f)) == 0x00){
2482                 if((c2 = (*i_getc)(f)) == 0x00){
2483                     if(!input_f){
2484                         set_iconv(TRUE, w_iconv32);
2485                     }
2486                     if (iconv == w_iconv32) {
2487                         input_endian = ENDIAN_3412;
2488                         return;
2489                     }
2490                     (*i_ungetc)(0x00,f);
2491                 }else (*i_ungetc)(c2,f);
2492                 (*i_ungetc)(0x00,f);
2493             }else (*i_ungetc)(c2,f);
2494             if(!input_f){
2495                 set_iconv(TRUE, w_iconv16);
2496             }
2497             if (iconv == w_iconv16) {
2498                 input_endian = ENDIAN_BIG;
2499                 return;
2500             }
2501             (*i_ungetc)(0xFF,f);
2502         }else (*i_ungetc)(c2,f);
2503         (*i_ungetc)(0xFE,f);
2504         break;
2505     case 0xFF:
2506         if((c2 = (*i_getc)(f)) == 0xFE){
2507             if((c2 = (*i_getc)(f)) == 0x00){
2508                 if((c2 = (*i_getc)(f)) == 0x00){
2509                     if(!input_f){
2510                         set_iconv(TRUE, w_iconv32);
2511                     }
2512                     if (iconv == w_iconv32) {
2513                         input_endian = ENDIAN_LITTLE;
2514                         return;
2515                     }
2516                     (*i_ungetc)(0x00,f);
2517                 }else (*i_ungetc)(c2,f);
2518                 (*i_ungetc)(0x00,f);
2519             }else (*i_ungetc)(c2,f);
2520             if(!input_f){
2521                 set_iconv(TRUE, w_iconv16);
2522             }
2523             if (iconv == w_iconv16) {
2524                 input_endian = ENDIAN_LITTLE;
2525                 return;
2526             }
2527             (*i_ungetc)(0xFE,f);
2528         }else (*i_ungetc)(c2,f);
2529         (*i_ungetc)(0xFF,f);
2530         break;
2531     default:
2532         (*i_ungetc)(c2,f);
2533         break;
2534     }
2535 }
2536
2537 /*
2538    Conversion main loop. Code detection only.
2539  */
2540
2541 nkf_char kanji_convert(FILE *f)
2542 {
2543     nkf_char    c3, c2=0, c1, c0=0;
2544     int is_8bit = FALSE;
2545
2546     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2547 #ifdef UTF8_INPUT_ENABLE
2548        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2549 #endif
2550       ){
2551         is_8bit = TRUE;
2552     }
2553
2554     input_mode = ASCII;
2555     output_mode = ASCII;
2556     shift_mode = FALSE;
2557
2558 #define NEXT continue      /* no output, get next */
2559 #define SEND ;             /* output c1 and c2, get next */
2560 #define LAST break         /* end of loop, go closing  */
2561
2562     module_connection();
2563     check_bom(f);
2564
2565     while ((c1 = (*i_getc)(f)) != EOF) {
2566 #ifdef INPUT_CODE_FIX
2567         if (!input_f)
2568 #endif
2569             code_status(c1);
2570         if (c2) {
2571             /* second byte */
2572             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2573                 /* in case of 8th bit is on */
2574                 if (!estab_f&&!mime_decode_mode) {
2575                     /* in case of not established yet */
2576                     /* It is still ambiguious */
2577                     if (h_conv(f, c2, c1)==EOF)
2578                         LAST;
2579                     else
2580                         c2 = 0;
2581                     NEXT;
2582                 } else {
2583                     /* in case of already established */
2584                     if (c1 < AT) {
2585                         /* ignore bogus code and not CP5022x UCD */
2586                         c2 = 0;
2587                         NEXT;
2588                     } else {
2589                         SEND;
2590                     }
2591                 }
2592             } else
2593                 /* second byte, 7 bit code */
2594                 /* it might be kanji shitfted */
2595                 if ((c1 == DEL) || (c1 <= SP)) {
2596                     /* ignore bogus first code */
2597                     c2 = 0;
2598                     NEXT;
2599                 } else
2600                     SEND;
2601         } else {
2602             /* first byte */
2603 #ifdef UTF8_INPUT_ENABLE
2604             if (iconv == w_iconv16) {
2605                 if (input_endian == ENDIAN_BIG) {
2606                     c2 = c1;
2607                     if ((c1 = (*i_getc)(f)) != EOF) {
2608                         if (0xD8 <= c2 && c2 <= 0xDB) {
2609                             if ((c0 = (*i_getc)(f)) != EOF) {
2610                                 c0 <<= 8;
2611                                 if ((c3 = (*i_getc)(f)) != EOF) {
2612                                     c0 |= c3;
2613                                 } else c2 = EOF;
2614                             } else c2 = EOF;
2615                         }
2616                     } else c2 = EOF;
2617                 } else {
2618                     if ((c2 = (*i_getc)(f)) != EOF) {
2619                         if (0xD8 <= c2 && c2 <= 0xDB) {
2620                             if ((c3 = (*i_getc)(f)) != EOF) {
2621                                 if ((c0 = (*i_getc)(f)) != EOF) {
2622                                     c0 <<= 8;
2623                                     c0 |= c3;
2624                                 } else c2 = EOF;
2625                             } else c2 = EOF;
2626                         }
2627                     } else c2 = EOF;
2628                 }
2629                 SEND;
2630             } else if(iconv == w_iconv32){
2631                 int c3 = c1;
2632                 if((c2 = (*i_getc)(f)) != EOF &&
2633                    (c1 = (*i_getc)(f)) != EOF &&
2634                    (c0 = (*i_getc)(f)) != EOF){
2635                     switch(input_endian){
2636                     case ENDIAN_BIG:
2637                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2638                         break;
2639                     case ENDIAN_LITTLE:
2640                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2641                         break;
2642                     case ENDIAN_2143:
2643                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2644                         break;
2645                     case ENDIAN_3412:
2646                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2647                         break;
2648                     }
2649                     c2 = 0;
2650                 }else{
2651                     c2 = EOF;
2652                 }
2653                 SEND;
2654             } else
2655 #endif
2656 #ifdef NUMCHAR_OPTION
2657             if (is_unicode_capsule(c1)){
2658                 SEND;
2659             } else
2660 #endif
2661             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2662                 /* 8 bit code */
2663                 if (!estab_f && !iso8859_f) {
2664                     /* not established yet */
2665                     c2 = c1;
2666                     NEXT;
2667                 } else { /* estab_f==TRUE */
2668                     if (iso8859_f) {
2669                         c2 = ISO8859_1;
2670                         c1 &= 0x7f;
2671                         SEND;
2672                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2673                         /* SJIS X0201 Case... */
2674                         if(iso2022jp_f && x0201_f==NO_X0201) {
2675                             (*oconv)(GETA1, GETA2);
2676                             NEXT;
2677                         } else {
2678                             c2 = X0201;
2679                             c1 &= 0x7f;
2680                             SEND;
2681                         }
2682                     } else if (c1==SSO && iconv != s_iconv) {
2683                         /* EUC X0201 Case */
2684                         c1 = (*i_getc)(f);  /* skip SSO */
2685                         code_status(c1);
2686                         if (SSP<=c1 && c1<0xe0) {
2687                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2688                                 (*oconv)(GETA1, GETA2);
2689                                 NEXT;
2690                             } else {
2691                                 c2 = X0201;
2692                                 c1 &= 0x7f;
2693                                 SEND;
2694                             }
2695                         } else  { /* bogus code, skip SSO and one byte */
2696                             NEXT;
2697                         }
2698                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2699                                (c1 == 0xFD || c1 == 0xFE)) {
2700                         /* CP10001 */
2701                         c2 = X0201;
2702                         c1 &= 0x7f;
2703                         SEND;
2704                     } else {
2705                        /* already established */
2706                        c2 = c1;
2707                        NEXT;
2708                     }
2709                 }
2710             } else if ((c1 > SP) && (c1 != DEL)) {
2711                 /* in case of Roman characters */
2712                 if (shift_mode) {
2713                     /* output 1 shifted byte */
2714                     if (iso8859_f) {
2715                         c2 = ISO8859_1;
2716                         SEND;
2717                     } else if (SP <= c1 && c1 < (0xe0&0x7f)){
2718                       /* output 1 shifted byte */
2719                         if(iso2022jp_f && x0201_f==NO_X0201) {
2720                             (*oconv)(GETA1, GETA2);
2721                             NEXT;
2722                         } else {
2723                             c2 = X0201;
2724                             SEND;
2725                         }
2726                     } else {
2727                         /* look like bogus code */
2728                         NEXT;
2729                     }
2730                 } else if (input_mode == X0208 || input_mode == X0212 ||
2731                            input_mode == X0213_1 || input_mode == X0213_2) {
2732                     /* in case of Kanji shifted */
2733                     c2 = c1;
2734                     NEXT;
2735                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
2736                     /* Check MIME code */
2737                     if ((c1 = (*i_getc)(f)) == EOF) {
2738                         (*oconv)(0, '=');
2739                         LAST;
2740                     } else if (c1 == '?') {
2741                         /* =? is mime conversion start sequence */
2742                         if(mime_f == STRICT_MIME) {
2743                             /* check in real detail */
2744                             if (mime_begin_strict(f) == EOF)
2745                                 LAST;
2746                             else
2747                                 NEXT;
2748                         } else if (mime_begin(f) == EOF)
2749                             LAST;
2750                         else
2751                             NEXT;
2752                     } else {
2753                         (*oconv)(0, '=');
2754                         (*i_ungetc)(c1,f);
2755                         NEXT;
2756                     }
2757                 } else {
2758                     /* normal ASCII code */
2759                     SEND;
2760                 }
2761             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
2762                 shift_mode = FALSE;
2763                 NEXT;
2764             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
2765                 shift_mode = TRUE;
2766                 NEXT;
2767             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
2768                 if ((c1 = (*i_getc)(f)) == EOF) {
2769                     /*  (*oconv)(0, ESC); don't send bogus code */
2770                     LAST;
2771                 } else if (c1 == '$') {
2772                     if ((c1 = (*i_getc)(f)) == EOF) {
2773                         /*
2774                         (*oconv)(0, ESC); don't send bogus code
2775                         (*oconv)(0, '$'); */
2776                         LAST;
2777                     } else if (c1 == '@'|| c1 == 'B') {
2778                         /* This is kanji introduction */
2779                         input_mode = X0208;
2780                         shift_mode = FALSE;
2781                         set_input_codename("ISO-2022-JP");
2782 #ifdef CHECK_OPTION
2783                         debug("ISO-2022-JP");
2784 #endif
2785                         NEXT;
2786                     } else if (c1 == '(') {
2787                         if ((c1 = (*i_getc)(f)) == EOF) {
2788                             /* don't send bogus code
2789                             (*oconv)(0, ESC);
2790                             (*oconv)(0, '$');
2791                             (*oconv)(0, '(');
2792                                 */
2793                             LAST;
2794                         } else if (c1 == '@'|| c1 == 'B') {
2795                             /* This is kanji introduction */
2796                             input_mode = X0208;
2797                             shift_mode = FALSE;
2798                             NEXT;
2799 #ifdef X0212_ENABLE
2800                         } else if (c1 == 'D'){
2801                             input_mode = X0212;
2802                             shift_mode = FALSE;
2803                             NEXT;
2804 #endif /* X0212_ENABLE */
2805                         } else if (c1 == (X0213_1&0x7F)){
2806                             input_mode = X0213_1;
2807                             shift_mode = FALSE;
2808                             NEXT;
2809                         } else if (c1 == (X0213_2&0x7F)){
2810                             input_mode = X0213_2;
2811                             shift_mode = FALSE;
2812                             NEXT;
2813                         } else {
2814                             /* could be some special code */
2815                             (*oconv)(0, ESC);
2816                             (*oconv)(0, '$');
2817                             (*oconv)(0, '(');
2818                             (*oconv)(0, c1);
2819                             NEXT;
2820                         }
2821                     } else if (broken_f&0x2) {
2822                         /* accept any ESC-(-x as broken code ... */
2823                         input_mode = X0208;
2824                         shift_mode = FALSE;
2825                         NEXT;
2826                     } else {
2827                         (*oconv)(0, ESC);
2828                         (*oconv)(0, '$');
2829                         (*oconv)(0, c1);
2830                         NEXT;
2831                     }
2832                 } else if (c1 == '(') {
2833                     if ((c1 = (*i_getc)(f)) == EOF) {
2834                         /* don't send bogus code
2835                         (*oconv)(0, ESC);
2836                         (*oconv)(0, '('); */
2837                         LAST;
2838                     } else {
2839                         if (c1 == 'I') {
2840                             /* This is X0201 kana introduction */
2841                             input_mode = X0201; shift_mode = X0201;
2842                             NEXT;
2843                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2844                             /* This is X0208 kanji introduction */
2845                             input_mode = ASCII; shift_mode = FALSE;
2846                             NEXT;
2847                         } else if (broken_f&0x2) {
2848                             input_mode = ASCII; shift_mode = FALSE;
2849                             NEXT;
2850                         } else {
2851                             (*oconv)(0, ESC);
2852                             (*oconv)(0, '(');
2853                             /* maintain various input_mode here */
2854                             SEND;
2855                         }
2856                     }
2857                } else if ( c1 == 'N' || c1 == 'n'){
2858                    /* SS2 */
2859                    c3 = (*i_getc)(f);  /* skip SS2 */
2860                    if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2861                        c1 = c3;
2862                        c2 = X0201;
2863                        SEND;
2864                    }else{
2865                        (*i_ungetc)(c3, f);
2866                        /* lonely ESC  */
2867                        (*oconv)(0, ESC);
2868                        SEND;
2869                    }
2870                 } else {
2871                     /* lonely ESC  */
2872                     (*oconv)(0, ESC);
2873                     SEND;
2874                 }
2875             } else if (c1 == ESC && iconv == s_iconv) {
2876                 /* ESC in Shift_JIS */
2877                 if ((c1 = (*i_getc)(f)) == EOF) {
2878                     /*  (*oconv)(0, ESC); don't send bogus code */
2879                     LAST;
2880                 } else if (c1 == '$') {
2881                     /* J-PHONE emoji */
2882                     if ((c1 = (*i_getc)(f)) == EOF) {
2883                         /*
2884                            (*oconv)(0, ESC); don't send bogus code
2885                            (*oconv)(0, '$'); */
2886                         LAST;
2887                     } else {
2888                         if (('E' <= c1 && c1 <= 'G') ||
2889                             ('O' <= c1 && c1 <= 'Q')) {
2890                             /*
2891                                NUM : 0 1 2 3 4 5
2892                                BYTE: G E F O P Q
2893                                C%7 : 1 6 0 2 3 4
2894                                C%7 : 0 1 2 3 4 5 6
2895                                NUM : 2 0 3 4 5 X 1
2896                              */
2897                             static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
2898                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SP + 0xE000 + CLASS_UNICODE;
2899                             while ((c1 = (*i_getc)(f)) != EOF) {
2900                                 if (SP <= c1 && c1 <= 'z') {
2901                                     (*oconv)(0, c1 + c0);
2902                                 } else break; /* c1 == SO */
2903                             }
2904                         }
2905                     }
2906                     if (c1 == EOF) LAST;
2907                     NEXT;
2908                 } else {
2909                     /* lonely ESC  */
2910                     (*oconv)(0, ESC);
2911                     SEND;
2912                 }
2913             } else if (c1 == LF || c1 == CR) {
2914                 if (broken_f&4) {
2915                     input_mode = ASCII; set_iconv(FALSE, 0);
2916                     SEND;
2917                 } else if (mime_decode_f && !mime_decode_mode){
2918                     if (c1 == LF) {
2919                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
2920                             i_ungetc(SP,f);
2921                             continue;
2922                         } else {
2923                             i_ungetc(c1,f);
2924                         }
2925                         c1 = LF;
2926                         SEND;
2927                     } else  { /* if (c1 == CR)*/
2928                         if ((c1=(*i_getc)(f))!=EOF) {
2929                             if (c1==SP) {
2930                                 i_ungetc(SP,f);
2931                                 continue;
2932                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
2933                                 i_ungetc(SP,f);
2934                                 continue;
2935                             } else {
2936                                 i_ungetc(c1,f);
2937                             }
2938                             i_ungetc(LF,f);
2939                         } else {
2940                             i_ungetc(c1,f);
2941                         }
2942                         c1 = CR;
2943                         SEND;
2944                     }
2945                 }
2946             } else if (c1 == DEL && input_mode == X0208) {
2947                 /* CP5022x */
2948                 c2 = c1;
2949                 NEXT;
2950             } else
2951                 SEND;
2952         }
2953         /* send: */
2954         switch(input_mode){
2955         case ASCII:
2956             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2957             case -2:
2958                 /* 4 bytes UTF-8 */
2959                 if ((c0 = (*i_getc)(f)) != EOF) {
2960                     code_status(c0);
2961                     c0 <<= 8;
2962                     if ((c3 = (*i_getc)(f)) != EOF) {
2963                         code_status(c3);
2964                         (*iconv)(c2, c1, c0|c3);
2965                     }
2966                 }
2967                 break;
2968             case -1:
2969                 /* 3 bytes EUC or UTF-8 */
2970                 if ((c0 = (*i_getc)(f)) != EOF) {
2971                     code_status(c0);
2972                     (*iconv)(c2, c1, c0);
2973                 }
2974                 break;
2975             }
2976             break;
2977         case X0208:
2978         case X0213_1:
2979             if (ms_ucs_map_f &&
2980                 0x7F <= c2 && c2 <= 0x92 &&
2981                 0x21 <= c1 && c1 <= 0x7E) {
2982                 /* CP932 UDC */
2983                 if(c1 == 0x7F) return 0;
2984                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2985                 c2 = 0;
2986             }
2987             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2988             break;
2989 #ifdef X0212_ENABLE
2990         case X0212:
2991             (*oconv)(PREFIX_EUCG3 | c2, c1);
2992             break;
2993 #endif /* X0212_ENABLE */
2994         case X0213_2:
2995             (*oconv)(PREFIX_EUCG3 | c2, c1);
2996             break;
2997         default:
2998             (*oconv)(input_mode, c1);  /* other special case */
2999         }
3000
3001         c2 = 0;
3002         c0 = 0;
3003         continue;
3004         /* goto next_word */
3005     }
3006
3007     /* epilogue */
3008     (*iconv)(EOF, 0, 0);
3009     if (!input_codename)
3010     {
3011         if (is_8bit) {
3012             struct input_code *p = input_code_list;
3013             struct input_code *result = p;
3014             while (p->name){
3015                 if (p->score < result->score) result = p;
3016                 ++p;
3017             }
3018             set_input_codename(result->name);
3019 #ifdef CHECK_OPTION
3020             debug(result->name);
3021 #endif
3022         }
3023     }
3024     return 1;
3025 }
3026
3027 nkf_char
3028 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3029 {
3030     nkf_char ret, c3, c0;
3031     int hold_index;
3032
3033
3034     /** it must NOT be in the kanji shifte sequence      */
3035     /** it must NOT be written in JIS7                   */
3036     /** and it must be after 2 byte 8bit code            */
3037
3038     hold_count = 0;
3039     push_hold_buf(c2);
3040     push_hold_buf(c1);
3041
3042     while ((c1 = (*i_getc)(f)) != EOF) {
3043         if (c1 == ESC){
3044             (*i_ungetc)(c1,f);
3045             break;
3046         }
3047         code_status(c1);
3048         if (push_hold_buf(c1) == EOF || estab_f){
3049             break;
3050         }
3051     }
3052
3053     if (!estab_f){
3054         struct input_code *p = input_code_list;
3055         struct input_code *result = p;
3056         if (c1 == EOF){
3057             code_status(c1);
3058         }
3059         while (p->name){
3060             if (p->status_func && p->score < result->score){
3061                 result = p;
3062             }
3063             ++p;
3064         }
3065         set_iconv(TRUE, result->iconv_func);
3066     }
3067
3068
3069     /** now,
3070      ** 1) EOF is detected, or
3071      ** 2) Code is established, or
3072      ** 3) Buffer is FULL (but last word is pushed)
3073      **
3074      ** in 1) and 3) cases, we continue to use
3075      ** Kanji codes by oconv and leave estab_f unchanged.
3076      **/
3077
3078     ret = c1;
3079     hold_index = 0;
3080     while (hold_index < hold_count){
3081         c2 = hold_buf[hold_index++];
3082         if (c2 <= DEL
3083 #ifdef NUMCHAR_OPTION
3084             || is_unicode_capsule(c2)
3085 #endif
3086             ){
3087             (*iconv)(0, c2, 0);
3088             continue;
3089         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3090             (*iconv)(X0201, c2, 0);
3091             continue;
3092         }
3093         if (hold_index < hold_count){
3094             c1 = hold_buf[hold_index++];
3095         }else{
3096             c1 = (*i_getc)(f);
3097             if (c1 == EOF){
3098                 c3 = EOF;
3099                 break;
3100             }
3101             code_status(c1);
3102         }
3103         c0 = 0;
3104         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3105         case -2:
3106             /* 4 bytes UTF-8 */
3107             if (hold_index < hold_count){
3108                 c0 = hold_buf[hold_index++];
3109             } else if ((c0 = (*i_getc)(f)) == EOF) {
3110                 ret = EOF;
3111                 break;
3112             } else {
3113                 code_status(c0);
3114                 c0 <<= 8;
3115                 if (hold_index < hold_count){
3116                     c3 = hold_buf[hold_index++];
3117                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3118                     c0 = ret = EOF;
3119                     break;
3120                 } else {
3121                     code_status(c3);
3122                     (*iconv)(c2, c1, c0|c3);
3123                 }
3124             }
3125             break;
3126         case -1:
3127             /* 3 bytes EUC or UTF-8 */
3128             if (hold_index < hold_count){
3129                 c0 = hold_buf[hold_index++];
3130             } else if ((c0 = (*i_getc)(f)) == EOF) {
3131                 ret = EOF;
3132                 break;
3133             } else {
3134                 code_status(c0);
3135             }
3136             (*iconv)(c2, c1, c0);
3137             break;
3138         }
3139         if (c0 == EOF) break;
3140     }
3141     return ret;
3142 }
3143
3144 nkf_char push_hold_buf(nkf_char c2)
3145 {
3146     if (hold_count >= HOLD_SIZE*2)
3147         return (EOF);
3148     hold_buf[hold_count++] = (unsigned char)c2;
3149     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3150 }
3151
3152 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3153 {
3154 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3155     nkf_char val;
3156 #endif
3157     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3158 #ifdef SHIFTJIS_CP932
3159     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3160         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3161         if (val){
3162             c2 = val >> 8;
3163             c1 = val & 0xff;
3164         }
3165     }
3166     if (cp932inv_f
3167         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3168         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3169         if (c){
3170             c2 = c >> 8;
3171             c1 = c & 0xff;
3172         }
3173     }
3174 #endif /* SHIFTJIS_CP932 */
3175 #ifdef X0212_ENABLE
3176     if (!x0213_f && is_ibmext_in_sjis(c2)){
3177         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3178         if (val){
3179             if (val > 0x7FFF){
3180                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3181                 c1 = val & 0xff;
3182             }else{
3183                 c2 = val >> 8;
3184                 c1 = val & 0xff;
3185             }
3186             if (p2) *p2 = c2;
3187             if (p1) *p1 = c1;
3188             return 0;
3189         }
3190     }
3191 #endif
3192     if(c2 >= 0x80){
3193         if(x0213_f && c2 >= 0xF0){
3194             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3195                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3196             }else{ /* 78<=k<=94 */
3197                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3198                 if (0x9E < c1) c2++;
3199             }
3200         }else{
3201             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3202             if (0x9E < c1) c2++;
3203         }
3204         if (c1 < 0x9F)
3205             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
3206         else {
3207             c1 = c1 - 0x7E;
3208         }
3209     }
3210
3211 #ifdef X0212_ENABLE
3212     c2 = x0212_unshift(c2);
3213 #endif
3214     if (p2) *p2 = c2;
3215     if (p1) *p1 = c1;
3216     return 0;
3217 }
3218
3219 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3220 {
3221     if (c2 == X0201) {
3222         c1 &= 0x7f;
3223     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3224         /* NOP */
3225     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3226         /* CP932 UDC */
3227         if(c1 == 0x7F) return 0;
3228         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3229         c2 = 0;
3230     } else {
3231         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3232         if (ret) return ret;
3233     }
3234     (*oconv)(c2, c1);
3235     return 0;
3236 }
3237
3238 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3239 {
3240     if (c2 == X0201) {
3241         c1 &= 0x7f;
3242 #ifdef X0212_ENABLE
3243     }else if (c2 == 0x8f){
3244         if (c0 == 0){
3245             return -1;
3246         }
3247         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3248             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3249             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3250             c2 = 0;
3251         } else {
3252             c2 = (c2 << 8) | (c1 & 0x7f);
3253             c1 = c0 & 0x7f;
3254 #ifdef SHIFTJIS_CP932
3255             if (cp51932_f){
3256                 nkf_char s2, s1;
3257                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3258                     s2e_conv(s2, s1, &c2, &c1);
3259                     if (c2 < 0x100){
3260                         c1 &= 0x7f;
3261                         c2 &= 0x7f;
3262                     }
3263                 }
3264             }
3265 #endif /* SHIFTJIS_CP932 */
3266         }
3267 #endif /* X0212_ENABLE */
3268     } else if (c2 == SSO){
3269         c2 = X0201;
3270         c1 &= 0x7f;
3271     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
3272         /* NOP */
3273     } else {
3274         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3275             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3276             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3277             c2 = 0;
3278         } else {
3279             c1 &= 0x7f;
3280             c2 &= 0x7f;
3281 #ifdef SHIFTJIS_CP932
3282             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3283                 nkf_char s2, s1;
3284                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3285                     s2e_conv(s2, s1, &c2, &c1);
3286                     if (c2 < 0x100){
3287                         c1 &= 0x7f;
3288                         c2 &= 0x7f;
3289                     }
3290                 }
3291             }
3292 #endif /* SHIFTJIS_CP932 */
3293         }
3294     }
3295     (*oconv)(c2, c1);
3296     return 0;
3297 }
3298
3299 #ifdef UTF8_INPUT_ENABLE
3300 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3301 {
3302     nkf_char ret = 0;
3303
3304     if (!c1){
3305         *p2 = 0;
3306         *p1 = c2;
3307     }else if (0xc0 <= c2 && c2 <= 0xef) {
3308         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3309 #ifdef NUMCHAR_OPTION
3310         if (ret > 0){
3311             if (p2) *p2 = 0;
3312             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3313             ret = 0;
3314         }
3315 #endif
3316     }
3317     return ret;
3318 }
3319
3320 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3321 {
3322     nkf_char ret = 0;
3323     static const char w_iconv_utf8_1st_byte[] =
3324     { /* 0xC0 - 0xFF */
3325         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3326         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3327         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3328         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3329
3330     if (c2 < 0 || 0xff < c2) {
3331     }else if (c2 == 0) { /* 0 : 1 byte*/
3332         c0 = 0;
3333     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3334         return 0;
3335     } else{
3336         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3337         case 21:
3338             if (c1 < 0x80 || 0xBF < c1) return 0;
3339             break;
3340         case 30:
3341             if (c0 == 0) return -1;
3342             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3343                 return 0;
3344             break;
3345         case 31:
3346         case 33:
3347             if (c0 == 0) return -1;
3348             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3349                 return 0;
3350             break;
3351         case 32:
3352             if (c0 == 0) return -1;
3353             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3354                 return 0;
3355             break;
3356         case 40:
3357             if (c0 == 0) return -2;
3358             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3359                 return 0;
3360             break;
3361         case 41:
3362             if (c0 == 0) return -2;
3363             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3364                 return 0;
3365             break;
3366         case 42:
3367             if (c0 == 0) return -2;
3368             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3369                 return 0;
3370             break;
3371         default:
3372             return 0;
3373             break;
3374         }
3375     }
3376     if (c2 == 0 || c2 == EOF){
3377     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3378         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3379         c2 = 0;
3380     } else {
3381         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3382     }
3383     if (ret == 0){
3384         (*oconv)(c2, c1);
3385     }
3386     return ret;
3387 }
3388 #endif
3389
3390 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3391 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3392 {
3393     val &= VALUE_MASK;
3394     if (val < 0x80){
3395         *p2 = val;
3396         *p1 = 0;
3397         *p0 = 0;
3398     }else if (val < 0x800){
3399         *p2 = 0xc0 | (val >> 6);
3400         *p1 = 0x80 | (val & 0x3f);
3401         *p0 = 0;
3402     } else if (val <= NKF_INT32_C(0xFFFF)) {
3403         *p2 = 0xe0 | (val >> 12);
3404         *p1 = 0x80 | ((val >> 6) & 0x3f);
3405         *p0 = 0x80 | (val        & 0x3f);
3406     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3407         *p2 = 0xe0 |  (val >> 16);
3408         *p1 = 0x80 | ((val >> 12) & 0x3f);
3409         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3410     } else {
3411         *p2 = 0;
3412         *p1 = 0;
3413         *p0 = 0;
3414     }
3415 }
3416 #endif
3417
3418 #ifdef UTF8_INPUT_ENABLE
3419 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3420 {
3421     nkf_char val;
3422     if (c2 >= 0xf8) {
3423         val = -1;
3424     } else if (c2 >= 0xf0){
3425         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3426         val = (c2 & 0x0f) << 18;
3427         val |= (c1 & 0x3f) << 12;
3428         val |= (c0 & 0x3f00) >> 2;
3429         val |= (c0 & 0x3f);
3430     }else if (c2 >= 0xe0){
3431         val = (c2 & 0x0f) << 12;
3432         val |= (c1 & 0x3f) << 6;
3433         val |= (c0 & 0x3f);
3434     }else if (c2 >= 0xc0){
3435         val = (c2 & 0x1f) << 6;
3436         val |= (c1 & 0x3f);
3437     }else{
3438         val = c2;
3439     }
3440     return val;
3441 }
3442
3443 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3444 {
3445     nkf_char c2, c1, c0;
3446     nkf_char ret = 0;
3447     val &= VALUE_MASK;
3448     if (val < 0x80){
3449         *p2 = 0;
3450         *p1 = val;
3451     }else{
3452         w16w_conv(val, &c2, &c1, &c0);
3453         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3454 #ifdef NUMCHAR_OPTION
3455         if (ret > 0){
3456             *p2 = 0;
3457             *p1 = CLASS_UNICODE | val;
3458             ret = 0;
3459         }
3460 #endif
3461     }
3462     return ret;
3463 }
3464 #endif
3465
3466 #ifdef UTF8_INPUT_ENABLE
3467 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3468 {
3469     nkf_char ret = 0;
3470     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3471         (*oconv)(c2, c1);
3472         return 0;
3473     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3474         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3475             return -2;
3476         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3477         c2 = 0;
3478     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3479         /*
3480            return 2;
3481         */
3482         return 1;
3483     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3484     if (ret) return ret;
3485     (*oconv)(c2, c1);
3486     return 0;
3487 }
3488
3489 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3490 {
3491     int ret = 0;
3492
3493     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3494     } else if (is_unicode_bmp(c1)) {
3495         ret = w16e_conv(c1, &c2, &c1);
3496     } else {
3497         c2 = 0;
3498         c1 =  CLASS_UNICODE | c1;
3499     }
3500     if (ret) return ret;
3501     (*oconv)(c2, c1);
3502     return 0;
3503 }
3504
3505 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3506 {
3507     const unsigned short *const *pp;
3508     const unsigned short *const *const *ppp;
3509     static const char no_best_fit_chars_table_C2[] =
3510     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3511         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3512         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3513         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3514     static const char no_best_fit_chars_table_C2_ms[] =
3515     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3516         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3517         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3518         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3519     static const char no_best_fit_chars_table_932_C2[] =
3520     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3521         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3522         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3523         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3524     static const char no_best_fit_chars_table_932_C3[] =
3525     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3526         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3527         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3528         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3529     nkf_char ret = 0;
3530
3531     if(c2 < 0x80){
3532         *p2 = 0;
3533         *p1 = c2;
3534     }else if(c2 < 0xe0){
3535         if(no_best_fit_chars_f){
3536             if(ms_ucs_map_f == UCS_MAP_CP932){
3537                 switch(c2){
3538                 case 0xC2:
3539                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3540                     break;
3541                 case 0xC3:
3542                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3543                     break;
3544                 }
3545             }else if(!cp932inv_f){
3546                 switch(c2){
3547                 case 0xC2:
3548                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3549                     break;
3550                 case 0xC3:
3551                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3552                     break;
3553                 }
3554             }else if(ms_ucs_map_f == UCS_MAP_MS){