OSDN Git Service

* Add option -Z4: Convert JIS X 0208 Katakana to JIS X 0201 Katakana.
[nkf/nkf.git] / nkf.c
1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B 
5 ** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
6 ** Copyright (C) 1996,1998
7 ** Copyright (C) 2002
8 ** \e$BO"Mm@h!'\e(B \e$BN05eBg3X>pJs9)3X2J\e(B \e$B2OLn\e(B \e$B??<#\e(B  mime/X0208 support
9 ** \e$B!J\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp\e$B!K\e(B
10 ** \e$BO"Mm@h!'\e(B COW for DOS & Win16 & Win32 & OS/2
11 ** \e$B!J\e(BE-Mail Address: GHG00637@niftyserve.or.p\e$B!K\e(B
12 **
13 **    \e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"\e(B
14 **    \e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#\e(B
15 **    \e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#\e(B
16 **    \e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#\e(B
17 **    \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
18 **    \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
19 **
20 **    Everyone is permitted to do anything on this program 
21 **    including copying, modifying, improving,
22 **    as long as you don't try to pretend that you wrote it.
23 **    i.e., the above copyright notice has to appear in all copies.  
24 **    Binary distribution requires original version messages.
25 **    You don't have to ask before copying, redistribution or publishing.
26 **    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
28
29 /***********************************************************************
30 ** UTF-8 \e$B%5%]!<%H$K$D$$$F\e(B
31 **    \e$B=>Mh$N\e(B nkf \e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9\e(B
32 **    nkf -e \e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G\e(B UTF-8 \e$B$HH=Dj$5$l$l$P!"\e(B
33 **    \e$B$=$N$^$^\e(B euc-jp \e$B$KJQ49$5$l$^$9\e(B
34 **
35 **    \e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#\e(B
36 **    (\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O\e(B)
37 **
38 **    \e$B2?$+LdBj$r8+$D$1$?$i!"\e(B
39 **        E-Mail: furukawa@tcp-ip.or.jp
40 **    \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.130 2007/08/31 14:06:08 naruse Exp $ */
43 #define NKF_VERSION "2.0.8"
44 #define NKF_RELEASE_DATE "2007-08-31"
45 #include "config.h"
46 #include "utf8tbl.h"
47
48 #define COPY_RIGHT \
49     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
50     "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
51
52
53 /*
54 **
55 **
56 **
57 ** USAGE:       nkf [flags] [file] 
58 **
59 ** Flags:
60 ** b    Output is buffered             (DEFAULT)
61 ** u    Output is unbuffered
62 **
63 ** t    no operation
64 **
65 ** j    Output code is JIS 7 bit        (DEFAULT SELECT) 
66 ** s    Output code is MS Kanji         (DEFAULT SELECT) 
67 ** e    Output code is AT&T JIS         (DEFAULT SELECT) 
68 ** w    Output code is UTF-8            (DEFAULT SELECT) 
69 ** l    Output code is JIS 7bit and ISO8859-1 Latin-1
70 **
71 ** m    MIME conversion for ISO-2022-JP
72 ** I    Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
73 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
74 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
75 ** M    MIME output conversion 
76 **
77 ** r  {de/en}crypt ROT13/47
78 **
79 ** v  display Version
80 **
81 ** T  Text mode output        (for MS-DOS)
82 **
83 ** x    Do not convert X0201 kana into X0208
84 ** Z    Convert X0208 alphabet to ASCII
85 **
86 ** f60  fold option
87 **
88 ** m    MIME decode
89 ** B    try to fix broken JIS, missing Escape
90 ** B[1-9]  broken level
91 **
92 ** O   Output to 'nkf.out' file or last file name
93 ** d   Delete \r in line feed 
94 ** c   Add \r in line feed 
95 ** -- other long option
96 ** -- ignore following option (don't use with -O )
97 **
98 **/
99
100 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
101 #define MSDOS
102 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
103 #define __WIN32__
104 #endif
105 #endif
106
107 #ifdef PERL_XS
108 #undef OVERWRITE
109 #endif
110
111 #ifndef PERL_XS
112 #include <stdio.h>
113 #endif
114
115 #include <stdlib.h>
116 #include <string.h>
117
118 #if defined(MSDOS) || defined(__OS2__)
119 #include <fcntl.h>
120 #include <io.h>
121 #if defined(_MSC_VER) || defined(__WATCOMC__)
122 #define mktemp _mktemp
123 #endif
124 #endif
125
126 #ifdef MSDOS
127 #ifdef LSI_C
128 #define setbinmode(fp) fsetbin(fp)
129 #elif defined(__DJGPP__)
130 #include <libc/dosio.h>
131 #define setbinmode(fp) djgpp_setbinmode(fp)
132 #else /* Microsoft C, Turbo C */
133 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
134 #endif
135 #else /* UNIX */
136 #define setbinmode(fp)
137 #endif
138
139 #if defined(__DJGPP__)
140 void  djgpp_setbinmode(FILE *fp)
141 {
142     /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
143     int fd, m;
144     fd = fileno(fp);
145     m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
146     __file_handle_set(fd, m);
147 }
148 #endif
149
150 #ifdef _IOFBF /* SysV and MSDOS, Windows */
151 #define       setvbuffer(fp, buf, size)       setvbuf(fp, buf, _IOFBF, size)
152 #else /* BSD */
153 #define       setvbuffer(fp, buf, size)       setbuffer(fp, buf, size)
154 #endif
155
156 /*Borland C++ 4.5 EasyWin*/
157 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
158 #define         EASYWIN
159 #ifndef __WIN16__
160 #define __WIN16__
161 #endif
162 #include <windows.h>
163 #endif
164
165 #ifdef OVERWRITE
166 /* added by satoru@isoternet.org */
167 #if defined(__EMX__)
168 #include <sys/types.h>
169 #endif
170 #include <sys/stat.h>
171 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
172 #include <unistd.h>
173 #if defined(__WATCOMC__)
174 #include <sys/utime.h>
175 #else
176 #include <utime.h>
177 #endif
178 #else /* defined(MSDOS) */
179 #ifdef __WIN32__
180 #ifdef __BORLANDC__ /* BCC32 */
181 #include <utime.h>
182 #else /* !defined(__BORLANDC__) */
183 #include <sys/utime.h>
184 #endif /* (__BORLANDC__) */
185 #else /* !defined(__WIN32__) */
186 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__)  /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
187 #include <sys/utime.h>
188 #elif defined(__TURBOC__) /* BCC */
189 #include <utime.h>
190 #elif defined(LSI_C) /* LSI C */
191 #endif /* (__WIN32__) */
192 #endif
193 #endif
194 #endif
195
196 #define         FALSE   0
197 #define         TRUE    1
198
199 /* state of output_mode and input_mode  
200
201    c2           0 means ASCII
202                 X0201
203                 ISO8859_1
204                 X0208
205                 EOF      all termination
206    c1           32bit data
207
208  */
209
210 #define         ASCII           0
211 #define         X0208           1
212 #define         X0201           2
213 #define         ISO8859_1       8
214 #define         NO_X0201        3
215 #define         X0212      0x2844
216 #define         X0213_1    0x284F
217 #define         X0213_2    0x2850
218
219 /* Input Assumption */
220
221 #define         JIS_INPUT       4
222 #define         EUC_INPUT      16
223 #define         SJIS_INPUT      5
224 #define         LATIN1_INPUT    6
225 #define         FIXED_MIME      7
226 #define         STRICT_MIME     8
227
228 /* MIME ENCODE */
229
230 #define         ISO2022JP       9
231 #define         JAPANESE_EUC   10
232 #define         SHIFT_JIS      11
233
234 #define         UTF8           12
235 #define         UTF8_INPUT     13
236 #define         UTF16_INPUT    1015
237 #define         UTF32_INPUT    1017
238
239 /* byte order */
240
241 #define         ENDIAN_BIG      1234
242 #define         ENDIAN_LITTLE   4321
243 #define         ENDIAN_2143     2143
244 #define         ENDIAN_3412     3412
245
246 #define         WISH_TRUE      15
247
248 /* ASCII CODE */
249
250 #define         BS      0x08
251 #define         TAB     0x09
252 #define         NL      0x0a
253 #define         CR      0x0d
254 #define         ESC     0x1b
255 #define         SPACE   0x20
256 #define         AT      0x40
257 #define         SSP     0xa0
258 #define         DEL     0x7f
259 #define         SI      0x0f
260 #define         SO      0x0e
261 #define         SSO     0x8e
262 #define         SS3     0x8f
263
/*
 * Locale-independent ASCII character classification helpers.
 *
 * Every macro parameter is wrapped in parentheses so that an expression
 * argument (e.g. `c | 0x20`, `x & 0xff`) is classified as a whole instead
 * of being torn apart by operator precedence.  The arguments are still
 * evaluated more than once, so do not pass expressions with side effects.
 */
#define         is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (' '<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit; any non-hex character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0 )
/* True when the high byte of the low 16 bits of c2 equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
282
/*
 * Lead-byte ranges used by the CP932 conversion tables.
 * is_ibmext_in_sjis(c2) is true when c2 lies in
 * [CP932_TABLE_BEGIN, CP932_TABLE_END]; the parameter is parenthesised so
 * expression arguments are compared as a whole.
 */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
288
289 #define         HOLD_SIZE       1024
290 #if defined(INT_IS_SHORT)
291 #define         IOBUF_SIZE      2048
292 #else
293 #define         IOBUF_SIZE      16384
294 #endif
295
296 #define         DEFAULT_J       'B'
297 #define         DEFAULT_R       'B'
298
299 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
300 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
301
302 #define         RANGE_NUM_MAX   18
303 #define         GETA1   0x22
304 #define         GETA2   0x2e
305
306
307 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
308 #define sizeof_euc_to_utf8_1byte 94
309 #define sizeof_euc_to_utf8_2bytes 94
310 #define sizeof_utf8_to_euc_C2 64
311 #define sizeof_utf8_to_euc_E5B8 64
312 #define sizeof_utf8_to_euc_2bytes 112
313 #define sizeof_utf8_to_euc_3bytes 16
314 #endif
315
316 /* MIME preprocessor */
317
318 #ifdef EASYWIN /*Easy Win */
319 extern POINT _BufferSize;
320 #endif
321
322 struct input_code{ /* one entry of input_code_list: an encoding name plus its callbacks */
323     char *name; /* encoding name, e.g. "EUC-JP", "Shift_JIS" */
324     nkf_char stat; /* presumably per-encoding detection state -- TODO confirm */
325     nkf_char score; /* see set_code_score()/clr_code_score(); exact meaning not visible here */
326     nkf_char index; /* presumably fill position in buf -- TODO confirm */
327     nkf_char buf[3];
328     void (*status_func)(struct input_code *, nkf_char); /* e_status/s_status/w_status; NULL for UTF-16/UTF-32 */
329     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); /* input converter: e_iconv, s_iconv, w_iconv, ... */
330     int _file_stat;
331 };
332
333 static char *input_codename = "";
334
335 #ifndef PERL_XS
336 static const char *CopyRight = COPY_RIGHT;
337 #endif
338 #if !defined(PERL_XS) && !defined(WIN32DLL)
339 static  nkf_char     noconvert(FILE *f);
340 #endif
341 static  void    module_connection(void);
342 static  nkf_char     kanji_convert(FILE *f);
343 static  nkf_char     h_conv(FILE *f,nkf_char c2,nkf_char c1);
344 static  nkf_char     push_hold_buf(nkf_char c2);
345 static  void    set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
346 static  nkf_char     s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
347 static  nkf_char     s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
348 static  nkf_char     e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
349 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
350 /* UCS Mapping
351  * 0: Shift_JIS, eucJP-ascii
352  * 1: eucJP-ms
353  * 2: CP932, CP51932
354  * 3: CP10001
355  */
356 #define UCS_MAP_ASCII   0
357 #define UCS_MAP_MS      1
358 #define UCS_MAP_CP932   2
359 #define UCS_MAP_CP10001 3
360 static int ms_ucs_map_f = UCS_MAP_ASCII;
361 #endif
362 #ifdef UTF8_INPUT_ENABLE
363 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
364 static  int     no_cp932ext_f = FALSE;
365 /* ignore ZERO WIDTH NO-BREAK SPACE */
366 static  int     no_best_fit_chars_f = FALSE;
367 static  int     input_endian = ENDIAN_BIG;
368 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
369 static  void    nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
370 static  void    encode_fallback_html(nkf_char c);
371 static  void    encode_fallback_xml(nkf_char c);
372 static  void    encode_fallback_java(nkf_char c);
373 static  void    encode_fallback_perl(nkf_char c);
374 static  void    encode_fallback_subchar(nkf_char c);
375 static  void    (*encode_fallback)(nkf_char c) = NULL;
376 static  nkf_char     w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
377 static  nkf_char     w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
378 static  nkf_char     w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
379 static  nkf_char     w_iconv32(nkf_char c2,nkf_char c1,nkf_char c0);
380 static  nkf_char        unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
381 static  nkf_char        w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
382 static  void    w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
383 static  nkf_char     ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
384 static  nkf_char     w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
385 static  void    w_status(struct input_code *, nkf_char);
386 #endif
387 #ifdef UTF8_OUTPUT_ENABLE
388 static  int     output_bom_f = FALSE;
389 static  int     output_endian = ENDIAN_BIG;
390 static  nkf_char     e2w_conv(nkf_char c2,nkf_char c1);
391 static  void    w_oconv(nkf_char c2,nkf_char c1);
392 static  void    w_oconv16(nkf_char c2,nkf_char c1);
393 static  void    w_oconv32(nkf_char c2,nkf_char c1);
394 #endif
395 static  void    e_oconv(nkf_char c2,nkf_char c1);
396 static  nkf_char     e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
397 static  void    s_oconv(nkf_char c2,nkf_char c1);
398 static  void    j_oconv(nkf_char c2,nkf_char c1);
399 static  void    fold_conv(nkf_char c2,nkf_char c1);
400 static  void    cr_conv(nkf_char c2,nkf_char c1);
401 static  void    z_conv(nkf_char c2,nkf_char c1);
402 static  void    rot_conv(nkf_char c2,nkf_char c1);
403 static  void    hira_conv(nkf_char c2,nkf_char c1);
404 static  void    base64_conv(nkf_char c2,nkf_char c1);
405 static  void    iso2022jp_check_conv(nkf_char c2,nkf_char c1);
406 static  void    no_connection(nkf_char c2,nkf_char c1);
407 static  nkf_char     no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
408
409 static  void    code_score(struct input_code *ptr);
410 static  void    code_status(nkf_char c);
411
412 static  void    std_putc(nkf_char c);
413 static  nkf_char     std_getc(FILE *f);
414 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
415
416 static  nkf_char     broken_getc(FILE *f);
417 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
418
419 static  nkf_char     mime_begin(FILE *f);
420 static  nkf_char     mime_getc(FILE *f);
421 static  nkf_char     mime_ungetc(nkf_char c,FILE *f);
422
423 static  void    switch_mime_getc(void);
424 static  void    unswitch_mime_getc(void);
425 static  nkf_char     mime_begin_strict(FILE *f);
426 static  nkf_char     mime_getc_buf(FILE *f);
427 static  nkf_char     mime_ungetc_buf(nkf_char c,FILE *f);
428 static  nkf_char     mime_integrity(FILE *f,const unsigned char *p);
429
430 static  nkf_char     base64decode(nkf_char c);
431 static  void    mime_prechar(nkf_char c2, nkf_char c1);
432 static  void    mime_putc(nkf_char c);
433 static  void    open_mime(nkf_char c);
434 static  void    close_mime(void);
435 static  void    eof_mime(void);
436 static  void    mimeout_addchar(nkf_char c);
437 #ifndef PERL_XS
438 static  void    usage(void);
439 static  void    version(void);
440 #endif
441 static  void    options(unsigned char *c);
442 #if defined(PERL_XS) || defined(WIN32DLL)
443 static  void    reinit(void);
444 #endif
445
446 /* buffers */
447
448 #if !defined(PERL_XS) && !defined(WIN32DLL)
449 static unsigned char   stdibuf[IOBUF_SIZE];
450 static unsigned char   stdobuf[IOBUF_SIZE];
451 #endif
452 static unsigned char   hold_buf[HOLD_SIZE*2];
453 static int             hold_count = 0;
454
455 /* MIME preprocessor fifo */
456
457 #define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
458 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)   
459 #define Fifo(n)         mime_buf[(n)&MIME_BUF_MASK]
460 static unsigned char           mime_buf[MIME_BUF_SIZE];
461 static unsigned int            mime_top = 0;
462 static unsigned int            mime_last = 0;  /* decoded */
463 static unsigned int            mime_input = 0; /* undecoded */
464 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
465
466 /* flags */
467 static int             unbuf_f = FALSE;        /* unbuffered stdout (main calls setbuf(stdout, NULL)) */
468 static int             estab_f = FALSE;
469 static int             nop_f = FALSE;          /* no operation: main uses noconvert() on stdin */
470 static int             binmode_f = TRUE;       /* binary mode */
471 static int             rot_f = FALSE;          /* rot13/47 mode */
472 static int             hira_f = FALSE;          /* hiragana/katakana conversion */
473 static int             input_f = FALSE;        /* non fixed input code  */
474 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
475 static int             mime_f = STRICT_MIME;   /* convert MIME B base64 or Q */
476 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
477 static int             mimebuf_f = FALSE;      /* MIME buffered input */
478 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
479 static int             iso8859_f = FALSE;      /* ISO8859 through */
480 static int             mimeout_f = FALSE;       /* base64 mode */
481 #if defined(MSDOS) || defined(__OS2__) 
482 static int             x0201_f = TRUE;         /* Assume JISX0201 kana */
483 #else
484 static int             x0201_f = NO_X0201;     /* Assume NO JISX0201 */
485 #endif
486 static int             iso2022jp_f = FALSE;    /* convert ISO-2022-JP */
487
488 #ifdef UNICODE_NORMALIZATION
489 static int nfc_f = FALSE;
490 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc (presumably; confirm at call site) */
491 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
492 static nkf_char nfc_getc(FILE *f);
493 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
494 #endif
495
496 #ifdef INPUT_OPTION
497 static int cap_f = FALSE;
498 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
499 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
500 static nkf_char cap_getc(FILE *f);
501 static nkf_char cap_ungetc(nkf_char c,FILE *f);
502
503 static int url_f = FALSE;
504 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
505 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
506 static nkf_char url_getc(FILE *f);
507 static nkf_char url_ungetc(nkf_char c,FILE *f);
508 #endif
509
/*
 * NKF_INT32_C(n) appends an L suffix when int is only 16 bits wide
 * (INT_IS_SHORT) so the constants below keep their full value.
 *
 * A value whose top byte (CLASS_MASK) equals CLASS_UNICODE is treated as a
 * Unicode "capsule"; the low 24 bits (VALUE_MASK) carry the payload.
 * The predicates parenthesise their parameter so that expression arguments
 * are masked as a whole.
 */
#if defined(INT_IS_SHORT)
#define NKF_INT32_C(n)   (n##L)
#else
#define NKF_INT32_C(n)   (n)
#endif
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
/* True when the class byte of c is CLASS_UNICODE. */
#define is_unicode_capsule(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the 24-bit payload of c is at most 0xFFFF. */
#define is_unicode_bmp(c) (((c) & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
522
523 #ifdef NUMCHAR_OPTION
524 static int numchar_f = FALSE;
525 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of numchar_getc (presumably; confirm at call site) */
526 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
527 static nkf_char numchar_getc(FILE *f);
528 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
529 #endif
530
531 #ifdef CHECK_OPTION
532 static int noout_f = FALSE;
533 static void no_putc(nkf_char c);
534 static nkf_char debug_f = FALSE;
535 static void debug(const char *str);
536 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
537 #endif
538
539 static int guess_f = FALSE;
540 #if !defined PERL_XS
541 static  void    print_guessed_code(char *filename);
542 #endif
543 static  void    set_input_codename(char *codename);
544 static int is_inputcode_mixed = FALSE;
545 static int is_inputcode_set   = FALSE;
546
547 #ifdef EXEC_IO
548 static int exec_f = 0;
549 #endif
550
551 #ifdef SHIFTJIS_CP932
552 /* invert IBM extended characters to others */
553 static int cp51932_f = FALSE;
554
555 /* invert NEC-selected IBM extended characters to IBM extended characters */
556 static int cp932inv_f = TRUE;
557
558 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
559 #endif /* SHIFTJIS_CP932 */
560
561 #ifdef X0212_ENABLE
562 static int x0212_f = FALSE;
563 static nkf_char x0212_shift(nkf_char c);
564 static nkf_char x0212_unshift(nkf_char c);
565 #endif
566 static int x0213_f = FALSE;
567
568 static unsigned char prefix_table[256];
569
570 static void set_code_score(struct input_code *ptr, nkf_char score);
571 static void clr_code_score(struct input_code *ptr, nkf_char score);
572 static void status_disable(struct input_code *ptr);
573 static void status_push_ch(struct input_code *ptr, nkf_char c);
574 static void status_clear(struct input_code *ptr);
575 static void status_reset(struct input_code *ptr);
576 static void status_reinit(struct input_code *ptr);
577 static void status_check(struct input_code *ptr, nkf_char c);
578 static void e_status(struct input_code *, nkf_char);
579 static void s_status(struct input_code *, nkf_char);
580
581 struct input_code input_code_list[] = {
582     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
583     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
584 #ifdef UTF8_INPUT_ENABLE
585     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
586     {"UTF-16",    0, 0, 0, {0, 0, 0},     NULL, w_iconv16, 0},
587     {"UTF-32",    0, 0, 0, {0, 0, 0},     NULL, w_iconv32, 0},
588 #endif
589     {0}
590 };
591
592 static int              mimeout_mode = 0;
593 static int              base64_count = 0;
594
595 /* X0208 -> ASCII converter */
596
597 /* fold parameter */
598 static int             f_line = 0;    /* chars in line */
599 static int             f_prev = 0;
600 static int             fold_preserve_f = FALSE; /* preserve new lines */
601 static int             fold_f  = FALSE;
602 static int             fold_len  = 0;
603
604 /* options */
605 static unsigned char   kanji_intro = DEFAULT_J;
606 static unsigned char   ascii_intro = DEFAULT_R;
607
608 /* Folding */
609
610 #define FOLD_MARGIN  10
611 #define DEFAULT_FOLD 60
612
613 static int             fold_margin  = FOLD_MARGIN;
614
615 /* converters */
616
617 #ifdef DEFAULT_CODE_JIS
618 #   define  DEFAULT_CONV j_oconv
619 #endif
620 #ifdef DEFAULT_CODE_SJIS
621 #   define  DEFAULT_CONV s_oconv
622 #endif
623 #ifdef DEFAULT_CODE_EUC
624 #   define  DEFAULT_CONV e_oconv
625 #endif
626 #ifdef DEFAULT_CODE_UTF8
627 #   define  DEFAULT_CONV w_oconv
628 #endif
629
630 /* process default */
631 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
632
633 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
634 /* s_iconv or oconv */
635 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
636
637 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
638 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
639 static void (*o_crconv)(nkf_char c2,nkf_char c1) = no_connection;
640 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
641 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
642 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
643 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
644
645 /* static redirections */
646
647 static  void   (*o_putc)(nkf_char c) = std_putc;
648
649 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
650 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
651
652 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
653 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
654
655 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
656
657 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
658 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
659
660 /* for strict mime */
661 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
662 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
663
664 /* Global states */
665 static int output_mode = ASCII,    /* output kanji mode */
666            input_mode =  ASCII,    /* input kanji mode */
667            shift_mode =  FALSE;    /* TRUE shift out, or X0201  */
668 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
669
670 /* X0201 / X0208 conversion tables */
671
672 /* X0201 kana conversion table */
673 /* 90-9F A0-DF */
674 static const
675 unsigned char cv[]= {
676     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
677     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
678     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
679     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
680     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
681     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
682     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
683     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
684     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
685     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
686     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
687     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
688     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
689     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
690     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
691     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
692     0x00,0x00};
693
694
695 /* X0201 kana conversion table for dakuten */
696 /* 90-9F A0-DF */
697 static const
698 unsigned char dv[]= { 
699     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
700     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
701     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
702     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
703     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
704     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
705     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
706     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
707     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
708     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
709     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
710     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
711     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
712     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
713     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
714     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
715     0x00,0x00};
716
717 /* X0201 kana conversion table for han-dakuten */
718 /* 90-9F A0-DF */
719 static const
720 unsigned char ev[]= { 
721     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
724     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
725     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
726     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
729     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
730     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
731     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
732     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
733     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
734     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
735     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
736     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
737     0x00,0x00};
738
739
740 /* X0208 kigou conversion table */
741 /* 0x8140 - 0x819e */
742 static const
743 unsigned char fv[] = {
744
745     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
746     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
747     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
748     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
749     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
750     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
751     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
752     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
753     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
754     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
755     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
756     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
757 } ;
758
759
760 #define    CRLF      1
761
762 static int             file_out_f = FALSE;
763 #ifdef OVERWRITE
764 static int             overwrite_f = FALSE;
765 static int             preserve_time_f = FALSE;
766 static int             backup_f = FALSE;
767 static char            *backup_suffix = "";
768 static char *get_backup_filename(const char *suffix, const char *filename);
769 #endif
770
771 static int             crmode_f = 0;   /* CR, NL, CRLF */
772 static nkf_char prev_cr = 0;
773 #ifdef EASYWIN /*Easy Win */
774 static int             end_check;
775 #endif /*Easy Win */
776
777 #define STD_GC_BUFSIZE (256)
778 nkf_char std_gc_buf[STD_GC_BUFSIZE];
779 nkf_char std_gc_ndx;
780
781 #ifdef WIN32DLL
782 #include "nkf32dll.c"
783 #elif defined(PERL_XS)
784 #else /* WIN32DLL */
785 int main(int argc, char **argv)
786 {
787     FILE  *fin;
788     unsigned char  *cp;
789
790     char *outfname = NULL;
791     char *origfname;
792
793 #ifdef EASYWIN /*Easy Win */
794     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
795 #endif
796
797     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
798         cp = (unsigned char *)*argv;
799         options(cp);
800 #ifdef EXEC_IO
801         if (exec_f){
802             int fds[2], pid;
803             if (pipe(fds) < 0 || (pid = fork()) < 0){
804                 abort();
805             }
806             if (pid == 0){
807                 if (exec_f > 0){
808                     close(fds[0]);
809                     dup2(fds[1], 1);
810                 }else{
811                     close(fds[1]);
812                     dup2(fds[0], 0);
813                 }
814                 execvp(argv[1], &argv[1]);
815             }
816             if (exec_f > 0){
817                 close(fds[1]);
818                 dup2(fds[0], 0);
819             }else{
820                 close(fds[0]);
821                 dup2(fds[1], 1);
822             }
823             argc = 0;
824             break;
825         }
826 #endif
827     }
828     if(x0201_f == WISH_TRUE)
829          x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
830
831     if (binmode_f == TRUE)
832 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
833     if (freopen("","wb",stdout) == NULL) 
834         return (-1);
835 #else
836     setbinmode(stdout);
837 #endif
838
839     if (unbuf_f)
840       setbuf(stdout, (char *) NULL);
841     else
842       setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
843
844     if (argc == 0) {
845       if (binmode_f == TRUE)
846 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
847       if (freopen("","rb",stdin) == NULL) return (-1);
848 #else
849       setbinmode(stdin);
850 #endif
851       setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
852       if (nop_f)
853           noconvert(stdin);
854       else {
855           kanji_convert(stdin);
856           if (guess_f) print_guessed_code(NULL);
857       }
858     } else {
859       int nfiles = argc;
860         int is_argument_error = FALSE;
861       while (argc--) {
862             is_inputcode_mixed = FALSE;
863             is_inputcode_set   = FALSE;
864             input_codename = "";
865 #ifdef CHECK_OPTION
866             iconv_for_check = 0;
867 #endif
868           if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
869               perror(*--argv);
870                 *argv++;
871                 is_argument_error = TRUE;
872                 continue;
873           } else {
874 #ifdef OVERWRITE
875               int fd = 0;
876               int fd_backup = 0;
877 #endif
878
879 /* reopen file for stdout */
880               if (file_out_f == TRUE) {
881 #ifdef OVERWRITE
882                   if (overwrite_f){
883                       outfname = malloc(strlen(origfname)
884                                         + strlen(".nkftmpXXXXXX")
885                                         + 1);
886                       if (!outfname){
887                           perror(origfname);
888                           return -1;
889                       }
890                       strcpy(outfname, origfname);
891 #ifdef MSDOS
892                       {
893                           int i;
894                           for (i = strlen(outfname); i; --i){
895                               if (outfname[i - 1] == '/'
896                                   || outfname[i - 1] == '\\'){
897                                   break;
898                               }
899                           }
900                           outfname[i] = '\0';
901                       }
902                       strcat(outfname, "ntXXXXXX");
903                       mktemp(outfname);
904                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
905                                 S_IREAD | S_IWRITE);
906 #else
907                       strcat(outfname, ".nkftmpXXXXXX");
908                       fd = mkstemp(outfname);
909 #endif
910                       if (fd < 0
911                           || (fd_backup = dup(fileno(stdout))) < 0
912                           || dup2(fd, fileno(stdout)) < 0
913                           ){
914                           perror(origfname);
915                           return -1;
916                       }
917                   }else
918 #endif
919                   if(argc == 1 ) {
920                       outfname = *argv++;
921                       argc--;
922                   } else {
923                       outfname = "nkf.out";
924                   }
925
926                   if(freopen(outfname, "w", stdout) == NULL) {
927                       perror (outfname);
928                       return (-1);
929                   }
930                   if (binmode_f == TRUE) {
931 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
932                       if (freopen("","wb",stdout) == NULL) 
933                            return (-1);
934 #else
935                       setbinmode(stdout);
936 #endif
937                   }
938               }
939               if (binmode_f == TRUE)
940 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
941                  if (freopen("","rb",fin) == NULL) 
942                     return (-1);
943 #else
944                  setbinmode(fin);
945 #endif 
946               setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
947               if (nop_f)
948                   noconvert(fin);
949               else {
950                   char *filename = NULL;
951                   kanji_convert(fin);
952                   if (nfiles > 1) filename = origfname;
953                   if (guess_f) print_guessed_code(filename);
954               }
955               fclose(fin);
956 #ifdef OVERWRITE
957               if (overwrite_f) {
958                   struct stat     sb;
959 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
960                   time_t tb[2];
961 #else
962                   struct utimbuf  tb;
963 #endif
964
965                   fflush(stdout);
966                   close(fd);
967                   if (dup2(fd_backup, fileno(stdout)) < 0){
968                       perror("dup2");
969                   }
970                   if (stat(origfname, &sb)) {
971                       fprintf(stderr, "Can't stat %s\n", origfname);
972                   }
973                   /* \e$B%Q!<%_%C%7%g%s$rI|85\e(B */
974                   if (chmod(outfname, sb.st_mode)) {
975                       fprintf(stderr, "Can't set permission %s\n", outfname);
976                   }
977
978                   /* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
979                     if(preserve_time_f){
980 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
981                         tb[0] = tb[1] = sb.st_mtime;
982                         if (utime(outfname, tb)) {
983                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
984                         }
985 #else
986                         tb.actime  = sb.st_atime;
987                         tb.modtime = sb.st_mtime;
988                         if (utime(outfname, &tb)) {
989                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
990                         }
991 #endif
992                     }
993                     if(backup_f){
994                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
995 #ifdef MSDOS
996                         unlink(backup_filename);
997 #endif
998                         if (rename(origfname, backup_filename)) {
999                             perror(backup_filename);
1000                             fprintf(stderr, "Can't rename %s to %s\n",
1001                                     origfname, backup_filename);
1002                         }
1003                     }else{
1004 #ifdef MSDOS
1005                         if (unlink(origfname)){
1006                             perror(origfname);
1007                         }
1008 #endif
1009                     }
1010                   if (rename(outfname, origfname)) {
1011                       perror(origfname);
1012                       fprintf(stderr, "Can't rename %s to %s\n",
1013                               outfname, origfname);
1014                   }
1015                   free(outfname);
1016               }
1017 #endif
1018           }
1019       }
1020         if (is_argument_error)
1021             return(-1);
1022     }
1023 #ifdef EASYWIN /*Easy Win */
1024     if (file_out_f == FALSE) 
1025         scanf("%d",&end_check);
1026     else 
1027         fclose(stdout);
1028 #else /* for Other OS */
1029     if (file_out_f == TRUE) 
1030         fclose(stdout);
1031 #endif /*Easy Win */
1032     return (0);
1033 }
1034 #endif /* WIN32DLL */
1035
1036 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from `suffix`.
 *
 * When `suffix` contains no '*', the result is `filename` followed by
 * `suffix`.  Otherwise every '*' in `suffix` is replaced by `filename`
 * and all other characters are copied as they are (suffix "*.orig" and
 * filename "a.c" give "a.c.orig").
 *
 * Returns a newly malloc()ed string that the caller must free(), or NULL
 * (after reporting through perror()) when the allocation fails.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t result_length;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* each '*' is dropped and replaced by one copy of filename */
        result_length = suffix_length - asterisk_count
                        + asterisk_count * filename_length;
    } else {
        result_length = filename_length + suffix_length;
    }

    backup_filename = malloc(result_length + 1);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                strcpy(backup_filename + j, filename);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        strcpy(backup_filename, filename);
        strcpy(backup_filename + filename_length, suffix);
    }
    return backup_filename;
}
1075 #endif
1076
/*
 * Table of long ("--name") options, searched in order by options().
 *
 *   name  - option text after "--"; a trailing '=' marks an option that
 *           takes a value.
 *   alias - when non-empty, the short-option letters that options()
 *           processes in place of the long option; when empty, options()
 *           handles the entry itself by comparing `name`.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    { "ic=",               ""    },
    { "oc=",               ""    },
    { "base64",            "jMB" },
    { "euc",               "e"   },
    { "euc-input",         "E"   },
    { "fj",                "jm"  },
    { "help",              "v"   },
    { "jis",               "j"   },
    { "jis-input",         "J"   },
    { "mac",               "sLm" },
    { "mime",              "jM"  },
    { "mime-input",        "m"   },
    { "msdos",             "sLw" },
    { "sjis",              "s"   },
    { "sjis-input",        "S"   },
    { "unix",              "eLu" },
    { "version",           "V"   },
    { "windows",           "sLw" },
    { "hiragana",          "h1"  },
    { "katakana",          "h2"  },
    { "katakana-hiragana", "h3"  },
    { "guess",             "g"   },
    { "cp932",             ""    },
    { "no-cp932",          ""    },
#ifdef X0212_ENABLE
    { "x0212",             ""    },
#endif
#ifdef UTF8_OUTPUT_ENABLE
    { "utf8",              "w"   },
    { "utf16",             "w16" },
    { "ms-ucs-map",        ""    },
    { "fb-skip",           ""    },
    { "fb-html",           ""    },
    { "fb-xml",            ""    },
    { "fb-perl",           ""    },
    { "fb-java",           ""    },
    { "fb-subchar",        ""    },
    { "fb-subchar=",       ""    },
#endif
#ifdef UTF8_INPUT_ENABLE
    { "utf8-input",        "W"   },
    { "utf16-input",       "W16" },
    { "no-cp932ext",       ""    },
    { "no-best-fit-chars", ""    },
#endif
#ifdef UNICODE_NORMALIZATION
    { "utf8mac-input",     ""    },
#endif
#ifdef OVERWRITE
    { "overwrite",         ""    },
    { "overwrite=",        ""    },
    { "in-place",          ""    },
    { "in-place=",         ""    },
#endif
#ifdef INPUT_OPTION
    { "cap-input",         ""    },
    { "url-input",         ""    },
#endif
#ifdef NUMCHAR_OPTION
    { "numchar-input",     ""    },
#endif
#ifdef CHECK_OPTION
    { "no-output",         ""    },
    { "debug",             ""    },
#endif
#ifdef SHIFTJIS_CP932
    { "cp932inv",          ""    },
#endif
#ifdef EXEC_IO
    { "exec-in",           ""    },
    { "exec-out",          ""    },
#endif
    { "prefix=",           ""    },
};

/*
 * Set to 1 by options() once it meets a bare "--"; from then on
 * options() returns immediately without parsing its argument.
 */
static int option_mode = 0;
1158
1159 void options(unsigned char *cp)
1160 {
1161     nkf_char i, j;
1162     unsigned char *p;
1163     unsigned char *cp_back = NULL;
1164     char codeset[32];
1165
1166     if (option_mode==1)
1167         return;
1168     while(*cp && *cp++!='-');
1169     while (*cp || cp_back) {
1170         if(!*cp){
1171             cp = cp_back;
1172             cp_back = NULL;
1173             continue;
1174         }
1175         p = 0;
1176         switch (*cp++) {
1177         case '-':  /* literal options */
1178             if (!*cp || *cp == SPACE) {        /* ignore the rest of arguments */
1179                 option_mode = 1;
1180                 return;
1181             }
1182             for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1183                 p = (unsigned char *)long_option[i].name;
1184                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1185                 if (*p == cp[j] || cp[j] == ' '){
1186                     p = &cp[j] + 1;
1187                     break;
1188                 }
1189                 p = 0;
1190             }
1191             if (p == 0) return;
1192             while(*cp && *cp != SPACE && cp++);
1193             if (long_option[i].alias[0]){
1194                 cp_back = cp;
1195                 cp = (unsigned char *)long_option[i].alias;
1196             }else{
1197                 if (strcmp(long_option[i].name, "ic=") == 0){
1198                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1199                         codeset[i] = nkf_toupper(p[i]);
1200                     }
1201                     codeset[i] = 0;
1202                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1203                         input_f = JIS_INPUT;
1204                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1205                       strcmp(codeset, "CP50220") == 0 ||
1206                       strcmp(codeset, "CP50221") == 0 ||
1207                       strcmp(codeset, "CP50222") == 0){
1208                         input_f = JIS_INPUT;
1209 #ifdef SHIFTJIS_CP932
1210                         cp51932_f = TRUE;
1211 #endif
1212 #ifdef UTF8_OUTPUT_ENABLE
1213                         ms_ucs_map_f = UCS_MAP_CP932;
1214 #endif
1215                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1216                         input_f = JIS_INPUT;
1217 #ifdef X0212_ENABLE
1218                         x0212_f = TRUE;
1219 #endif
1220                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1221                         input_f = JIS_INPUT;
1222 #ifdef X0212_ENABLE
1223                         x0212_f = TRUE;
1224 #endif
1225                         x0213_f = TRUE;
1226                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1227                         input_f = SJIS_INPUT;
1228                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1229                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1230                              strcmp(codeset, "CP932") == 0 ||
1231                              strcmp(codeset, "MS932") == 0){
1232                         input_f = SJIS_INPUT;
1233 #ifdef SHIFTJIS_CP932
1234                         cp51932_f = TRUE;
1235 #endif
1236 #ifdef UTF8_OUTPUT_ENABLE
1237                         ms_ucs_map_f = UCS_MAP_CP932;
1238 #endif
1239                     }else if(strcmp(codeset, "CP10001") == 0){
1240                         input_f = SJIS_INPUT;
1241 #ifdef SHIFTJIS_CP932
1242                         cp51932_f = TRUE;
1243 #endif
1244 #ifdef UTF8_OUTPUT_ENABLE
1245                         ms_ucs_map_f = UCS_MAP_CP10001;
1246 #endif
1247                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1248                              strcmp(codeset, "EUC-JP") == 0){
1249                         input_f = EUC_INPUT;
1250                     }else if(strcmp(codeset, "CP51932") == 0){
1251                         input_f = EUC_INPUT;
1252 #ifdef SHIFTJIS_CP932
1253                         cp51932_f = TRUE;
1254 #endif
1255 #ifdef UTF8_OUTPUT_ENABLE
1256                         ms_ucs_map_f = UCS_MAP_CP932;
1257 #endif
1258                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1259                              strcmp(codeset, "EUCJP-MS") == 0 ||
1260                              strcmp(codeset, "EUCJPMS") == 0){
1261                         input_f = EUC_INPUT;
1262 #ifdef SHIFTJIS_CP932
1263                         cp51932_f = FALSE;
1264 #endif
1265 #ifdef UTF8_OUTPUT_ENABLE
1266                         ms_ucs_map_f = UCS_MAP_MS;
1267 #endif
1268                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1269                              strcmp(codeset, "EUCJP-ASCII") == 0){
1270                         input_f = EUC_INPUT;
1271 #ifdef SHIFTJIS_CP932
1272                         cp51932_f = FALSE;
1273 #endif
1274 #ifdef UTF8_OUTPUT_ENABLE
1275                         ms_ucs_map_f = UCS_MAP_ASCII;
1276 #endif
1277                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1278                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1279                         input_f = SJIS_INPUT;
1280                         x0213_f = TRUE;
1281 #ifdef SHIFTJIS_CP932
1282                         cp51932_f = FALSE;
1283 #endif
1284                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1285                              strcmp(codeset, "EUC-JIS-2004") == 0){
1286                         input_f = EUC_INPUT;
1287                         x0213_f = TRUE;
1288 #ifdef SHIFTJIS_CP932
1289                         cp51932_f = FALSE;
1290 #endif
1291 #ifdef UTF8_INPUT_ENABLE
1292                     }else if(strcmp(codeset, "UTF-8") == 0 ||
1293                              strcmp(codeset, "UTF-8N") == 0 ||
1294                              strcmp(codeset, "UTF-8-BOM") == 0){
1295                         input_f = UTF8_INPUT;
1296 #ifdef UNICODE_NORMALIZATION
1297                     }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1298                              strcmp(codeset, "UTF-8-MAC") == 0){
1299                         input_f = UTF8_INPUT;
1300                         nfc_f = TRUE;
1301 #endif
1302                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1303                              strcmp(codeset, "UTF-16BE") == 0 ||
1304                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1305                         input_f = UTF16_INPUT;
1306                         input_endian = ENDIAN_BIG;
1307                     }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1308                              strcmp(codeset, "UTF-16LE-BOM") == 0){
1309                         input_f = UTF16_INPUT;
1310                         input_endian = ENDIAN_LITTLE;
1311                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1312                              strcmp(codeset, "UTF-32BE") == 0 ||
1313                              strcmp(codeset, "UTF-32BE-BOM") == 0){
1314                         input_f = UTF32_INPUT;
1315                         input_endian = ENDIAN_BIG;
1316                     }else if(strcmp(codeset, "UTF-32LE") == 0 ||
1317                              strcmp(codeset, "UTF-32LE-BOM") == 0){
1318                         input_f = UTF32_INPUT;
1319                         input_endian = ENDIAN_LITTLE;
1320 #endif
1321                     }
1322                     continue;
1323                 }
1324                 if (strcmp(long_option[i].name, "oc=") == 0){
1325                     x0201_f = FALSE;
1326                     for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1327                         codeset[i] = nkf_toupper(p[i]);
1328                     }
1329                     codeset[i] = 0;
1330                     if(strcmp(codeset, "ISO-2022-JP") == 0){
1331                         output_conv = j_oconv;
1332                     }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1333                         output_conv = j_oconv;
1334                         no_cp932ext_f = TRUE;
1335 #ifdef SHIFTJIS_CP932
1336                         cp932inv_f = FALSE;
1337 #endif
1338 #ifdef UTF8_OUTPUT_ENABLE
1339                         ms_ucs_map_f = UCS_MAP_CP932;
1340 #endif
1341                     }else if(strcmp(codeset, "CP50220") == 0){
1342                         output_conv = j_oconv;
1343                         x0201_f = TRUE;
1344 #ifdef SHIFTJIS_CP932
1345                         cp932inv_f = FALSE;
1346 #endif
1347 #ifdef UTF8_OUTPUT_ENABLE
1348                         ms_ucs_map_f = UCS_MAP_CP932;
1349 #endif
1350                     }else if(strcmp(codeset, "CP50221") == 0){
1351                         output_conv = j_oconv;
1352 #ifdef SHIFTJIS_CP932
1353                         cp932inv_f = FALSE;
1354 #endif
1355 #ifdef UTF8_OUTPUT_ENABLE
1356                         ms_ucs_map_f = UCS_MAP_CP932;
1357 #endif
1358                     }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1359                         output_conv = j_oconv;
1360 #ifdef X0212_ENABLE
1361                         x0212_f = TRUE;
1362 #endif
1363 #ifdef SHIFTJIS_CP932
1364                         cp932inv_f = FALSE;
1365 #endif
1366                     }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1367                         output_conv = j_oconv;
1368 #ifdef X0212_ENABLE
1369                         x0212_f = TRUE;
1370 #endif
1371                         x0213_f = TRUE;
1372 #ifdef SHIFTJIS_CP932
1373                         cp932inv_f = FALSE;
1374 #endif
1375                     }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1376                         output_conv = s_oconv;
1377                     }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1378                              strcmp(codeset, "CSWINDOWS31J") == 0 ||
1379                              strcmp(codeset, "CP932") == 0 ||
1380                              strcmp(codeset, "MS932") == 0){
1381                         output_conv = s_oconv;
1382 #ifdef UTF8_OUTPUT_ENABLE
1383                         ms_ucs_map_f = UCS_MAP_CP932;
1384 #endif
1385                     }else if(strcmp(codeset, "CP10001") == 0){
1386                         output_conv = s_oconv;
1387 #ifdef UTF8_OUTPUT_ENABLE
1388                         ms_ucs_map_f = UCS_MAP_CP10001;
1389 #endif
1390                     }else if(strcmp(codeset, "EUCJP") == 0 ||
1391                              strcmp(codeset, "EUC-JP") == 0){
1392                         output_conv = e_oconv;
1393                     }else if(strcmp(codeset, "CP51932") == 0){
1394                         output_conv = e_oconv;
1395 #ifdef SHIFTJIS_CP932
1396                         cp932inv_f = FALSE;
1397 #endif
1398 #ifdef UTF8_OUTPUT_ENABLE
1399                         ms_ucs_map_f = UCS_MAP_CP932;
1400 #endif
1401                     }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1402                              strcmp(codeset, "EUCJP-MS") == 0 ||
1403                              strcmp(codeset, "EUCJPMS") == 0){
1404                         output_conv = e_oconv;
1405 #ifdef X0212_ENABLE
1406                         x0212_f = TRUE;
1407 #endif
1408 #ifdef UTF8_OUTPUT_ENABLE
1409                         ms_ucs_map_f = UCS_MAP_MS;
1410 #endif
1411                     }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1412                              strcmp(codeset, "EUCJP-ASCII") == 0){
1413                         output_conv = e_oconv;
1414 #ifdef X0212_ENABLE
1415                         x0212_f = TRUE;
1416 #endif
1417 #ifdef UTF8_OUTPUT_ENABLE
1418                         ms_ucs_map_f = UCS_MAP_ASCII;
1419 #endif
1420                     }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1421                              strcmp(codeset, "SHIFT_JIS-2004") == 0){
1422                         output_conv = s_oconv;
1423                         x0213_f = TRUE;
1424 #ifdef SHIFTJIS_CP932
1425                         cp932inv_f = FALSE;
1426 #endif
1427                     }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1428                              strcmp(codeset, "EUC-JIS-2004") == 0){
1429                         output_conv = e_oconv;
1430 #ifdef X0212_ENABLE
1431                         x0212_f = TRUE;
1432 #endif
1433                         x0213_f = TRUE;
1434 #ifdef SHIFTJIS_CP932
1435                         cp932inv_f = FALSE;
1436 #endif
1437 #ifdef UTF8_OUTPUT_ENABLE
1438                     }else if(strcmp(codeset, "UTF-8") == 0){
1439                         output_conv = w_oconv;
1440                     }else if(strcmp(codeset, "UTF-8N") == 0){
1441                         output_conv = w_oconv;
1442                     }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1443                         output_conv = w_oconv;
1444                         output_bom_f = TRUE;
1445                     }else if(strcmp(codeset, "UTF-16BE") == 0){
1446                         output_conv = w_oconv16;
1447                     }else if(strcmp(codeset, "UTF-16") == 0 ||
1448                              strcmp(codeset, "UTF-16BE-BOM") == 0){
1449                         output_conv = w_oconv16;
1450                         output_bom_f = TRUE;
1451                     }else if(strcmp(codeset, "UTF-16LE") == 0){
1452                         output_conv = w_oconv16;
1453                         output_endian = ENDIAN_LITTLE;
1454                     }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1455                         output_conv = w_oconv16;
1456                         output_endian = ENDIAN_LITTLE;
1457                         output_bom_f = TRUE;
1458                     }else if(strcmp(codeset, "UTF-32") == 0 ||
1459                              strcmp(codeset, "UTF-32BE") == 0){
1460                         output_conv = w_oconv32;
1461                     }else if(strcmp(codeset, "UTF-32BE-BOM") == 0){
1462                         output_conv = w_oconv32;
1463                         output_bom_f = TRUE;
1464                     }else if(strcmp(codeset, "UTF-32LE") == 0){
1465                         output_conv = w_oconv32;
1466                         output_endian = ENDIAN_LITTLE;
1467                     }else if(strcmp(codeset, "UTF-32LE-BOM") == 0){
1468                         output_conv = w_oconv32;
1469                         output_endian = ENDIAN_LITTLE;
1470                         output_bom_f = TRUE;
1471 #endif
1472                     }
1473                     continue;
1474                 }
1475 #ifdef OVERWRITE
1476                 if (strcmp(long_option[i].name, "overwrite") == 0){
1477                     file_out_f = TRUE;
1478                     overwrite_f = TRUE;
1479                     preserve_time_f = TRUE;
1480                     continue;
1481                 }
1482                 if (strcmp(long_option[i].name, "overwrite=") == 0){
1483                     file_out_f = TRUE;
1484                     overwrite_f = TRUE;
1485                     preserve_time_f = TRUE;
1486                     backup_f = TRUE;
1487                     backup_suffix = malloc(strlen((char *) p) + 1);
1488                     strcpy(backup_suffix, (char *) p);
1489                     continue;
1490                 }
1491                 if (strcmp(long_option[i].name, "in-place") == 0){
1492                     file_out_f = TRUE;
1493                     overwrite_f = TRUE;
1494                     preserve_time_f = FALSE;
1495                     continue;
1496                 }
1497                 if (strcmp(long_option[i].name, "in-place=") == 0){
1498                     file_out_f = TRUE;
1499                     overwrite_f = TRUE;
1500                     preserve_time_f = FALSE;
1501                     backup_f = TRUE;
1502                     backup_suffix = malloc(strlen((char *) p) + 1);
1503                     strcpy(backup_suffix, (char *) p);
1504                     continue;
1505                 }
1506 #endif
1507 #ifdef INPUT_OPTION
1508                 if (strcmp(long_option[i].name, "cap-input") == 0){
1509                     cap_f = TRUE;
1510                     continue;
1511                 }
1512                 if (strcmp(long_option[i].name, "url-input") == 0){
1513                     url_f = TRUE;
1514                     continue;
1515                 }
1516 #endif
1517 #ifdef NUMCHAR_OPTION
1518                 if (strcmp(long_option[i].name, "numchar-input") == 0){
1519                     numchar_f = TRUE;
1520                     continue;
1521                 }
1522 #endif
1523 #ifdef CHECK_OPTION
1524                 if (strcmp(long_option[i].name, "no-output") == 0){
1525                     noout_f = TRUE;
1526                     continue;
1527                 }
1528                 if (strcmp(long_option[i].name, "debug") == 0){
1529                     debug_f = TRUE;
1530                     continue;
1531                 }
1532 #endif
1533                 if (strcmp(long_option[i].name, "cp932") == 0){
1534 #ifdef SHIFTJIS_CP932
1535                     cp51932_f = TRUE;
1536                     cp932inv_f = TRUE;
1537 #endif
1538 #ifdef UTF8_OUTPUT_ENABLE
1539                     ms_ucs_map_f = UCS_MAP_CP932;
1540 #endif
1541                     continue;
1542                 }
1543                 if (strcmp(long_option[i].name, "no-cp932") == 0){
1544 #ifdef SHIFTJIS_CP932
1545                     cp51932_f = FALSE;
1546                     cp932inv_f = FALSE;
1547 #endif
1548 #ifdef UTF8_OUTPUT_ENABLE
1549                     ms_ucs_map_f = UCS_MAP_ASCII;
1550 #endif
1551                     continue;
1552                 }
1553 #ifdef SHIFTJIS_CP932
1554                 if (strcmp(long_option[i].name, "cp932inv") == 0){
1555                     cp932inv_f = TRUE;
1556                     continue;
1557                 }
1558 #endif
1559
1560 #ifdef X0212_ENABLE
1561                 if (strcmp(long_option[i].name, "x0212") == 0){
1562                     x0212_f = TRUE;
1563                     continue;
1564                 }
1565 #endif
1566
1567 #ifdef EXEC_IO
1568                   if (strcmp(long_option[i].name, "exec-in") == 0){
1569                       exec_f = 1;
1570                       return;
1571                   }
1572                   if (strcmp(long_option[i].name, "exec-out") == 0){
1573                       exec_f = -1;
1574                       return;
1575                   }
1576 #endif
1577 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1578                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1579                     no_cp932ext_f = TRUE;
1580                     continue;
1581                 }
1582                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1583                     no_best_fit_chars_f = TRUE;
1584                     continue;
1585                 }
1586                 if (strcmp(long_option[i].name, "fb-skip") == 0){
1587                     encode_fallback = NULL;
1588                     continue;
1589                 }
1590                 if (strcmp(long_option[i].name, "fb-html") == 0){
1591                     encode_fallback = encode_fallback_html;
1592                     continue;
1593                 }
1594                 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1595                     encode_fallback = encode_fallback_xml;
1596                     continue;
1597                 }
1598                 if (strcmp(long_option[i].name, "fb-java") == 0){
1599                     encode_fallback = encode_fallback_java;
1600                     continue;
1601                 }
1602                 if (strcmp(long_option[i].name, "fb-perl") == 0){
1603                     encode_fallback = encode_fallback_perl;
1604                     continue;
1605                 }
1606                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1607                     encode_fallback = encode_fallback_subchar;
1608                     continue;
1609                 }
1610                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1611                     encode_fallback = encode_fallback_subchar;
1612                     unicode_subchar = 0;
1613                     if (p[0] != '0'){
1614                         /* decimal number */
1615                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1616                             unicode_subchar *= 10;
1617                             unicode_subchar += hex2bin(p[i]);
1618                         }
1619                     }else if(p[1] == 'x' || p[1] == 'X'){
1620                         /* hexadecimal number */
1621                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1622                             unicode_subchar <<= 4;
1623                             unicode_subchar |= hex2bin(p[i]);
1624                         }
1625                     }else{
1626                         /* octal number */
1627                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1628                             unicode_subchar *= 8;
1629                             unicode_subchar += hex2bin(p[i]);
1630                         }
1631                     }
1632                     w16e_conv(unicode_subchar, &i, &j);
1633                     unicode_subchar = i<<8 | j;
1634                     continue;
1635                 }
1636 #endif
1637 #ifdef UTF8_OUTPUT_ENABLE
1638                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1639                     ms_ucs_map_f = UCS_MAP_MS;
1640                     continue;
1641                 }
1642 #endif
1643 #ifdef UNICODE_NORMALIZATION
1644                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1645                     input_f = UTF8_INPUT;
1646                     nfc_f = TRUE;
1647                     continue;
1648                 }
1649 #endif
1650                 if (strcmp(long_option[i].name, "prefix=") == 0){
1651                     if (nkf_isgraph(p[0])){
1652                         for (i = 1; nkf_isgraph(p[i]); i++){
1653                             prefix_table[p[i]] = p[0];
1654                         }
1655                     }
1656                     continue;
1657                 }
1658             }
1659             continue;
1660         case 'b':           /* buffered mode */
1661             unbuf_f = FALSE;
1662             continue;
1663         case 'u':           /* non bufferd mode */
1664             unbuf_f = TRUE;
1665             continue;
1666         case 't':           /* transparent mode */
1667             if (*cp=='1') {
1668                 /* alias of -t */
1669                 nop_f = TRUE;
1670                 *cp++;
1671             } else if (*cp=='2') {
1672                 /*
1673                  * -t with put/get
1674                  *
1675                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1676                  *
1677                  */
1678                 nop_f = 2;
1679                 *cp++;
1680             } else
1681                 nop_f = TRUE;
1682             continue;
1683         case 'j':           /* JIS output */
1684         case 'n':
1685             output_conv = j_oconv;
1686             continue;
1687         case 'e':           /* AT&T EUC output */
1688             output_conv = e_oconv;
1689             cp932inv_f = FALSE;
1690             continue;
1691         case 's':           /* SJIS output */
1692             output_conv = s_oconv;
1693             continue;
1694         case 'l':           /* ISO8859 Latin-1 support, no conversion */
1695             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
1696             input_f = LATIN1_INPUT;
1697             continue;
1698         case 'i':           /* Kanji IN ESC-$-@/B */
1699             if (*cp=='@'||*cp=='B') 
1700                 kanji_intro = *cp++;
1701             continue;
1702         case 'o':           /* ASCII IN ESC-(-J/B */
1703             if (*cp=='J'||*cp=='B'||*cp=='H') 
1704                 ascii_intro = *cp++;
1705             continue;
1706         case 'h':
1707             /*  
1708                 bit:1   katakana->hiragana
1709                 bit:2   hiragana->katakana
1710             */
1711             if ('9'>= *cp && *cp>='0') 
1712                 hira_f |= (*cp++ -'0');
1713             else 
1714                 hira_f |= 1;
1715             continue;
1716         case 'r':
1717             rot_f = TRUE;
1718             continue;
1719 #if defined(MSDOS) || defined(__OS2__) 
1720         case 'T':
1721             binmode_f = FALSE;
1722             continue;
1723 #endif
1724 #ifndef PERL_XS
1725         case 'V':
1726             version();
1727             exit(1);
1728             break;
1729         case 'v':
1730             usage();
1731             exit(1);
1732             break;
1733 #endif
1734 #ifdef UTF8_OUTPUT_ENABLE
1735         case 'w':           /* UTF-8 output */
1736             if (cp[0] == '8') {
1737                 output_conv = w_oconv; cp++;
1738                 if (cp[0] == '0'){
1739                     cp++;
1740                 } else {
1741                     output_bom_f = TRUE;
1742                 }
1743             } else {
1744                 if ('1'== cp[0] && '6'==cp[1]) {
1745                     output_conv = w_oconv16; cp+=2;
1746                 } else if ('3'== cp[0] && '2'==cp[1]) {
1747                     output_conv = w_oconv32; cp+=2;
1748                 } else {
1749                     output_conv = w_oconv;
1750                     continue;
1751                 }
1752                 if (cp[0]=='L') {
1753                     cp++;
1754                     output_endian = ENDIAN_LITTLE;
1755                 } else if (cp[0] == 'B') {
1756                     cp++;
1757                 } else {
1758                     continue;
1759                 }
1760                 if (cp[0] == '0'){
1761                     cp++;
1762                 } else {
1763                     output_bom_f = TRUE;
1764                 }
1765             }
1766             continue;
1767 #endif
1768 #ifdef UTF8_INPUT_ENABLE
1769         case 'W':           /* UTF input */
1770             if (cp[0] == '8') {
1771                 cp++;
1772                 input_f = UTF8_INPUT;
1773             }else{
1774                 if ('1'== cp[0] && '6'==cp[1]) {
1775                     cp += 2;
1776                     input_f = UTF16_INPUT;
1777                     input_endian = ENDIAN_BIG;
1778                 } else if ('3'== cp[0] && '2'==cp[1]) {
1779                     cp += 2;
1780                     input_f = UTF32_INPUT;
1781                     input_endian = ENDIAN_BIG;
1782                 } else {
1783                     input_f = UTF8_INPUT;
1784                     continue;
1785                 }
1786                 if (cp[0]=='L') {
1787                     cp++;
1788                     input_endian = ENDIAN_LITTLE;
1789                 } else if (cp[0] == 'B') {
1790                     cp++;
1791                 }
1792             }
1793             continue;
1794 #endif
1795         /* Input code assumption */
1796         case 'J':   /* JIS input */
1797             input_f = JIS_INPUT;
1798             continue;
1799         case 'E':   /* AT&T EUC input */
1800             input_f = EUC_INPUT;
1801             continue;
1802         case 'S':   /* MS Kanji input */
1803             input_f = SJIS_INPUT;
1804             if (x0201_f==NO_X0201) x0201_f=TRUE;
1805             continue;
1806         case 'Z':   /* Convert X0208 alphabet to asii */
1807             /* alpha_f
1808                bit:0   Convert JIS X 0208 Alphabet to ASCII
1809                bit:1   Convert Kankaku to one space
1810                bit:2   Convert Kankaku to two spaces
1811                bit:3   Convert HTML Entity
1812                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
1813             */
1814             if ('9'>= *cp && *cp>='0') 
1815                 alpha_f |= 1<<(*cp++ -'0');
1816             else 
1817                 alpha_f |= TRUE;
1818             continue;
1819         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
1820             x0201_f = FALSE;    /* No X0201->X0208 conversion */
1821             /* accept  X0201
1822                     ESC-(-I     in JIS, EUC, MS Kanji
1823                     SI/SO       in JIS, EUC, MS Kanji
1824                     SSO         in EUC, JIS, not in MS Kanji
1825                     MS Kanji (0xa0-0xdf) 
1826                output  X0201
1827                     ESC-(-I     in JIS (0x20-0x5f)
1828                     SSO         in EUC (0xa0-0xdf)
1829                     0xa0-0xd    in MS Kanji (0xa0-0xdf) 
1830             */
1831             continue;
1832         case 'X':   /* Assume X0201 kana */
1833             /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1834             x0201_f = TRUE;
1835             continue;
1836         case 'F':   /* prserve new lines */
1837             fold_preserve_f = TRUE;
1838         case 'f':   /* folding -f60 or -f */
1839             fold_f = TRUE;
1840             fold_len = 0;
1841             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1842                 fold_len *= 10;
1843                 fold_len += *cp++ - '0';
1844             }
1845             if (!(0<fold_len && fold_len<BUFSIZ)) 
1846                 fold_len = DEFAULT_FOLD;
1847             if (*cp=='-') {
1848                 fold_margin = 0;
1849                 cp++;
1850                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1851                     fold_margin *= 10;
1852                     fold_margin += *cp++ - '0';
1853                 }
1854             }
1855             continue;
1856         case 'm':   /* MIME support */
1857             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1858             if (*cp=='B'||*cp=='Q') {
1859                 mime_decode_mode = *cp++;
1860                 mimebuf_f = FIXED_MIME;
1861             } else if (*cp=='N') {
1862                 mime_f = TRUE; cp++;
1863             } else if (*cp=='S') {
1864                 mime_f = STRICT_MIME; cp++;
1865             } else if (*cp=='0') {
1866                 mime_decode_f = FALSE;
1867                 mime_f = FALSE; cp++;
1868             }
1869             continue;
1870         case 'M':   /* MIME output */
1871             if (*cp=='B') {
1872                 mimeout_mode = 'B';
1873                 mimeout_f = FIXED_MIME; cp++;
1874             } else if (*cp=='Q') {
1875                 mimeout_mode = 'Q';
1876                 mimeout_f = FIXED_MIME; cp++;
1877             } else {
1878                 mimeout_f = TRUE;
1879             }
1880             continue;
1881         case 'B':   /* Broken JIS support */
1882             /*  bit:0   no ESC JIS
1883                 bit:1   allow any x on ESC-(-x or ESC-$-x
1884                 bit:2   reset to ascii on NL
1885             */
1886             if ('9'>= *cp && *cp>='0') 
1887                 broken_f |= 1<<(*cp++ -'0');
1888             else 
1889                 broken_f |= TRUE;
1890             continue;
1891 #ifndef PERL_XS
1892         case 'O':/* for Output file */
1893             file_out_f = TRUE;
1894             continue;
1895 #endif
1896         case 'c':/* add cr code */
1897             crmode_f = CRLF;
1898             continue;
1899         case 'd':/* delete cr code */
1900             crmode_f = NL;
1901             continue;
1902         case 'I':   /* ISO-2022-JP output */
1903             iso2022jp_f = TRUE;
1904             continue;
1905         case 'L':  /* line mode */
1906             if (*cp=='u') {         /* unix */
1907                 crmode_f = NL; cp++;
1908             } else if (*cp=='m') { /* mac */
1909                 crmode_f = CR; cp++;
1910             } else if (*cp=='w') { /* windows */
1911                 crmode_f = CRLF; cp++;
1912             } else if (*cp=='0') { /* no conversion  */
1913                 crmode_f = 0; cp++;
1914             }
1915             continue;
1916         case 'g':
1917 #ifndef PERL_XS
1918             guess_f = TRUE;
1919 #endif
1920             continue;
1921         case ' ':    
1922         /* module muliple options in a string are allowed for Perl moudle  */
1923             while(*cp && *cp++!='-');
1924             continue;
1925         default:
1926             /* bogus option but ignored */
1927             continue;
1928         }
1929     }
1930 }
1931
1932 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1933 {
1934     if (iconv_func){
1935         struct input_code *p = input_code_list;
1936         while (p->name){
1937             if (iconv_func == p->iconv_func){
1938                 return p;
1939             }
1940             p++;
1941         }
1942     }
1943     return 0;
1944 }
1945
1946 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1947 {
1948 #ifdef INPUT_CODE_FIX
1949     if (f || !input_f)
1950 #endif
1951         if (estab_f != f){
1952             estab_f = f;
1953         }
1954
1955     if (iconv_func
1956 #ifdef INPUT_CODE_FIX
1957         && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1958 #endif
1959         ){
1960         iconv = iconv_func;
1961     }
1962 #ifdef CHECK_OPTION
1963     if (estab_f && iconv_for_check != iconv){
1964         struct input_code *p = find_inputcode_byfunc(iconv);
1965         if (p){
1966             set_input_codename(p->name);
1967             debug(input_codename);
1968         }
1969         iconv_for_check = iconv;
1970     }
1971 #endif
1972 }
1973
/*
 * Penalty bits accumulated in input_code.score while guessing the input
 * encoding.  Each flag is a distinct bit; higher bits are worse.
 */
#define SCORE_L2       (1 << 0)              /* JIS level-2 kanji */
#define SCORE_KANA     (1 << 1)              /* so-called half-width kana */
#define SCORE_DEPEND   (1 << 2)              /* platform-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (1 << 3)              /* reinterpreted through CP932 */
#define SCORE_NO_EXIST (1 << 4)              /* character does not exist */
#else
#define SCORE_NO_EXIST (1 << 3)              /* character does not exist */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)    /* error */

/* Score every detector starts from (see status_reset). */
#define SCORE_INIT (SCORE_iMIME)
1987
1988 const nkf_char score_table_A0[] = {
1989     0, 0, 0, 0,
1990     0, 0, 0, 0,
1991     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1992     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1993 };
1994
1995 const nkf_char score_table_F0[] = {
1996     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1997     SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1998     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1999     SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
2000 };
2001
2002 void set_code_score(struct input_code *ptr, nkf_char score)
2003 {
2004     if (ptr){
2005         ptr->score |= score;
2006     }
2007 }
2008
2009 void clr_code_score(struct input_code *ptr, nkf_char score)
2010 {
2011     if (ptr){
2012         ptr->score &= ~score;
2013     }
2014 }
2015
2016 void code_score(struct input_code *ptr)
2017 {
2018     nkf_char c2 = ptr->buf[0];
2019 #ifdef UTF8_OUTPUT_ENABLE
2020     nkf_char c1 = ptr->buf[1];
2021 #endif
2022     if (c2 < 0){
2023         set_code_score(ptr, SCORE_ERROR);
2024     }else if (c2 == SSO){
2025         set_code_score(ptr, SCORE_KANA);
2026 #ifdef UTF8_OUTPUT_ENABLE
2027     }else if (!e2w_conv(c2, c1)){
2028         set_code_score(ptr, SCORE_NO_EXIST);
2029 #endif
2030     }else if ((c2 & 0x70) == 0x20){
2031         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2032     }else if ((c2 & 0x70) == 0x70){
2033         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2034     }else if ((c2 & 0x70) >= 0x50){
2035         set_code_score(ptr, SCORE_L2);
2036     }
2037 }
2038
2039 void status_disable(struct input_code *ptr)
2040 {
2041     ptr->stat = -1;
2042     ptr->buf[0] = -1;
2043     code_score(ptr);
2044     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2045 }
2046
2047 void status_push_ch(struct input_code *ptr, nkf_char c)
2048 {
2049     ptr->buf[ptr->index++] = c;
2050 }
2051
2052 void status_clear(struct input_code *ptr)
2053 {
2054     ptr->stat = 0;
2055     ptr->index = 0;
2056 }
2057
2058 void status_reset(struct input_code *ptr)
2059 {
2060     status_clear(ptr);
2061     ptr->score = SCORE_INIT;
2062 }
2063
2064 void status_reinit(struct input_code *ptr)
2065 {
2066     status_reset(ptr);
2067     ptr->_file_stat = 0;
2068 }
2069
2070 void status_check(struct input_code *ptr, nkf_char c)
2071 {
2072     if (c <= DEL && estab_f){
2073         status_reset(ptr);
2074     }
2075 }
2076
2077 void s_status(struct input_code *ptr, nkf_char c)
2078 {
2079     switch(ptr->stat){
2080       case -1:
2081           status_check(ptr, c);
2082           break;
2083       case 0:
2084           if (c <= DEL){
2085               break;
2086 #ifdef NUMCHAR_OPTION
2087           }else if (is_unicode_capsule(c)){
2088               break;
2089 #endif
2090           }else if (0xa1 <= c && c <= 0xdf){
2091               status_push_ch(ptr, SSO);
2092               status_push_ch(ptr, c);
2093               code_score(ptr);
2094               status_clear(ptr);
2095           }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2096               ptr->stat = 1;
2097               status_push_ch(ptr, c);
2098 #ifdef SHIFTJIS_CP932
2099           }else if (cp51932_f
2100                     && is_ibmext_in_sjis(c)){
2101               ptr->stat = 2;
2102               status_push_ch(ptr, c);
2103 #endif /* SHIFTJIS_CP932 */
2104 #ifdef X0212_ENABLE
2105           }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2106               ptr->stat = 1;
2107               status_push_ch(ptr, c);
2108 #endif /* X0212_ENABLE */
2109           }else{
2110               status_disable(ptr);
2111           }
2112           break;
2113       case 1:
2114           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2115               status_push_ch(ptr, c);
2116               s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2117               code_score(ptr);
2118               status_clear(ptr);
2119           }else{
2120               status_disable(ptr);
2121           }
2122           break;
2123       case 2:
2124 #ifdef SHIFTJIS_CP932
2125           if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2126               status_push_ch(ptr, c);
2127               if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2128                   set_code_score(ptr, SCORE_CP932);
2129                   status_clear(ptr);
2130                   break;
2131               }
2132           }
2133 #endif /* SHIFTJIS_CP932 */
2134 #ifndef X0212_ENABLE
2135           status_disable(ptr);
2136 #endif
2137           break;
2138     }
2139 }
2140
2141 void e_status(struct input_code *ptr, nkf_char c)
2142 {
2143     switch (ptr->stat){
2144       case -1:
2145           status_check(ptr, c);
2146           break;
2147       case 0:
2148           if (c <= DEL){
2149               break;
2150 #ifdef NUMCHAR_OPTION
2151           }else if (is_unicode_capsule(c)){
2152               break;
2153 #endif
2154           }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2155               ptr->stat = 1;
2156               status_push_ch(ptr, c);
2157 #ifdef X0212_ENABLE
2158           }else if (0x8f == c){
2159               ptr->stat = 2;
2160               status_push_ch(ptr, c);
2161 #endif /* X0212_ENABLE */
2162           }else{
2163               status_disable(ptr);
2164           }
2165           break;
2166       case 1:
2167           if (0xa1 <= c && c <= 0xfe){
2168               status_push_ch(ptr, c);
2169               code_score(ptr);
2170               status_clear(ptr);
2171           }else{
2172               status_disable(ptr);
2173           }
2174           break;
2175 #ifdef X0212_ENABLE
2176       case 2:
2177           if (0xa1 <= c && c <= 0xfe){
2178               ptr->stat = 1;
2179               status_push_ch(ptr, c);
2180           }else{
2181               status_disable(ptr);
2182           }
2183 #endif /* X0212_ENABLE */
2184     }
2185 }
2186
#ifdef UTF8_INPUT_ENABLE
/*
 * UTF-8 detector: advance the state of ptr by one input byte c.
 *
 * ptr->stat values used here:
 *   -1     disabled; only status_check() is run
 *    0     idle, c is examined as a possible lead byte
 *    1, 2  lead byte 0xc0-0xdf / 0xe0-0xef buffered; once more than
 *          stat bytes are buffered the sequence is converted with
 *          w2e_conv() and scored (the sequence EF BB BF is not scored)
 *    3     lead byte 0xf0-0xf4 buffered; continuation bytes are
 *          collected and the state is cleared without scoring
 *
 * Continuation bytes must lie in 0x80-0xbf; anything else disables the
 * detector.
 */
void w_status(struct input_code *ptr, nkf_char c)
{
    switch (ptr->stat){
      case -1:
          status_check(ptr, c);
          break;

      case 0:
          if (c <= DEL)
              break;
#ifdef NUMCHAR_OPTION
          if (is_unicode_capsule(c))
              break;
#endif
          if (0xc0 <= c && c <= 0xdf){
              ptr->stat = 1;
          }else if (0xe0 <= c && c <= 0xef){
              ptr->stat = 2;
          }else if (0xf0 <= c && c <= 0xf4){
              ptr->stat = 3;
          }else{
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          break;

      case 1:
      case 2:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          status_push_ch(ptr, c);
          if (ptr->index > ptr->stat){
              /* the whole sequence is buffered now */
              int is_bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
                            && ptr->buf[2] == 0xbf);
              w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
                       &ptr->buf[0], &ptr->buf[1]);
              if (!is_bom)
                  code_score(ptr);
              status_clear(ptr);
          }
          break;

      case 3:
          if (c < 0x80 || 0xbf < c){
              status_disable(ptr);
              break;
          }
          if (ptr->index < ptr->stat)
              status_push_ch(ptr, c);
          else
              status_clear(ptr);
          break;
    }
}
#endif
2246
2247 void code_status(nkf_char c)
2248 {
2249     int action_flag = 1;
2250     struct input_code *result = 0;
2251     struct input_code *p = input_code_list;
2252     while (p->name){
2253         if (!p->status_func) {
2254             ++p;
2255             continue;
2256         }
2257         if (!p->status_func)
2258             continue;
2259         (p->status_func)(p, c);
2260         if (p->stat > 0){
2261             action_flag = 0;
2262         }else if(p->stat == 0){
2263             if (result){
2264                 action_flag = 0;
2265             }else{
2266                 result = p;
2267             }
2268         }
2269         ++p;
2270     }
2271
2272     if (action_flag){
2273         if (result && !estab_f){
2274             set_iconv(TRUE, result->iconv_func);
2275         }else if (c <= DEL){
2276             struct input_code *ptr = input_code_list;
2277             while (ptr->name){
2278                 status_reset(ptr);
2279                 ++ptr;
2280             }
2281         }
2282     }
2283 }
2284
2285 #ifndef WIN32DLL
2286 nkf_char std_getc(FILE *f)
2287 {
2288     if (std_gc_ndx){
2289         return std_gc_buf[--std_gc_ndx];
2290     }
2291     return getc(f);
2292 }
2293 #endif /*WIN32DLL*/
2294
2295 nkf_char std_ungetc(nkf_char c, FILE *f)
2296 {
2297     if (std_gc_ndx == STD_GC_BUFSIZE){
2298         return EOF;
2299     }
2300     std_gc_buf[std_gc_ndx++] = c;
2301     return c;
2302 }
2303
2304 #ifndef WIN32DLL
2305 void std_putc(nkf_char c)
2306 {
2307     if(c!=EOF)
2308       putchar(c);
2309 }
2310 #endif /*WIN32DLL*/
2311
2312 #if !defined(PERL_XS) && !defined(WIN32DLL)
2313 nkf_char noconvert(FILE *f)
2314 {
2315     nkf_char    c;
2316
2317     if (nop_f == 2)
2318         module_connection();
2319     while ((c = (*i_getc)(f)) != EOF)
2320       (*o_putc)(c);
2321     (*o_putc)(EOF);
2322     return 1;
2323 }
2324 #endif
2325
2326 void module_connection(void)
2327 {
2328     oconv = output_conv; 
2329     o_putc = std_putc;
2330
2331     /* replace continucation module, from output side */
2332
2333     /* output redicrection */
2334 #ifdef CHECK_OPTION
2335     if (noout_f || guess_f){
2336         o_putc = no_putc;
2337     }
2338 #endif
2339     if (mimeout_f) {
2340         o_mputc = o_putc;
2341         o_putc = mime_putc;
2342         if (mimeout_f == TRUE) {
2343             o_base64conv = oconv; oconv = base64_conv;
2344         }
2345         /* base64_count = 0; */
2346     }
2347
2348     if (crmode_f) {
2349         o_crconv = oconv; oconv = cr_conv;
2350     }
2351     if (rot_f) {
2352         o_rot_conv = oconv; oconv = rot_conv;
2353     }
2354     if (iso2022jp_f) {
2355         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2356     }
2357     if (hira_f) {
2358         o_hira_conv = oconv; oconv = hira_conv;
2359     }
2360     if (fold_f) {
2361         o_fconv = oconv; oconv = fold_conv;
2362         f_line = 0;
2363     }
2364     if (alpha_f || x0201_f) {
2365         o_zconv = oconv; oconv = z_conv;
2366     }
2367
2368     i_getc = std_getc;
2369     i_ungetc = std_ungetc;
2370     /* input redicrection */
2371 #ifdef INPUT_OPTION
2372     if (cap_f){
2373         i_cgetc = i_getc; i_getc = cap_getc;
2374         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2375     }
2376     if (url_f){
2377         i_ugetc = i_getc; i_getc = url_getc;
2378         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2379     }
2380 #endif
2381 #ifdef NUMCHAR_OPTION
2382     if (numchar_f){
2383         i_ngetc = i_getc; i_getc = numchar_getc;
2384         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2385     }
2386 #endif
2387 #ifdef UNICODE_NORMALIZATION
2388     if (nfc_f && input_f == UTF8_INPUT){
2389         i_nfc_getc = i_getc; i_getc = nfc_getc;
2390         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2391     }
2392 #endif
2393     if (mime_f && mimebuf_f==FIXED_MIME) {
2394         i_mgetc = i_getc; i_getc = mime_getc;
2395         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2396     }
2397     if (broken_f & 1) {
2398         i_bgetc = i_getc; i_getc = broken_getc;
2399         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2400     }
2401     if (input_f == JIS_INPUT || input_f == EUC_INPUT || input_f == LATIN1_INPUT) {
2402         set_iconv(-TRUE, e_iconv);
2403     } else if (input_f == SJIS_INPUT) {
2404         set_iconv(-TRUE, s_iconv);
2405 #ifdef UTF8_INPUT_ENABLE
2406     } else if (input_f == UTF8_INPUT) {
2407         set_iconv(-TRUE, w_iconv);
2408     } else if (input_f == UTF16_INPUT) {
2409         set_iconv(-TRUE, w_iconv16);
2410     } else if (input_f == UTF32_INPUT) {
2411         set_iconv(-TRUE, w_iconv32);
2412 #endif
2413     } else {
2414         set_iconv(FALSE, e_iconv);
2415     }
2416
2417     {
2418         struct input_code *p = input_code_list;
2419         while (p->name){
2420             status_reinit(p++);
2421         }
2422     }
2423 }
2424
2425 /*
2426  * Check and Ignore BOM
2427  */
2428 void check_bom(FILE *f)
2429 {
2430     int c2;
2431     switch(c2 = (*i_getc)(f)){
2432     case 0x00:
2433         if((c2 = (*i_getc)(f)) == 0x00){
2434             if((c2 = (*i_getc)(f)) == 0xFE){
2435                 if((c2 = (*i_getc)(f)) == 0xFF){
2436                     if(!input_f){
2437                         set_iconv(TRUE, w_iconv32);
2438                     }
2439                     if (iconv == w_iconv32) {
2440                         input_endian = ENDIAN_BIG;
2441                         return;
2442                     }
2443                     (*i_ungetc)(0xFF,f);
2444                 }else (*i_ungetc)(c2,f);
2445                 (*i_ungetc)(0xFE,f);
2446             }else if(c2 == 0xFF){
2447                 if((c2 = (*i_getc)(f)) == 0xFE){
2448                     if(!input_f){
2449                         set_iconv(TRUE, w_iconv32);
2450                     }
2451                     if (iconv == w_iconv32) {
2452                         input_endian = ENDIAN_2143;
2453                         return;
2454                     }
2455                     (*i_ungetc)(0xFF,f);
2456                 }else (*i_ungetc)(c2,f);
2457                 (*i_ungetc)(0xFF,f);
2458             }else (*i_ungetc)(c2,f);
2459             (*i_ungetc)(0x00,f);
2460         }else (*i_ungetc)(c2,f);
2461         (*i_ungetc)(0x00,f);
2462         break;
2463     case 0xEF:
2464         if((c2 = (*i_getc)(f)) == 0xBB){
2465             if((c2 = (*i_getc)(f)) == 0xBF){
2466                 if(!input_f){
2467                     set_iconv(TRUE, w_iconv);
2468                 }
2469                 if (iconv == w_iconv) {
2470                     return;
2471                 }
2472                 (*i_ungetc)(0xBF,f);
2473             }else (*i_ungetc)(c2,f);
2474             (*i_ungetc)(0xBB,f);
2475         }else (*i_ungetc)(c2,f);
2476         (*i_ungetc)(0xEF,f);
2477         break;
2478     case 0xFE:
2479         if((c2 = (*i_getc)(f)) == 0xFF){
2480             if((c2 = (*i_getc)(f)) == 0x00){
2481                 if((c2 = (*i_getc)(f)) == 0x00){
2482                     if(!input_f){
2483                         set_iconv(TRUE, w_iconv32);
2484                     }
2485                     if (iconv == w_iconv32) {
2486                         input_endian = ENDIAN_3412;
2487                         return;
2488                     }
2489                     (*i_ungetc)(0x00,f);
2490                 }else (*i_ungetc)(c2,f);
2491                 (*i_ungetc)(0x00,f);
2492             }else (*i_ungetc)(c2,f);
2493             if(!input_f){
2494                 set_iconv(TRUE, w_iconv16);
2495             }
2496             if (iconv == w_iconv16) {
2497                 input_endian = ENDIAN_BIG;
2498                 return;
2499             }
2500             (*i_ungetc)(0xFF,f);
2501         }else (*i_ungetc)(c2,f);
2502         (*i_ungetc)(0xFE,f);
2503         break;
2504     case 0xFF:
2505         if((c2 = (*i_getc)(f)) == 0xFE){
2506             if((c2 = (*i_getc)(f)) == 0x00){
2507                 if((c2 = (*i_getc)(f)) == 0x00){
2508                     if(!input_f){
2509                         set_iconv(TRUE, w_iconv32);
2510                     }
2511                     if (iconv == w_iconv32) {
2512                         input_endian = ENDIAN_LITTLE;
2513                         return;
2514                     }
2515                     (*i_ungetc)(0x00,f);
2516                 }else (*i_ungetc)(c2,f);
2517                 (*i_ungetc)(0x00,f);
2518             }else (*i_ungetc)(c2,f);
2519             if(!input_f){
2520                 set_iconv(TRUE, w_iconv16);
2521             }
2522             if (iconv == w_iconv16) {
2523                 input_endian = ENDIAN_LITTLE;
2524                 return;
2525             }
2526             (*i_ungetc)(0xFE,f);
2527         }else (*i_ungetc)(c2,f);
2528         (*i_ungetc)(0xFF,f);
2529         break;
2530     default:
2531         (*i_ungetc)(c2,f);
2532         break;
2533     }
2534 }
2535
2536 /*
   kanji_convert: conversion main loop for one input stream.

   Reads bytes from f through (*i_getc) until EOF.  Each byte is passed to
   code_status() (only while input_f is unset when INPUT_CODE_FIX is
   defined).  c2 holds a pending first byte of a two-byte character; when it
   is non-zero, the byte just read (c1) is treated as the second byte.
   ESC / SO / SI sequences update input_mode and shift_mode.  Completed
   characters are dispatched in the switch at the bottom of the loop:
   through (*iconv) while input_mode is ASCII, otherwise straight to (*oconv).

   Loop control uses three local macros:
     NEXT  continue          - emit nothing, read the next byte
     SEND  (empty statement) - fall out of the if/else chain into the
                               dispatch switch at the bottom of the loop
     LAST  break             - leave the loop and run the epilogue

   After the loop (*iconv)(EOF, 0, 0) is called; when is_inputcode_set is
   false and is_8bit is true, the input_code_list entry with the lowest
   score is passed to set_input_codename().

   Returns 1 after the epilogue.  The only other return (0, in the
   X0208/X0213_1 case) is guarded by a condition that cannot hold.
 */
2539
2540 nkf_char kanji_convert(FILE *f)
2541 {
2542     nkf_char    c3, c2=0, c1, c0=0;
2543     int is_8bit = FALSE;
2544
    /* an explicitly selected 8-bit input encoding makes SI/SO/ESC ordinary data below */
2545     if(input_f == SJIS_INPUT || input_f == EUC_INPUT
2546 #ifdef UTF8_INPUT_ENABLE
2547        || input_f == UTF8_INPUT || input_f == UTF16_INPUT
2548 #endif
2549       ){
2550         is_8bit = TRUE;
2551     }
2552
2553     input_mode = ASCII;
2554     output_mode = ASCII;
2555     shift_mode = FALSE;
2556
2557 #define NEXT continue      /* no output, get next */
2558 #define SEND ;             /* output c1 and c2, get next */
2559 #define LAST break         /* end of loop, go closing  */
2560
2561     module_connection();
2562     check_bom(f);
2563
2564     while ((c1 = (*i_getc)(f)) != EOF) {
2565 #ifdef INPUT_CODE_FIX
2566         if (!input_f)
2567 #endif
2568             code_status(c1);
2569         if (c2) {
2570             /* second byte */
            /* the "8-bit" threshold is 0x92 for JIS input with ms_ucs_map_f set, DEL otherwise */
2571             if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2572                 /* in case of 8th bit is on */
2573                 if (!estab_f&&!mime_decode_mode) {
2574                     /* in case of not established yet */
2575                     /* It is still ambiguous */
2576                     if (h_conv(f, c2, c1)==EOF) 
2577                         LAST;
2578                     else 
2579                         c2 = 0;
2580                     NEXT;
2581                 } else {
2582                     /* in case of already established */
2583                     if (c1 < AT) {
2584                         /* ignore bogus code and not CP5022x UCD */
2585                         c2 = 0;
2586                         NEXT;
2587                     } else {
2588                         SEND;
2589                     }
2590                 }
2591             } else
2592                 /* second byte, 7 bit code */
2593                 /* it might be kanji shifted */
2594                 if ((c1 == DEL) || (c1 <= SPACE)) {
2595                     /* ignore bogus first code */
2596                     c2 = 0;
2597                     NEXT;
2598                 } else
2599                     SEND;
2600         } else {
2601             /* first byte */
2602 #ifdef UTF8_INPUT_ENABLE
2603             if (iconv == w_iconv16) {
                /* UTF-16: one 16-bit unit goes to c2 (high byte) / c1 (low byte);
                   when c2 is 0xD8-0xDB a second unit is read into c0.
                   A short read sets c2 = EOF. */
2604                 if (input_endian == ENDIAN_BIG) {
2605                     c2 = c1;
2606                     if ((c1 = (*i_getc)(f)) != EOF) {
2607                         if (0xD8 <= c2 && c2 <= 0xDB) {
2608                             if ((c0 = (*i_getc)(f)) != EOF) {
2609                                 c0 <<= 8;
2610                                 if ((c3 = (*i_getc)(f)) != EOF) {
2611                                     c0 |= c3;
2612                                 } else c2 = EOF;
2613                             } else c2 = EOF;
2614                         }
2615                     } else c2 = EOF;
2616                 } else {
2617                     if ((c2 = (*i_getc)(f)) != EOF) {
2618                         if (0xD8 <= c2 && c2 <= 0xDB) {
2619                             if ((c3 = (*i_getc)(f)) != EOF) {
2620                                 if ((c0 = (*i_getc)(f)) != EOF) {
2621                                     c0 <<= 8;
2622                                     c0 |= c3;
2623                                 } else c2 = EOF;
2624                             } else c2 = EOF;
2625                         }
2626                     } else c2 = EOF;
2627                 }
2628                 SEND;
2629             } else if(iconv == w_iconv32){
                /* UTF-32: read three more bytes and combine three of the four
                   bytes into c1 according to input_endian; c2 becomes 0, or
                   EOF on a short read. */
                /* NOTE(review): this local c3 shadows the function-level nkf_char c3 */
2630                 int c3 = c1;
2631                 if((c2 = (*i_getc)(f)) != EOF &&
2632                    (c1 = (*i_getc)(f)) != EOF &&
2633                    (c0 = (*i_getc)(f)) != EOF){
2634                     switch(input_endian){
2635                     case ENDIAN_BIG:
2636                         c1 = (c2&0xFF)<<16 | (c1&0xFF)<<8 | (c0&0xFF);
2637                         break;
2638                     case ENDIAN_LITTLE:
2639                         c1 = (c3&0xFF) | (c2&0xFF)<<8 | (c1&0xFF)<<16;
2640                         break;
2641                     case ENDIAN_2143:
2642                         c1 = (c3&0xFF)<<16 | (c1&0xFF) | (c0&0xFF)<<8;
2643                         break;
2644                     case ENDIAN_3412:
2645                         c1 = (c3&0xFF)<<8 | (c2&0xFF) | (c0&0xFF)<<16;
2646                         break;
2647                     }
2648                     c2 = 0;
2649                 }else{
2650                     c2 = EOF;
2651                 }
2652                 SEND;
2653             } else
2654 #endif
2655 #ifdef NUMCHAR_OPTION
2656             if (is_unicode_capsule(c1)){
2657                 SEND;
2658             } else
2659 #endif
2660             if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
2661                 /* 8 bit code */
2662                 if (!estab_f && !iso8859_f) {
2663                     /* not established yet */
2664                     c2 = c1;
2665                     NEXT;
2666                 } else { /* estab_f==TRUE */
2667                     if (iso8859_f) {
2668                         c2 = ISO8859_1;
2669                         c1 &= 0x7f;
2670                         SEND;
2671                     } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2672                         /* SJIS X0201 Case... */
2673                         if(iso2022jp_f && x0201_f==NO_X0201) {
2674                             (*oconv)(GETA1, GETA2);
2675                             NEXT;
2676                         } else {
2677                             c2 = X0201;
2678                             c1 &= 0x7f;
2679                             SEND;
2680                         }
2681                     } else if (c1==SSO && iconv != s_iconv) {
2682                         /* EUC X0201 Case */
2683                         c1 = (*i_getc)(f);  /* skip SSO */
2684                         code_status(c1);
2685                         if (SSP<=c1 && c1<0xe0) {
2686                             if(iso2022jp_f &&  x0201_f==NO_X0201) {
2687                                 (*oconv)(GETA1, GETA2);
2688                                 NEXT;
2689                             } else {
2690                                 c2 = X0201;
2691                                 c1 &= 0x7f;
2692                                 SEND;
2693                             }
2694                         } else  { /* bogus code, skip SSO and one byte */
2695                             NEXT;
2696                         }
2697                     } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
2698                                (c1 == 0xFD || c1 == 0xFE)) {
2699                         /* CP10001 */
2700                         c2 = X0201;
2701                         c1 &= 0x7f;
2702                         SEND;
2703                     } else {
2704                        /* already established */
2705                        c2 = c1;
2706                        NEXT;
2707                     }
2708                 }
2709             } else if ((c1 > SPACE) && (c1 != DEL)) {
2710                 /* in case of Roman characters */
2711                 if (shift_mode) { 
2712                     /* output 1 shifted byte */
2713                     if (iso8859_f) {
2714                         c2 = ISO8859_1;
2715                         SEND;
2716                     } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2717                       /* output 1 shifted byte */
2718                         if(iso2022jp_f && x0201_f==NO_X0201) {
2719                             (*oconv)(GETA1, GETA2);
2720                             NEXT;
2721                         } else {
2722                             c2 = X0201;
2723                             SEND;
2724                         }
2725                     } else {
2726                         /* look like bogus code */
2727                         NEXT;
2728                     }
2729                 } else if (input_mode == X0208 || input_mode == X0212 ||
2730                            input_mode == X0213_1 || input_mode == X0213_2) {
2731                     /* in case of Kanji shifted */
2732                     c2 = c1;
2733                     NEXT;
2734                 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2735                     /* Check MIME code */
2736                     if ((c1 = (*i_getc)(f)) == EOF) {
2737                         (*oconv)(0, '=');
2738                         LAST;
2739                     } else if (c1 == '?') {
2740                         /* =? is mime conversion start sequence */
2741                         if(mime_f == STRICT_MIME) {
2742                             /* check in real detail */
2743                             if (mime_begin_strict(f) == EOF) 
2744                                 LAST;
2745                             else
2746                                 NEXT;
2747                         } else if (mime_begin(f) == EOF) 
2748                             LAST;
2749                         else
2750                             NEXT;
2751                     } else {
2752                         (*oconv)(0, '=');
2753                         (*i_ungetc)(c1,f);
2754                         NEXT;
2755                     }
2756                 } else {
2757                     /* normal ASCII code */ 
2758                     SEND;
2759                 }
            /* SI / SO / ESC are interpreted only when no 8-bit input encoding
               was forced (is_8bit false) or while decoding MIME. */
            /* NOTE(review): the literal \r ending the next three "else if" lines
               looks like a stray carriage return left in the file -- confirm */
2760             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {\r
2761                 shift_mode = FALSE; 
2762                 NEXT;
2763             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {\r
2764                 shift_mode = TRUE; 
2765                 NEXT;
2766             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {\r
2767                 if ((c1 = (*i_getc)(f)) == EOF) {
2768                     /*  (*oconv)(0, ESC); don't send bogus code */
2769                     LAST;
2770                 } else if (c1 == '$') {
2771                     if ((c1 = (*i_getc)(f)) == EOF) {
2772                         /*
2773                         (*oconv)(0, ESC); don't send bogus code 
2774                         (*oconv)(0, '$'); */
2775                         LAST;
2776                     } else if (c1 == '@'|| c1 == 'B') {
2777                         /* This is kanji introduction */
2778                         input_mode = X0208;
2779                         shift_mode = FALSE;
2780                         set_input_codename("ISO-2022-JP");
2781 #ifdef CHECK_OPTION
2782                         debug(input_codename);
2783 #endif
2784                         NEXT;
2785                     } else if (c1 == '(') {
2786                         if ((c1 = (*i_getc)(f)) == EOF) {
2787                             /* don't send bogus code 
2788                             (*oconv)(0, ESC);
2789                             (*oconv)(0, '$');
2790                             (*oconv)(0, '(');
2791                                 */
2792                             LAST;
2793                         } else if (c1 == '@'|| c1 == 'B') {
2794                             /* This is kanji introduction */
2795                             input_mode = X0208;
2796                             shift_mode = FALSE;
2797                             NEXT;
2798 #ifdef X0212_ENABLE
2799                         } else if (c1 == 'D'){
2800                             input_mode = X0212;
2801                             shift_mode = FALSE;
2802                             NEXT;
2803 #endif /* X0212_ENABLE */
2804                         } else if (c1 == (X0213_1&0x7F)){
2805                             input_mode = X0213_1;
2806                             shift_mode = FALSE;
2807                             NEXT;
2808                         } else if (c1 == (X0213_2&0x7F)){
2809                             input_mode = X0213_2;
2810                             shift_mode = FALSE;
2811                             NEXT;
2812                         } else {
2813                             /* could be some special code */
2814                             (*oconv)(0, ESC);
2815                             (*oconv)(0, '$');
2816                             (*oconv)(0, '(');
2817                             (*oconv)(0, c1);
2818                             NEXT;
2819                         }
2820                     } else if (broken_f&0x2) {
2821                         /* accept any ESC-(-x as broken code ... */
2822                         input_mode = X0208;
2823                         shift_mode = FALSE;
2824                         NEXT;
2825                     } else {
2826                         (*oconv)(0, ESC);
2827                         (*oconv)(0, '$');
2828                         (*oconv)(0, c1);
2829                         NEXT;
2830                     }
2831                 } else if (c1 == '(') {
2832                     if ((c1 = (*i_getc)(f)) == EOF) {
2833                         /* don't send bogus code 
2834                         (*oconv)(0, ESC);
2835                         (*oconv)(0, '('); */
2836                         LAST;
2837                     } else {
2838                         if (c1 == 'I') {
2839                             /* This is X0201 kana introduction */
2840                             input_mode = X0201; shift_mode = X0201;
2841                             NEXT;
2842                         } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2843                             /* ESC ( B / J / H: input_mode goes back to ASCII */
2844                             input_mode = ASCII; shift_mode = FALSE;
2845                             NEXT;
2846                         } else if (broken_f&0x2) {
2847                             input_mode = ASCII; shift_mode = FALSE;
2848                             NEXT;
2849                         } else {
2850                             (*oconv)(0, ESC);
2851                             (*oconv)(0, '(');
2852                             /* maintain various input_mode here */
2853                             SEND;
2854                         }
2855                     }
2856                } else if ( c1 == 'N' || c1 == 'n' ){
2857                    /* SS2 */
2858                    c3 = (*i_getc)(f);  /* skip SS2 */
                   /* NOTE(review): c3 is not checked for EOF before the
                      (*i_ungetc) in the else branch below */
2859                    if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2860                        c1 = c3;
2861                        c2 = X0201;
2862                        SEND;
2863                    }else{
2864                        (*i_ungetc)(c3, f);
2865                        /* lonely ESC  */
2866                        (*oconv)(0, ESC);
2867                        SEND;
2868                    }
2869                 } else {
2870                     /* lonely ESC  */
2871                     (*oconv)(0, ESC);
2872                     SEND;
2873                 }
2874             } else if (c1 == ESC && iconv == s_iconv) {
2875                 /* ESC in Shift_JIS */
2876                 if ((c1 = (*i_getc)(f)) == EOF) {
2877                     /*  (*oconv)(0, ESC); don't send bogus code */
2878                     LAST;
2879                 } else if (c1 == '$') {
2880                     /* J-PHONE emoji */
2881                     if ((c1 = (*i_getc)(f)) == EOF) {
2882                         /*
2883                            (*oconv)(0, ESC); don't send bogus code 
2884                            (*oconv)(0, '$'); */
2885                         LAST;
2886                     } else {
2887                         if (('E' <= c1 && c1 <= 'G') ||
2888                             ('O' <= c1 && c1 <= 'Q')) {
2889                             /*
2890                                NUM : 0 1 2 3 4 5
2891                                BYTE: G E F O P Q
2892                                C%7 : 1 6 0 2 3 4
2893                                C%7 : 0 1 2 3 4 5 6
2894                                NUM : 2 0 3 4 5 X 1
2895                              */
2896                             static const int jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
                            /* c0 becomes the offset added to every following byte in SPACE..'z' */
2897                             c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SPACE + 0xE000 + CLASS_UNICODE;
2898                             while ((c1 = (*i_getc)(f)) != EOF) {
2899                                 if (SPACE <= c1 && c1 <= 'z') {
2900                                     (*oconv)(0, c1 + c0);
2901                                 } else break; /* c1 == SO */
2902                             }
2903                         }
2904                     }
2905                     if (c1 == EOF) LAST;
2906                     NEXT;
2907                 } else {
2908                     /* lonely ESC  */
2909                     (*oconv)(0, ESC);
2910                     SEND;
2911                 }
2912             } else if (c1 == NL || c1 == CR) {
2913                 if (broken_f&4) {
2914                     input_mode = ASCII; set_iconv(FALSE, 0);
2915                     SEND;
2916                 } else if (mime_decode_f && !mime_decode_mode){
                    /* a line break followed by SPACE is swallowed: the SPACE is
                       pushed back and the break itself is not sent */
2917                     if (c1 == NL) {
2918                         if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2919                             i_ungetc(SPACE,f);
2920                             continue;
2921                         } else {
2922                             i_ungetc(c1,f);
2923                         }
2924                         c1 = NL;
2925                         SEND;
2926                     } else  { /* if (c1 == CR)*/
2927                         if ((c1=(*i_getc)(f))!=EOF) {
2928                             if (c1==SPACE) {
2929                                 i_ungetc(SPACE,f);
2930                                 continue;
2931                             } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2932                                 i_ungetc(SPACE,f);
2933                                 continue;
2934                             } else {
2935                                 i_ungetc(c1,f);
2936                             }
                            /* NOTE(review): NL is pushed back here even when the byte
                               after CR was not NL -- confirm this is intended */
2937                             i_ungetc(NL,f);
2938                         } else {
2939                             i_ungetc(c1,f);
2940                         }
2941                         c1 = CR;
2942                         SEND;
2943                     }
2944                 }
                /* remember the first line-break style seen (CRLF, or the byte itself) */
2945                 if (!crmode_f) {
2946                     if (prev_cr && c1 == NL) crmode_f = CRLF;
2947                     else crmode_f = c1;
2948                 }
2949             } else if (c1 == DEL && input_mode == X0208 ) {
2950                 /* CP5022x */
2951                 c2 = c1;
2952                 NEXT;
2953             } else 
2954                 SEND;
2955         }
2956         /* send: */
        /* every SEND path arrives here with the character in c2/c1 (and c0) */
2957         switch(input_mode){
2958         case ASCII:
            /* (*iconv) returning -1 or -2 asks for one or two more input bytes */
2959             switch ((*iconv)(c2, c1, c0)) {  /* can be EUC / SJIS / UTF-8 / UTF-16 */
2960             case -2:
2961                 /* 4 bytes UTF-8 */
2962                 if ((c0 = (*i_getc)(f)) != EOF) {
2963                     code_status(c0);
2964                     c0 <<= 8;
2965                     if ((c3 = (*i_getc)(f)) != EOF) {
2966                         code_status(c3);
2967                         (*iconv)(c2, c1, c0|c3);
2968                     }
2969                 }
2970                 break;
2971             case -1:
2972                 /* 3 bytes EUC or UTF-8 */
2973                 if ((c0 = (*i_getc)(f)) != EOF) {
2974                     code_status(c0);
2975                     (*iconv)(c2, c1, c0);
2976                 }
2977                 break;
2978             }
2979             break;
2980         case X0208:
2981         case X0213_1:
2982             if (ms_ucs_map_f &&
2983                 0x7F <= c2 && c2 <= 0x92 &&
2984                 0x21 <= c1 && c1 <= 0x7E) {
2985                 /* CP932 UDC */
                /* NOTE(review): the next test can never be true because
                   c1 <= 0x7E is required by the enclosing condition */
2986                 if(c1 == 0x7F) return 0;
2987                 c1 = (c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000 + CLASS_UNICODE;
2988                 c2 = 0;
2989             }
2990             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2991             break;
2992 #ifdef X0212_ENABLE
2993         case X0212:
2994             (*oconv)(PREFIX_EUCG3 | c2, c1);
2995             break;
2996 #endif /* X0212_ENABLE */
2997         case X0213_2:
2998             (*oconv)(PREFIX_EUCG3 | c2, c1);
2999             break;
3000         default:
3001             (*oconv)(input_mode, c1);  /* other special case */
3002         }
3003
        /* the character has been consumed: clear the pending bytes */
3004         c2 = 0;
3005         c0 = 0;
3006         continue;
3007         /* goto next_word */
3008     }
3009
3010     /* epilogue */
3011     (*iconv)(EOF, 0, 0);
3012     if (!is_inputcode_set)
3013     {
3014         if (is_8bit) {
            /* report the input_code_list entry with the lowest score */
3015             struct input_code *p = input_code_list;
3016             struct input_code *result = p;
3017             while (p->name){
3018                 if (p->score < result->score) result = p;
3019                 ++p;
3020             }
3021             set_input_codename(result->name);
3022         }
3023     }
3024     return 1;
3025 }
3026
3027 nkf_char
3028 h_conv(FILE *f, nkf_char c2, nkf_char c1)
3029 {
3030     nkf_char ret, c3, c0;
3031     int hold_index;
3032
3033
3034     /** it must NOT be in the kanji shifte sequence      */
3035     /** it must NOT be written in JIS7                   */
3036     /** and it must be after 2 byte 8bit code            */
3037
3038     hold_count = 0;
3039     push_hold_buf(c2);
3040     push_hold_buf(c1);
3041
3042     while ((c1 = (*i_getc)(f)) != EOF) {
3043         if (c1 == ESC){
3044             (*i_ungetc)(c1,f);
3045             break;
3046         }
3047         code_status(c1);
3048         if (push_hold_buf(c1) == EOF || estab_f){
3049             break;
3050         }
3051     }
3052
3053     if (!estab_f){
3054         struct input_code *p = input_code_list;
3055         struct input_code *result = p;
3056         if (c1 == EOF){
3057             code_status(c1);
3058         }
3059         while (p->name){
3060             if (p->status_func && p->score < result->score){
3061                 result = p;
3062             }
3063             ++p;
3064         }
3065         set_iconv(TRUE, result->iconv_func);
3066     }
3067
3068
3069     /** now,
3070      ** 1) EOF is detected, or
3071      ** 2) Code is established, or
3072      ** 3) Buffer is FULL (but last word is pushed)
3073      **
3074      ** in 1) and 3) cases, we continue to use
3075      ** Kanji codes by oconv and leave estab_f unchanged.
3076      **/
3077
3078     ret = c1;
3079     hold_index = 0;
3080     while (hold_index < hold_count){
3081         c2 = hold_buf[hold_index++];
3082         if (c2 <= DEL
3083 #ifdef NUMCHAR_OPTION
3084             || is_unicode_capsule(c2)
3085 #endif
3086             ){
3087             (*iconv)(0, c2, 0);
3088             continue;
3089         }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
3090             (*iconv)(X0201, c2, 0);
3091             continue;
3092         }
3093         if (hold_index < hold_count){
3094             c1 = hold_buf[hold_index++];
3095         }else{
3096             c1 = (*i_getc)(f);
3097             if (c1 == EOF){
3098                 c3 = EOF;
3099                 break;
3100             }
3101             code_status(c1);
3102         }
3103         c0 = 0;
3104         switch ((*iconv)(c2, c1, 0)) {  /* can be EUC/SJIS/UTF-8 */
3105         case -2:
3106             /* 4 bytes UTF-8 */
3107             if (hold_index < hold_count){
3108                 c0 = hold_buf[hold_index++];
3109             } else if ((c0 = (*i_getc)(f)) == EOF) {
3110                 ret = EOF;
3111                 break;
3112             } else {
3113                 code_status(c0);
3114                 c0 <<= 8;
3115                 if (hold_index < hold_count){
3116                     c3 = hold_buf[hold_index++];
3117                 } else if ((c3 = (*i_getc)(f)) == EOF) {
3118                     c0 = ret = EOF;
3119                     break;
3120                 } else {
3121                     code_status(c3);
3122                     (*iconv)(c2, c1, c0|c3);
3123                 }
3124             }
3125             break;
3126         case -1:
3127             /* 3 bytes EUC or UTF-8 */
3128             if (hold_index < hold_count){
3129                 c0 = hold_buf[hold_index++];
3130             } else if ((c0 = (*i_getc)(f)) == EOF) {
3131                 ret = EOF;
3132                 break;
3133             } else {
3134                 code_status(c0);
3135             }
3136             (*iconv)(c2, c1, c0);
3137             break;
3138         }
3139         if (c0 == EOF) break;
3140     }
3141     return ret;
3142 }
3143
3144 nkf_char push_hold_buf(nkf_char c2)
3145 {
3146     if (hold_count >= HOLD_SIZE*2)
3147         return (EOF);
3148     hold_buf[hold_count++] = (unsigned char)c2;
3149     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3150 }
3151
3152 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3153 {
3154 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
3155     nkf_char val;
3156 #endif
3157     static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
3158 #ifdef SHIFTJIS_CP932
3159     if (!cp932inv_f && is_ibmext_in_sjis(c2)){
3160         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
3161         if (val){
3162             c2 = val >> 8;
3163             c1 = val & 0xff;
3164         }
3165     }
3166     if (cp932inv_f
3167         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3168         nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3169         if (c){
3170             c2 = c >> 8;
3171             c1 = c & 0xff;
3172         }
3173     }
3174 #endif /* SHIFTJIS_CP932 */
3175 #ifdef X0212_ENABLE
3176     if (!x0213_f && is_ibmext_in_sjis(c2)){
3177         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
3178         if (val){
3179             if (val > 0x7FFF){
3180                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
3181                 c1 = val & 0xff;
3182             }else{
3183                 c2 = val >> 8;
3184                 c1 = val & 0xff;
3185             }
3186             if (p2) *p2 = c2;
3187             if (p1) *p1 = c1;
3188             return 0;
3189         }
3190     }
3191 #endif
3192     if(c2 >= 0x80){
3193         if(x0213_f && c2 >= 0xF0){
3194             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
3195                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
3196             }else{ /* 78<=k<=94 */
3197                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
3198                 if (0x9E < c1) c2++;
3199             }
3200         }else{
3201             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
3202             if (0x9E < c1) c2++;
3203         }
3204         if (c1 < 0x9F)
3205             c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
3206         else {
3207             c1 = c1 - 0x7E;
3208         }
3209     }
3210
3211 #ifdef X0212_ENABLE
3212     c2 = x0212_unshift(c2);
3213 #endif
3214     if (p2) *p2 = c2;
3215     if (p1) *p1 = c1;
3216     return 0;
3217 }
3218
3219 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3220 {
3221     if (c2 == X0201) {
3222         c1 &= 0x7f;
3223     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3224         /* NOP */
3225     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
3226         /* CP932 UDC */
3227         if(c1 == 0x7F) return 0;
3228         c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
3229         c2 = 0;
3230     } else {
3231         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
3232         if (ret) return ret;
3233     }
3234     (*oconv)(c2, c1);
3235     return 0;
3236 }
3237
3238 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3239 {
3240     if (c2 == X0201) {
3241         c1 &= 0x7f;
3242 #ifdef X0212_ENABLE
3243     }else if (c2 == 0x8f){
3244         if (c0 == 0){
3245             return -1;
3246         }
3247         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
3248             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3249             c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
3250             c2 = 0;
3251         } else {
3252             c2 = (c2 << 8) | (c1 & 0x7f);
3253             c1 = c0 & 0x7f;
3254 #ifdef SHIFTJIS_CP932
3255             if (cp51932_f){
3256                 nkf_char s2, s1;
3257                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3258                     s2e_conv(s2, s1, &c2, &c1);
3259                     if (c2 < 0x100){
3260                         c1 &= 0x7f;
3261                         c2 &= 0x7f;
3262                     }
3263                 }
3264             }
3265 #endif /* SHIFTJIS_CP932 */
3266         }
3267 #endif /* X0212_ENABLE */
3268     } else if (c2 == SSO){
3269         c2 = X0201;
3270         c1 &= 0x7f;
3271     } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
3272         /* NOP */
3273     } else {
3274         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
3275             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
3276             c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
3277             c2 = 0;
3278         } else {
3279             c1 &= 0x7f;
3280             c2 &= 0x7f;
3281 #ifdef SHIFTJIS_CP932
3282             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
3283                 nkf_char s2, s1;
3284                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3285                     s2e_conv(s2, s1, &c2, &c1);
3286                     if (c2 < 0x100){
3287                         c1 &= 0x7f;
3288                         c2 &= 0x7f;
3289                     }
3290                 }
3291             }
3292 #endif /* SHIFTJIS_CP932 */
3293         }
3294     }
3295     (*oconv)(c2, c1);
3296     return 0;
3297 }
3298
3299 #ifdef UTF8_INPUT_ENABLE
3300 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3301 {
3302     nkf_char ret = 0;
3303
3304     if (!c1){
3305         *p2 = 0;
3306         *p1 = c2;
3307     }else if (0xc0 <= c2 && c2 <= 0xef) {
3308         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3309 #ifdef NUMCHAR_OPTION
3310         if (ret > 0){
3311             if (p2) *p2 = 0;
3312             if (p1) *p1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3313             ret = 0;
3314         }
3315 #endif
3316     }
3317     return ret;
3318 }
3319
3320 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3321 {
3322     nkf_char ret = 0;
3323     static const int w_iconv_utf8_1st_byte[] =
3324     { /* 0xC0 - 0xFF */
3325         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3326         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
3327         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
3328         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
3329     
3330     if (c2 < 0 || 0xff < c2) {
3331     }else if (c2 == 0) { /* 0 : 1 byte*/
3332         c0 = 0;
3333     } else if ((c2 & 0xc0) == 0x80) { /* 0x80-0xbf : trail byte */
3334         return 0;
3335     } else{
3336         switch (w_iconv_utf8_1st_byte[c2 - 0xC0]) {
3337         case 21:
3338             if (c1 < 0x80 || 0xBF < c1) return 0;
3339             break;
3340         case 30:
3341             if (c0 == 0) return -1;
3342             if (c1 < 0xA0 || 0xBF < c1 || (c0 & 0xc0) != 0x80)
3343                 return 0;
3344             break;
3345         case 31:
3346         case 33:
3347             if (c0 == 0) return -1;
3348             if ((c1 & 0xc0) != 0x80 || (c0 & 0xc0) != 0x80)
3349                 return 0;
3350             break;
3351         case 32:
3352             if (c0 == 0) return -1;
3353             if (c1 < 0x80 || 0x9F < c1 || (c0 & 0xc0) != 0x80)
3354                 return 0;
3355             break;
3356         case 40:
3357             if (c0 == 0) return -2;
3358             if (c1 < 0x90 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3359                 return 0;
3360             break;
3361         case 41:
3362             if (c0 == 0) return -2;
3363             if (c1 < 0x80 || 0xBF < c1 || (c0 & 0xc0c0) != 0x8080)
3364                 return 0;
3365             break;
3366         case 42:
3367             if (c0 == 0) return -2;
3368             if (c1 < 0x80 || 0x8F < c1 || (c0 & 0xc0c0) != 0x8080)
3369                 return 0;
3370             break;
3371         default:
3372             return 0;
3373             break;
3374         }
3375     }
3376     if (c2 == 0 || c2 == EOF){
3377     } else if ((c2 & 0xf8) == 0xf0) { /* 4 bytes */
3378         c1 = CLASS_UNICODE | ww16_conv(c2, c1, c0);
3379         c2 = 0;
3380     } else {
3381         ret = w2e_conv(c2, c1, c0, &c2, &c1);
3382     }
3383     if (ret == 0){
3384         (*oconv)(c2, c1);
3385     }
3386     return ret;
3387 }
3388 #endif
3389
3390 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3391 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3392 {
3393     val &= VALUE_MASK;
3394     if (val < 0x80){
3395         *p2 = val;
3396         *p1 = 0;
3397         *p0 = 0;
3398     }else if (val < 0x800){
3399         *p2 = 0xc0 | (val >> 6);
3400         *p1 = 0x80 | (val & 0x3f);
3401         *p0 = 0;
3402     } else if (val <= NKF_INT32_C(0xFFFF)) {
3403         *p2 = 0xe0 | (val >> 12);
3404         *p1 = 0x80 | ((val >> 6) & 0x3f);
3405         *p0 = 0x80 | (val        & 0x3f);
3406     } else if (val <= NKF_INT32_C(0x10FFFF)) {
3407         *p2 = 0xe0 |  (val >> 16);
3408         *p1 = 0x80 | ((val >> 12) & 0x3f);
3409         *p0 = 0x8080 | ((val << 2) & 0x3f00)| (val & 0x3f);
3410     } else {
3411         *p2 = 0;
3412         *p1 = 0;
3413         *p0 = 0;
3414     }
3415 }
3416 #endif
3417
3418 #ifdef UTF8_INPUT_ENABLE
3419 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3420 {
3421     nkf_char val;
3422     if (c2 >= 0xf8) {
3423         val = -1;
3424     } else if (c2 >= 0xf0){
3425         /* c2: 1st, c1: 2nd, c0: 3rd/4th */
3426         val = (c2 & 0x0f) << 18;
3427         val |= (c1 & 0x3f) << 12;
3428         val |= (c0 & 0x3f00) >> 2;
3429         val |= (c0 & 0x3f);
3430     }else if (c2 >= 0xe0){
3431         val = (c2 & 0x0f) << 12;
3432         val |= (c1 & 0x3f) << 6;
3433         val |= (c0 & 0x3f);
3434     }else if (c2 >= 0xc0){
3435         val = (c2 & 0x1f) << 6;
3436         val |= (c1 & 0x3f);
3437     }else{
3438         val = c2;
3439     }
3440     return val;
3441 }
3442
3443 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3444 {
3445     nkf_char c2, c1, c0;
3446     nkf_char ret = 0;
3447     val &= VALUE_MASK;
3448     if (val < 0x80){
3449         *p2 = 0;
3450         *p1 = val;
3451     }else{
3452         w16w_conv(val, &c2, &c1, &c0);
3453         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
3454 #ifdef NUMCHAR_OPTION
3455         if (ret > 0){
3456             *p2 = 0;
3457             *p1 = CLASS_UNICODE | val;
3458             ret = 0;
3459         }
3460 #endif
3461     }
3462     return ret;
3463 }
3464 #endif
3465
3466 #ifdef UTF8_INPUT_ENABLE
3467 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3468 {
3469     nkf_char ret = 0;
3470     if ((c2==0 && c1 < 0x80) || c2==EOF) {
3471         (*oconv)(c2, c1);
3472         return 0;
3473     }else if (0xD8 <= c2 && c2 <= 0xDB) {
3474         if (c0 < NKF_INT32_C(0xDC00) || NKF_INT32_C(0xDFFF) < c0)
3475             return -2;
3476         c1 =  CLASS_UNICODE | ((c2 << 18) + (c1 << 10) + c0 - NKF_INT32_C(0x35FDC00));
3477         c2 = 0;
3478     }else if ((c2>>3) == 27) { /* unpaired surrogate */
3479         /*
3480            return 2;
3481         */
3482         return 1;
3483     }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3484     if (ret) return ret;
3485     (*oconv)(c2, c1);
3486     return 0;
3487 }
3488
3489 nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
3490 {
3491     int ret = 0;
3492
3493     if ((c2 == 0 && c1 < 0x80) || c2==EOF) {
3494     } else if (is_unicode_bmp(c1)) {
3495         ret = w16e_conv(c1, &c2, &c1);
3496     } else {
3497         c2 = 0;
3498         c1 =  CLASS_UNICODE | c1;
3499     }
3500     if (ret) return ret;
3501     (*oconv)(c2, c1);
3502     return 0;
3503 }
3504
3505 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3506 {
3507     const unsigned short *const *pp;
3508     const unsigned short *const *const *ppp;
3509     static const int no_best_fit_chars_table_C2[] =
3510     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3511         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3512         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3513         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3514     static const int no_best_fit_chars_table_C2_ms[] =
3515     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3516         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3517         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3518         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3519     static const int no_best_fit_chars_table_932_C2[] =
3520     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3521         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3522         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3523         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3524     static const int no_best_fit_chars_table_932_C3[] =
3525     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3526         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3527         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3528         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3529     nkf_char ret = 0;
3530
3531     if(c2 < 0x80){
3532         *p2 = 0;
3533         *p1 = c2;
3534     }else if(c2 < 0xe0){
3535         if(no_best_fit_chars_f){
3536             if(ms_ucs_map_f == UCS_MAP_CP932){
3537                 switch(c2){
3538                 case 0xC2:
3539                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3540                     break;
3541                 case 0xC3:
3542                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3543                     break;
3544                 }
3545             }else if(!cp932inv_f){
3546                 switch(c2){
3547                 case 0xC2:
3548                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3549                     break;
3550                 case 0xC3:
3551                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3552                     break;
3553                 }
3554             }else if(ms_ucs_map_f == UCS_MAP_MS){
3555                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3556             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3557                 switch(c2){
3558                 case 0xC2:
3559                     switch(c1){
3560                     case 0xA2:
3561                     case 0xA3:
3562                     case 0xA5:
3563                     case 0xA6:
3564                     case 0xAC:
3565                     case 0xAF:
3566                     case 0xB8:
3567                         return 1;
3568                     }
3569                     break;
3570                 }
3571             }
3572         }
3573         pp =
3574             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3575             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3576             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
3577             utf8_to_euc_2bytes;
3578         ret =  w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3579     }else if(c0 < 0xF0){
3580         if(no_best_fit_chars_f){
3581             if(ms_ucs_map_f == UCS_MAP_CP932){
3582                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3583             }else if(ms_ucs_map_f == UCS_MAP_MS){
3584                 switch(c2){
3585                 case 0xE2:
3586                     switch(c1){
3587                     case 0x80:
3588                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3589                         break;
3590                     case 0x88:
3591                         if(c0 == 0x92) return 1;
3592                         break;
3593                     }
3594                     break;
3595                 case 0xE3:
3596                     if(c1 == 0x80 || c0 == 0x9C) return 1;
3597                     break;
3598                 }
3599             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
3600                 switch(c2){
3601                 case 0xE3:
3602                     switch(c1){
3603                     case 0x82:
3604                             if(c0 == 0x94) return 1;
3605                         break;
3606                     case 0x83:
3607                             if(c0 == 0xBB) return 1;
3608                         break;
3609                     }
3610                     break;
3611                 }
3612             }else{
3613                 switch(c2){
3614                 case 0xE2:
3615                     switch(c1){
3616                     case 0x80:
3617                         if(c0 == 0x95) return 1;
3618                         break;
3619                     case 0x88:
3620                         if(c0 == 0xA5) return 1;
3621                         break;
3622                     }
3623                     break;
3624                 case 0xEF:
3625                     switch(c1){
3626                     case 0xBC:
3627                         if(c0 == 0x8D) return 1;
3628                         break;
3629                     case 0xBD:
3630                         if(c0 == 0x9E && !cp932inv_f) return 1;
3631                         break;
3632                     case 0xBF:
3633                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3634                         break;
3635                     }
3636                     break;
3637                 }
3638             }
3639         }
3640         ppp =
3641             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3642             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3643             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
3644             utf8_to_euc_3bytes;
3645         ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3646     }else return -1;
3647 #ifdef SHIFTJIS_CP932
3648     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
3649         nkf_char s2, s1;
3650         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3651             s2e_conv(s2, s1, p2, p1);
3652         }else{
3653             ret = 1;
3654         }
3655     }
3656 #endif
3657     return ret;
3658 }
3659
3660 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3661 {
3662     nkf_char c2;
3663     const unsigned short *p;
3664     unsigned short val;
3665
3666     if (pp == 0) return 1;
3667
3668     c1 -= 0x80;
3669     if (c1 < 0 || psize <= c1) return 1;
3670     p = pp[c1];
3671     if (p == 0)  return 1;
3672
3673     c0 -= 0x80;
3674     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3675     val = p[c0];
3676     if (val == 0) return 1;
3677     if (no_cp932ext_f && (
3678         (val>>8) == 0x2D || /* NEC special characters */
3679         val > NKF_INT32_C(0xF300) /* IBM extended characters */
3680         )) return 1;
3681
3682     c2 = val >> 8;
3683    if (val > 0x7FFF){
3684         c2 &= 0x7f;
3685         c2 |= PREFIX_EUCG3;
3686     }
3687     if (c2 == SO) c2 = X0201;
3688     c1 = val & 0x7f;
3689     if (p2) *p2 = c2;
3690     if (p1) *p1 = c1;
3691     return 0;
3692 }
3693
3694 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3695 {
3696     const char *hex = "0123456789ABCDEF";
3697     int shift = 20;
3698     c &= VALUE_MASK;
3699     while(shift >= 0){
3700         if(c >= 1<<shift){
3701             while(shift >= 0){
3702                 (*f)(0, hex[(c>>shift)&0xF]);
3703                 shift -= 4;
3704             }
3705         }else{
3706             shift -= 4;
3707         }
3708     }
3709     return;
3710 }
3711
3712 void encode_fallback_html(nkf_char c)
3713 {
3714     (*oconv)(0, '&');
3715     (*oconv)(0, '#');
3716     c &= VALUE_MASK;
3717     if(c >= NKF_INT32_C(1000000))
3718         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3719     if(c >= NKF_INT32_C(100000))
3720         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3721     if(c >= 10000)
3722         (*oconv)(0, 0x30+(c/10000  )%10);
3723     if(c >= 1000)
3724         (*oconv)(0, 0x30+(c/1000   )%10);
3725     if(c >= 100)
3726         (*oconv)(0, 0x30+(c/100    )%10);
3727     if(c >= 10)
3728         (*oconv)(0, 0x30+(c/10     )%10);
3729     if(c >= 0)
3730         (*oconv)(0, 0x30+ c         %10);
3731     (*oconv)(0, ';');
3732     return;
3733 }
3734
3735 void encode_fallback_xml(nkf_char c)
3736 {
3737     (*oconv)(0, '&');
3738     (*oconv)(0, '#');
3739     (*oconv)(0, 'x');
3740     nkf_each_char_to_hex(oconv, c);
3741     (*oconv)(0, ';');
3742     return;
3743 }
3744
3745 void encode_fallback_java(nkf_char c)
3746 {
3747     const char *hex = "0123456789ABCDEF";
3748     (*oconv)(0, '\\');
3749     c &= VALUE_MASK;
3750     if(!is_unicode_bmp(c)){
3751         (*oconv)(0, 'U');
3752         (*oconv)(0, '0');
3753         (*oconv)(0, '0');
3754         (*oconv)(0, hex[(c>>20)&0xF]);
3755         (*oconv)(0, hex[(c>>16)&0xF]);
3756     }else{
3757         (*oconv)(0, 'u');
3758     }
3759     (*oconv)(0, hex[(c>>12)&0xF]);
3760     (*oconv)(0, hex[(c>> 8)&0xF]);
3761     (*oconv)(0, hex[(c>> 4)&0xF]);
3762     (*oconv)(0, hex[ c     &0xF]);
3763     return;
3764 }
3765
3766 void encode_fallback_perl(nkf_char c)
3767 {
3768     (*oconv)(0, '\\');
3769     (*oconv)(0, 'x');
3770     (*oconv)(0, '{');
3771     nkf_each_char_to_hex(oconv, c);
3772     (*oconv)(0, '}');
3773     return;
3774 }
3775
3776 void encode_fallback_subchar(nkf_char c)
3777 {
3778     c = unicode_subchar;
3779     (*oconv)((c>>8)&0xFF, c&0xFF);
3780     return;
3781 }
3782 #endif
3783
3784 #ifdef UTF8_OUTPUT_ENABLE
3785 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3786 {
3787     const unsigned short *p;
3788
3789     if (c2 == X0201) {
3790         if (ms_ucs_map_f == UCS_MAP_CP10001) {
3791             switch (c1) {
3792             case 0x20:
3793                 return 0xA0;
3794             case 0x7D:
3795                 return 0xA9;
3796             }
3797         }
3798         p = euc_to_utf8_1byte;
3799 #ifdef X0212_ENABLE
3800     } else if (is_eucg3(c2)){
3801         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3802             return 0xA6;
3803         }
3804         c2 = (c2&0x7f) - 0x21;
3805         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3806             p = x0212_to_utf8_2bytes[c2];
3807         else
3808             return 0;
3809 #endif
3810     } else {
3811         c2 &= 0x7f;
3812         c2 = (c2&0x7f) - 0x21;
3813         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3814             p =
3815                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
3816                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
3817                 euc_to_utf8_2bytes_ms[c2];
3818         else
3819             return 0;
3820     }
3821     if (!p) return 0;
3822     c1 = (c1 & 0x7f) - 0x21;
3823     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3824         return p[c1];
3825     return 0;
3826 }
3827
3828 void w_oconv(nkf_char c2, nkf_char c1)
3829 {
3830     nkf_char c0;
3831     nkf_char val;
3832
3833     if (output_bom_f) {
3834         output_bom_f = FALSE;
3835         (*o_putc)('\357');
3836         (*o_putc)('\273');
3837         (*o_putc)('\277');
3838     }
3839
3840     if (c2 == EOF) {
3841         (*o_putc)(EOF);
3842         return;
3843     }
3844
3845 #ifdef NUMCHAR_OPTION
3846     if (c2 == 0 && is_unicode_capsule(c1)){
3847         val = c1 & VALUE_MASK;
3848         if (val < 0x80){
3849             (*o_putc)(val);
3850         }else if (val < 0x800){
3851             (*o_putc)(0xC0 | (val >> 6));
3852             (*o_putc)(0x80 | (val & 0x3f));
3853         } else if (val <= NKF_INT32_C(0xFFFF)) {
3854             (*o_putc)(0xE0 | (val >> 12));
3855             (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3856             (*o_putc)(0x80 | (val        & 0x3f));
3857         } else if (val <= NKF_INT32_C(0x10FFFF)) {
3858             (*o_putc)(0xF0 | ( val>>18));
3859             (*o_putc)(0x80 | ((val>>12) & 0x3f));
3860             (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3861             (*o_putc)(0x80 | ( val      & 0x3f));
3862         }
3863         return;
3864     }
3865 #endif
3866
3867     if (c2 == 0) { 
3868         output_mode = ASCII;
3869         (*o_putc)(c1);
3870     } else if (c2 == ISO8859_1) {
3871         output_mode = ISO8859_1;
3872         (*o_putc)(c1 | 0x080);
3873     } else {
3874         output_mode = UTF8;
3875         val = e2w_conv(c2, c1);
3876         if (val){
3877             w16w_conv(val, &c2, &c1, &c0);
3878             (*o_putc)(c2);
3879             if (c1){
3880                 (*o_putc)(c1);
3881                 if (c0) (*o_putc)(c0);
3882             }
3883         }
3884     }
3885 }
3886
3887 void w_oconv16(nkf_char c2, nkf_char c1)
3888 {
3889     if (output_bom_f) {
3890         output_bom_f = FALSE;
3891         if (output_endian == ENDIAN_LITTLE){
3892             (*o_putc)((unsigned char)'\377');
3893             (*o_putc)('\376');
3894         }else{
3895             (*o_putc)('\376');
3896             (*o_putc)((unsigned char)'\377');
3897         }
3898     }
3899
3900     if (c2 == EOF) {
3901         (*o_putc)(EOF);
3902         return;
3903     }
3904
3905     if (c2 == ISO8859_1) {
3906         c2 = 0;
3907         c1 |= 0x80;
3908 #ifdef NUMCHAR_OPTION
3909     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3910         if (is_unicode_bmp(c1)) {
3911             c2 = (c1 >> 8) & 0xff;
3912             c1 &= 0xff;
3913         } else {
3914             c1 &= VALUE_MASK;
3915             if (c1 <= UNICODE_MAX) {
3916                 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
3917                 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3918                 if (output_endian == ENDIAN_LITTLE){
3919                     (*o_putc)(c2 & 0xff);
3920                     (*o_putc)((c2 >> 8) & 0xff);
3921                     (*o_putc)(c1 & 0xff);
3922                     (*o_putc)((c1 >> 8) & 0xff);
3923                 }else{
3924                     (*o_putc)((c2 >> 8) & 0xff);
3925                     (*o_putc)(c2 & 0xff);
3926                     (*o_putc)((c1 >> 8) & 0xff);
3927                     (*o_putc)(c1 & 0xff);
3928                 }
3929             }
3930             return;
3931         }
3932 #endif
3933     } else if (c2) {
3934         nkf_char val = e2w_conv(c2, c1);
3935         c2 = (val >> 8) & 0xff;
3936         c1 = val & 0xff;
3937         if (!val) return;
3938     }
3939     if (output_endian == ENDIAN_LITTLE){
3940         (*o_putc)(c1);
3941         (*o_putc)(c2);
3942     }else{
3943         (*o_putc)(c2);
3944         (*o_putc)(c1);
3945     }
3946 }
3947
3948 void w_oconv32(nkf_char c2, nkf_char c1)
3949 {
3950     if (output_bom_f) {
3951         output_bom_f = FALSE;
3952         if (output_endian == ENDIAN_LITTLE){
3953             (*o_putc)((unsigned char)'\377');
3954             (*o_putc)('\376');
3955             (*o_putc)('\000');
3956             (*o_putc)('\000');
3957         }else{
3958             (*o_putc)('\000');
3959             (*o_putc)('\000');
3960             (*o_putc)('\376');
3961             (*o_putc)((unsigned char)'\377');
3962         }
3963     }
3964
3965     if (c2 == EOF) {
3966         (*o_putc)(EOF);
3967         return;
3968     }
3969
3970     if (c2 == ISO8859_1) {
3971         c1 |= 0x80;
3972 #ifdef NUMCHAR_OPTION
3973     } else if (c2 == 0 && is_unicode_capsule(c1)) {
3974         c1 &= VALUE_MASK;
3975 #endif
3976     } else if (c2) {
3977         c1 = e2w_conv(c2, c1);
3978         if (!c1) return;
3979     }
3980     if (output_endian == ENDIAN_LITTLE){
3981         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3982         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3983         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3984         (*o_putc)('\000');
3985     }else{
3986         (*o_putc)('\000');
3987         (*o_putc)((c1 & NKF_INT32_C(0x00FF0000)) >> 16);
3988         (*o_putc)((c1 & NKF_INT32_C(0x0000FF00)) >>  8);
3989         (*o_putc)( c1 & NKF_INT32_C(0x000000FF));
3990     }
3991 }
3992 #endif
3993
3994 void e_oconv(nkf_char c2, nkf_char c1)
3995 {
3996 #ifdef NUMCHAR_OPTION
3997     if (c2 == 0 && is_unicode_capsule(c1)){
3998         w16e_conv(c1, &c2, &c1);
3999         if (c2 == 0 && is_unicode_capsule(c1)){
4000             c2 = c1 & VALUE_MASK;
4001             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
4002                 /* eucJP-ms UDC */
4003                 c1 &= 0xFFF;
4004                 c2 = c1 / 94;
4005                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
4006                 c1 = 0x21 + c1 % 94;
4007                 if (is_eucg3(c2)){
4008                     (*o_putc)(0x8f);
4009                     (*o_putc)((c2 & 0x7f) | 0x080);
4010                     (*o_putc)(c1 | 0x080);
4011                 }else{
4012                     (*o_putc)((c2 & 0x7f) | 0x080);
4013                     (*o_putc)(c1 | 0x080);
4014                 }
4015                 return;
4016             } else {
4017                 if (encode_fallback) (*encode_fallback)(c1);
4018                 return;
4019             }
4020         }
4021     }
4022 #endif
4023     if (c2 == EOF) {
4024         (*o_putc)(EOF);
4025         return;
4026     } else if (c2 == 0) { 
4027         output_mode = ASCII;
4028         (*o_putc)(c1);
4029     } else if (c2 == X0201) {
4030         output_mode = JAPANESE_EUC;
4031         (*o_putc)(SSO); (*o_putc)(c1|0x80);
4032     } else if (c2 == ISO8859_1) {
4033         output_mode = ISO8859_1;
4034         (*o_putc)(c1 | 0x080);
4035 #ifdef X0212_ENABLE
4036     } else if (is_eucg3(c2)){
4037         output_mode = JAPANESE_EUC;
4038 #ifdef SHIFTJIS_CP932
4039         if (!cp932inv_f){
4040             nkf_char s2, s1;
4041             if (e2s_conv(c2, c1, &s2, &s1) == 0){
4042                 s2e_conv(s2, s1, &c2, &c1);
4043             }
4044         }
4045 #endif
4046         if (c2 == 0) {
4047             output_mode = ASCII;
4048             (*o_putc)(c1);
4049         }else if (is_eucg3(c2)){
4050             if (x0212_f){
4051                 (*o_putc)(0x8f);
4052                 (*o_putc)((c2 & 0x7f) | 0x080);
4053                 (*o_putc)(c1 | 0x080);
4054             }
4055         }else{
4056             (*o_putc)((c2 & 0x7f) | 0x080);
4057             (*o_putc)(c1 | 0x080);
4058         }
4059 #endif
4060     } else {
4061         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
4062             set_iconv(FALSE, 0);
4063             return; /* too late to rescue this char */
4064         }
4065         output_mode = JAPANESE_EUC;
4066         (*o_putc)(c2 | 0x080);
4067         (*o_putc)(c1 | 0x080);
4068     }
4069 }
4070
4071 #ifdef X0212_ENABLE
4072 nkf_char x0212_shift(nkf_char c)
4073 {
4074     nkf_char ret = c;
4075     c &= 0x7f;
4076     if (is_eucg3(ret)){
4077         if (0x75 <= c && c <= 0x7f){
4078             ret = c + (0x109 - 0x75);
4079         }
4080     }else{
4081         if (0x75 <= c && c <= 0x7f){
4082             ret = c + (0x113 - 0x75);
4083         }
4084     }
4085     return ret;
4086 }
4087
4088
4089 nkf_char x0212_unshift(nkf_char c)
4090 {
4091     nkf_char ret = c;
4092     if (0x7f <= c && c <= 0x88){
4093         ret = c + (0x75 - 0x7f);
4094     }else if (0x89 <= c && c <= 0x92){
4095         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
4096     }
4097     return ret;
4098 }
4099 #endif /* X0212_ENABLE */
4100
4101 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
4102 {
4103     nkf_char ndx;
4104     if (is_eucg3(c2)){
4105         ndx = c2 & 0x7f;
4106         if (x0213_f){
4107             if((0x21 <= ndx && ndx <= 0x2F)){
4108                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
4109                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4110                 return 0;
4111             }else if(0x6E <= ndx && ndx <= 0x7E){
4112                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
4113                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4114                 return 0;
4115             }
4116             return 1;
4117         }
4118 #ifdef X0212_ENABLE
4119         else if(nkf_isgraph(ndx)){
4120             nkf_char val = 0;
4121             const unsigned short *ptr;
4122             ptr = x0212_shiftjis[ndx - 0x21];
4123             if (ptr){
4124                 val = ptr[(c1 & 0x7f) - 0x21];
4125             }
4126             if (val){
4127                 c2 = val >> 8;
4128                 c1 = val & 0xff;
4129                 if (p2) *p2 = c2;
4130                 if (p1) *p1 = c1;
4131                 return 0;
4132             }
4133             c2 = x0212_shift(c2);
4134         }
4135 #endif /* X0212_ENABLE */
4136     }
4137     if(0x7F < c2) return 1;
4138     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
4139     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
4140     return 0;
4141 }
4142
4143 void s_oconv(nkf_char c2, nkf_char c1)
4144 {
4145 #ifdef NUMCHAR_OPTION
4146     if (c2 == 0 && is_unicode_capsule(c1)){
4147         w16e_conv(c1, &c2, &c1);
4148         if (c2 == 0 && is_unicode_capsule(c1)){
4149             c2 = c1 & VALUE_MASK;
4150             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
4151                 /* CP932 UDC */
4152                 c1 &= 0xFFF;
4153                 c2 = c1 / 188 + 0xF0;
4154                 c1 = c1 % 188;
4155                 c1 += 0x40 + (c1 > 0x3e);
4156                 (*o_putc)(c2);
4157                 (*o_putc)(c1);
4158                 return;
4159             } else {
4160                 if(encode_fallback)(*encode_fallback)(c1);
4161                 return;
4162             }
4163         }
4164     }
4165 #endif
4166     if (c2 == EOF) {
4167         (*o_putc)(EOF);
4168         return;
4169     } else if (c2 == 0) {
4170         output_mode = ASCII;
4171         (*o_putc)(c1);
4172     } else if (c2 == X0201) {
4173         output_mode = SHIFT_JIS;
4174         (*o_putc)(c1|0x80);
4175     } else if (c2 == ISO8859_1) {
4176         output_mode = ISO8859_1;
4177         (*o_putc)(c1 | 0x080);
4178 #ifdef X0212_ENABLE
4179     } else if (is_eucg3(c2)){
4180         output_mode = SHIFT_JIS;
4181         if (e2s_conv(c2, c1, &c2, &c1) == 0){
4182             (*o_putc)(c2);
4183             (*o_putc)(c1);
4184         }
4185 #endif
4186     } else {
4187         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
4188             set_iconv(FALSE, 0);
4189             return; /* too late to rescue this char */
4190         }
4191         output_mode = SHIFT_JIS;
4192         e2s_conv(c2, c1, &c2, &c1);
4193
4194 #ifdef SHIFTJIS_CP932
4195         if (cp932inv_f
4196             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
4197             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
4198             if (c){
4199                 c2 = c >> 8;
4200                 c1 = c & 0xff;
4201             }
4202         }
4203 #endif /* SHIFTJIS_CP932 */
4204
4205         (*o_putc)(c2);
4206         if (prefix_table[(unsigned char)c1]){
4207             (*o_putc)(prefix_table[(unsigned char)c1]);