1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.70 2005/07/05 12:39:00 naruse Exp $ */
43 #define NKF_VERSION "2.0.5"
44 #define NKF_RELEASE_DATE "2005-07-05"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
108 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
109 #define UNICODE_ENABLE
111 #undef UNICODE_NORMALIZATION
120 #if defined(MSDOS) || defined(__OS2__)
127 #define setbinmode(fp) fsetbin(fp)
128 #else /* Microsoft C, Turbo C */
129 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
131 #else /* UNIX,OS/2 */
132 #define setbinmode(fp)
135 #ifdef _IOFBF /* SysV and MSDOS, Windows */
136 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
138 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
141 /*Borland C++ 4.5 EasyWin*/
142 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
151 /* added by satoru@isoternet.org */
153 #include <sys/stat.h>
154 #ifndef MSDOS /* UNIX, OS/2 */
157 #else /* defined(MSDOS) */
159 #ifdef __BORLANDC__ /* BCC32 */
161 #else /* !defined(__BORLANDC__) */
162 #include <sys/utime.h>
163 #endif /* (__BORLANDC__) */
164 #else /* !defined(__WIN32__) */
165 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
166 #include <sys/utime.h>
167 #elif defined(__TURBOC__) /* BCC */
169 #elif defined(LSI_C) /* LSI C */
170 #endif /* (__WIN32__) */
182 /* state of output_mode and input_mode
200 /* Input Assumption */
204 #define LATIN1_INPUT 6
206 #define STRICT_MIME 8
211 #define JAPANESE_EUC 10
215 #define UTF8_INPUT 13
216 #define UTF16BE_INPUT 14
217 #define UTF16LE_INPUT 15
/*
 * is_alnum(c): non-zero when c is an ASCII letter or digit
 * ('a'-'z', 'A'-'Z', '0'-'9'), zero otherwise.
 *
 * The argument is parenthesized at every use so that compound
 * expressions (for example a conditional expression) expand with the
 * intended precedence.  The argument is still evaluated more than
 * once, so do not pass an expression with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
240 #define HOLD_SIZE 1024
241 #define IOBUF_SIZE 16384
243 #define DEFAULT_J 'B'
244 #define DEFAULT_R 'B'
246 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
247 #define SJ6394 0x0161 /* 63 - 94 ku offset */
249 #define RANGE_NUM_MAX 18
254 #ifdef UNICODE_ENABLE
255 #define sizeof_euc_utf8 94
256 #define sizeof_euc_to_utf8_1byte 94
257 #define sizeof_euc_to_utf8_2bytes 94
258 #define sizeof_utf8_to_euc_C2 64
259 #define sizeof_utf8_to_euc_E5B8 64
260 #define sizeof_utf8_to_euc_2bytes 112
261 #define sizeof_utf8_to_euc_3bytes 112
264 /* MIME preprocessor */
267 #ifdef EASYWIN /*Easy Win */
268 extern POINT _BufferSize;
271 /* function prototype */
273 #ifdef ANSI_C_PROTOTYPE
275 #define STATIC static
287 void (*status_func)PROTO((struct input_code *, int));
288 int (*iconv_func)PROTO((int c2, int c1, int c0));
292 STATIC char *input_codename = "";
294 STATIC int noconvert PROTO((FILE *f));
295 STATIC int kanji_convert PROTO((FILE *f));
296 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
297 STATIC int push_hold_buf PROTO((int c2));
298 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
299 STATIC int s_iconv PROTO((int c2,int c1,int c0));
300 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
301 STATIC int e_iconv PROTO((int c2,int c1,int c0));
302 #ifdef UTF8_INPUT_ENABLE
303 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
304 STATIC int w_iconv PROTO((int c2,int c1,int c0));
305 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
306 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
307 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
309 #ifdef UTF8_OUTPUT_ENABLE
310 STATIC int e2w_conv PROTO((int c2,int c1));
311 STATIC void w_oconv PROTO((int c2,int c1));
312 STATIC void w_oconv16 PROTO((int c2,int c1));
314 STATIC void e_oconv PROTO((int c2,int c1));
315 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
316 STATIC void s_oconv PROTO((int c2,int c1));
317 STATIC void j_oconv PROTO((int c2,int c1));
318 STATIC void fold_conv PROTO((int c2,int c1));
319 STATIC void cr_conv PROTO((int c2,int c1));
320 STATIC void z_conv PROTO((int c2,int c1));
321 STATIC void rot_conv PROTO((int c2,int c1));
322 STATIC void hira_conv PROTO((int c2,int c1));
323 STATIC void base64_conv PROTO((int c2,int c1));
324 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
325 STATIC void no_connection PROTO((int c2,int c1));
326 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
328 STATIC void code_score PROTO((struct input_code *ptr));
329 STATIC void code_status PROTO((int c));
331 STATIC void std_putc PROTO((int c));
332 STATIC int std_getc PROTO((FILE *f));
333 STATIC int std_ungetc PROTO((int c,FILE *f));
335 STATIC int broken_getc PROTO((FILE *f));
336 STATIC int broken_ungetc PROTO((int c,FILE *f));
338 STATIC int mime_begin PROTO((FILE *f));
339 STATIC int mime_getc PROTO((FILE *f));
340 STATIC int mime_ungetc PROTO((int c,FILE *f));
342 STATIC int mime_begin_strict PROTO((FILE *f));
343 STATIC int mime_getc_buf PROTO((FILE *f));
344 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
345 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
347 STATIC int base64decode PROTO((int c));
348 STATIC void mime_prechar PROTO((int c2, int c1));
349 STATIC void mime_putc PROTO((int c));
350 STATIC void open_mime PROTO((int c));
351 STATIC void close_mime PROTO(());
352 STATIC void usage PROTO(());
353 STATIC void version PROTO(());
354 STATIC void options PROTO((unsigned char *c));
355 #if defined(PERL_XS) || defined(WIN32DLL)
356 STATIC void reinit PROTO(());
361 static unsigned char stdibuf[IOBUF_SIZE];
362 static unsigned char stdobuf[IOBUF_SIZE];
363 static unsigned char hold_buf[HOLD_SIZE*2];
364 static int hold_count;
366 /* MIME preprocessor fifo */
368 #define MIME_BUF_SIZE (1024) /* ring buffer capacity; must be a power of two so that MIME_BUF_MASK works */
369 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) /* bit mask that wraps an index into the buffer */
370 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] /* buffer slot for running index n, wrapped by the mask */
371 static unsigned char mime_buf[MIME_BUF_SIZE]; /* storage accessed through Fifo() */
372 static unsigned int mime_top = 0; /* NOTE(review): presumably the read position of the fifo - confirm against its users */
373 static unsigned int mime_last = 0; /* decoded */
374 static unsigned int mime_input = 0; /* undecoded */
377 static int unbuf_f = FALSE; /* NOTE(review): presumably set by -u (unbuffered output) - setter not visible here */
378 static int estab_f = FALSE; /* NOTE(review): presumably TRUE once the input code has been established - confirm */
379 static int nop_f = FALSE; /* NOTE(review): presumably "no conversion" (see noconvert) - confirm */
380 static int binmode_f = TRUE; /* binary mode */
381 static int rot_f = FALSE; /* ROT13/47 mode */
382 static int hira_f = FALSE; /* hiragana/katakana conversion */
383 static int input_f = FALSE; /* FALSE: input code not fixed; options set it to an *_INPUT constant */
384 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
385 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
386 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
387 static int mimebuf_f = FALSE; /* MIME buffered input */
388 static int broken_f = FALSE; /* convert ESC-less broken JIS */
389 static int iso8859_f = FALSE; /* ISO8859 through */
390 static int mimeout_f = FALSE; /* base64 mode */
391 #if defined(MSDOS) || defined(__OS2__)
392 static int x0201_f = TRUE; /* Assume JISX0201 kana */
394 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
396 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
397 #ifdef UNICODE_ENABLE
398 static int internal_unicode_f = FALSE; /* Internal Unicode Processing */
400 #ifdef UTF8_OUTPUT_ENABLE
401 static int unicode_bom_f= 0; /* Output Unicode BOM */
402 static int w_oconv16_LE = 0; /* utf-16 little endian */
403 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
406 #ifdef UNICODE_NORMALIZATION
407 static int nfc_f = FALSE;
408 static int (*i_nfc_getc)PROTO((FILE *)) = std_getc; /* input of ugetc */
409 static int (*i_nfc_ungetc)PROTO((int c ,FILE *f)) = std_ungetc;
410 STATIC int nfc_getc PROTO((FILE *f));
411 STATIC int nfc_ungetc PROTO((int c,FILE *f));
415 static int cap_f = FALSE;
416 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
417 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
418 STATIC int cap_getc PROTO((FILE *f));
419 STATIC int cap_ungetc PROTO((int c,FILE *f));
421 static int url_f = FALSE;
422 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
423 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
424 STATIC int url_getc PROTO((FILE *f));
425 STATIC int url_ungetc PROTO((int c,FILE *f));
428 #ifdef NUMCHAR_OPTION
429 #define CLASS_MASK 0x0f000000
430 #define CLASS_UTF16 0x01000000
431 static int numchar_f = FALSE;
432 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
433 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
434 STATIC int numchar_getc PROTO((FILE *f));
435 STATIC int numchar_ungetc PROTO((int c,FILE *f));
439 static int noout_f = FALSE;
440 STATIC void no_putc PROTO((int c));
441 static int debug_f = FALSE;
442 STATIC void debug PROTO((char *str));
445 static int guess_f = FALSE;
446 STATIC void print_guessed_code PROTO((char *filename));
447 STATIC void set_input_codename PROTO((char *codename));
448 static int is_inputcode_mixed = FALSE;
449 static int is_inputcode_set = FALSE;
452 static int exec_f = 0;
455 #ifdef SHIFTJIS_CP932
456 STATIC int cp932_f = TRUE;
457 #define CP932_TABLE_BEGIN (0xfa)
458 #define CP932_TABLE_END (0xfc)
460 STATIC int cp932inv_f = TRUE;
461 #define CP932INV_TABLE_BEGIN (0xed)
462 #define CP932INV_TABLE_END (0xee)
464 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
465 #endif /* SHIFTJIS_CP932 */
468 STATIC int x0212_f = FALSE;
469 static int x0212_shift PROTO((int c));
470 static int x0212_unshift PROTO((int c));
473 STATIC unsigned char prefix_table[256];
475 STATIC void e_status PROTO((struct input_code *, int));
476 STATIC void s_status PROTO((struct input_code *, int));
478 #ifdef UTF8_INPUT_ENABLE
479 STATIC void w_status PROTO((struct input_code *, int));
480 STATIC void w16_status PROTO((struct input_code *, int));
481 static int utf16_mode = UTF16BE_INPUT;
484 struct input_code input_code_list[] = {
485 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
486 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
487 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
488 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
492 static int mimeout_mode = 0;
493 static int base64_count = 0;
495 /* X0208 -> ASCII converter */
498 static int f_line = 0; /* chars in line */
499 static int f_prev = 0;
500 static int fold_preserve_f = FALSE; /* preserve new lines */
501 static int fold_f = FALSE;
502 static int fold_len = 0;
505 static unsigned char kanji_intro = DEFAULT_J;
506 static unsigned char ascii_intro = DEFAULT_R;
510 #define FOLD_MARGIN 10
511 #define DEFAULT_FOLD 60
513 static int fold_margin = FOLD_MARGIN;
517 #ifdef DEFAULT_CODE_JIS
518 # define DEFAULT_CONV j_oconv
520 #ifdef DEFAULT_CODE_SJIS
521 # define DEFAULT_CONV s_oconv
523 #ifdef DEFAULT_CODE_EUC
524 # define DEFAULT_CONV e_oconv
526 #ifdef DEFAULT_CODE_UTF8
527 # define DEFAULT_CONV w_oconv
530 /* process default */
531 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
533 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
534 /* s_iconv or oconv */
535 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
537 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
538 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
539 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
540 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
541 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
542 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
543 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
545 /* static redirections */
547 static void (*o_putc)PROTO((int c)) = std_putc;
549 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
550 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
552 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
553 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
555 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
557 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
558 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
560 /* for strict mime */
561 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
562 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
565 static int output_mode = ASCII, /* output kanji mode */
566 input_mode = ASCII, /* input kanji mode */
567 shift_mode = FALSE; /* TRUE shift out, or X0201 */
568 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
570 /* X0201 / X0208 conversion tables */
572 /* X0201 kana conversion table */
575 unsigned char cv[]= {
576 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
577 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
578 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
579 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
580 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
581 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
582 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
583 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
584 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
585 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
586 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
587 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
588 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
589 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
590 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
591 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
595 /* X0201 kana conversion table for dakuten (voiced sound mark) */
598 unsigned char dv[]= {
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
604 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
605 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
606 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
607 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
608 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
610 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
620 unsigned char ev[]= {
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
625 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
626 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
628 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
632 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
636 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640 /* X0208 kigou conversion table */
641 /* 0x8140 - 0x819e */
643 unsigned char fv[] = {
645 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
646 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
647 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
648 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
649 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
650 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
651 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
652 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
653 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
654 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
656 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
662 static int file_out = FALSE;
664 static int overwrite = FALSE;
667 static int crmode_f = 0; /* CR, NL, CRLF */
668 #ifdef EASYWIN /*Easy Win */
669 static int end_check;
672 #define STD_GC_BUFSIZE (256)
673 int std_gc_buf[STD_GC_BUFSIZE];
677 #include "nkf32dll.c"
678 #elif defined(PERL_XS)
688 char *outfname = NULL;
691 #ifdef EASYWIN /*Easy Win */
692 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
695 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
696 cp = (unsigned char *)*argv;
701 if (pipe(fds) < 0 || (pid = fork()) < 0){
712 execvp(argv[1], &argv[1]);
726 if(x0201_f == WISH_TRUE)
727 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
729 if (binmode_f == TRUE)
731 if (freopen("","wb",stdout) == NULL)
738 setbuf(stdout, (char *) NULL);
740 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
743 if (binmode_f == TRUE)
745 if (freopen("","rb",stdin) == NULL) return (-1);
749 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
753 kanji_convert(stdin);
754 if (guess_f) print_guessed_code(NULL);
759 is_inputcode_mixed = FALSE;
760 is_inputcode_set = FALSE;
762 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
771 /* reopen file for stdout */
772 if (file_out == TRUE) {
775 outfname = malloc(strlen(origfname)
776 + strlen(".nkftmpXXXXXX")
782 strcpy(outfname, origfname);
786 for (i = strlen(outfname); i; --i){
787 if (outfname[i - 1] == '/'
788 || outfname[i - 1] == '\\'){
794 strcat(outfname, "ntXXXXXX");
796 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
799 strcat(outfname, ".nkftmpXXXXXX");
800 fd = mkstemp(outfname);
803 || (fd_backup = dup(fileno(stdout))) < 0
804 || dup2(fd, fileno(stdout)) < 0
815 outfname = "nkf.out";
818 if(freopen(outfname, "w", stdout) == NULL) {
822 if (binmode_f == TRUE) {
824 if (freopen("","wb",stdout) == NULL)
831 if (binmode_f == TRUE)
833 if (freopen("","rb",fin) == NULL)
838 setvbuffer(fin, stdibuf, IOBUF_SIZE);
842 char *filename = NULL;
844 if (nfiles > 1) filename = origfname;
845 if (guess_f) print_guessed_code(filename);
851 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
859 if (dup2(fd_backup, fileno(stdout)) < 0){
862 if (stat(origfname, &sb)) {
863 fprintf(stderr, "Can't stat %s\n", origfname);
865 /*
\e$B%Q!<%_%C%7%g%s$rI|85
\e(B */
866 if (chmod(outfname, sb.st_mode)) {
867 fprintf(stderr, "Can't set permission %s\n", outfname);
870 /*
\e$B%?%$%`%9%?%s%W$rI|85
\e(B */
871 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
872 tb[0] = tb[1] = sb.st_mtime;
873 if (utime(outfname, tb)) {
874 fprintf(stderr, "Can't set timestamp %s\n", outfname);
877 tb.actime = sb.st_atime;
878 tb.modtime = sb.st_mtime;
879 if (utime(outfname, &tb)) {
880 fprintf(stderr, "Can't set timestamp %s\n", outfname);
884 if (unlink(origfname)){
888 if (rename(outfname, origfname)) {
890 fprintf(stderr, "Can't rename %s to %s\n",
891 outfname, origfname);
899 #ifdef EASYWIN /*Easy Win */
900 if (file_out == FALSE)
901 scanf("%d",&end_check);
904 #else /* for Other OS */
905 if (file_out == TRUE)
910 #endif /* WIN32DLL */
935 {"katakana-hiragana","h3"},
942 #ifdef UNICODE_ENABLE
943 {"internal-unicode", ""},
945 #ifdef UTF8_OUTPUT_ENABLE
950 #ifdef UTF8_INPUT_ENABLE
952 {"utf16-input", "W16"},
954 #ifdef UNICODE_NORMALIZATION
955 {"utf8mac-input", ""},
964 #ifdef NUMCHAR_OPTION
965 {"numchar-input", ""},
971 #ifdef SHIFTJIS_CP932
981 static int option_mode = 0;
988 unsigned char *p = NULL;
992 while(*cp && *cp++!='-');
996 case '-': /* literal options */
997 if (!*cp) { /* ignore the rest of arguments */
1001 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1003 p = (unsigned char *)long_option[i].name;
1004 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1005 if (*p == cp[j] || cp[j] == ' '){
1012 cp = (unsigned char *)long_option[i].alias;
1016 if (strcmp(long_option[i].name, "overwrite") == 0){
1023 if (strcmp(long_option[i].name, "cap-input") == 0){
1027 if (strcmp(long_option[i].name, "url-input") == 0){
1032 #ifdef NUMCHAR_OPTION
1033 if (strcmp(long_option[i].name, "numchar-input") == 0){
1039 if (strcmp(long_option[i].name, "no-output") == 0){
1043 if (strcmp(long_option[i].name, "debug") == 0){
1048 if (strcmp(long_option[i].name, "cp932") == 0){
1049 #ifdef SHIFTJIS_CP932
1053 #ifdef UTF8_OUTPUT_ENABLE
1054 ms_ucs_map_f = TRUE;
1058 if (strcmp(long_option[i].name, "no-cp932") == 0){
1059 #ifdef SHIFTJIS_CP932
1063 #ifdef UTF8_OUTPUT_ENABLE
1064 ms_ucs_map_f = FALSE;
1068 #ifdef SHIFTJIS_CP932
1069 if (strcmp(long_option[i].name, "cp932inv") == 0){
1076 if (strcmp(long_option[i].name, "x0212") == 0){
1083 if (strcmp(long_option[i].name, "exec-in") == 0){
1087 if (strcmp(long_option[i].name, "exec-out") == 0){
1092 #ifdef UNICODE_ENABLE
1093 if (strcmp(long_option[i].name, "internal-unicode") == 0){
1094 internal_unicode_f = TRUE;
1098 #ifdef UTF8_OUTPUT_ENABLE
1099 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1100 ms_ucs_map_f = TRUE;
1104 #ifdef UNICODE_NORMALIZATION
1105 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1106 input_f = UTF8_INPUT;
1111 if (strcmp(long_option[i].name, "prefix=") == 0){
1112 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1113 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1114 prefix_table[p[i]] = p[1];
1121 case 'b': /* buffered mode */
1124 case 'u': /* non-buffered mode */
1127 case 't': /* transparent mode */
1130 case 'j': /* JIS output */
1132 output_conv = j_oconv;
1134 case 'e': /* AT&T EUC output */
1135 output_conv = e_oconv;
1137 case 's': /* SJIS output */
1138 output_conv = s_oconv;
1140 case 'l': /* ISO8859 Latin-1 support, no conversion */
1141 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1142 input_f = LATIN1_INPUT;
1144 case 'i': /* Kanji IN ESC-$-@/B */
1145 if (*cp=='@'||*cp=='B')
1146 kanji_intro = *cp++;
1148 case 'o': /* ASCII IN ESC-(-J/B */
1149 if (*cp=='J'||*cp=='B'||*cp=='H')
1150 ascii_intro = *cp++;
1154 bit:1 katakana->hiragana
1155 bit:2 hiragana->katakana
1157 if ('9'>= *cp && *cp>='0')
1158 hira_f |= (*cp++ -'0');
1165 #if defined(MSDOS) || defined(__OS2__)
1180 #ifdef UTF8_OUTPUT_ENABLE
1181 case 'w': /* UTF-8 output */
1182 if ('1'== cp[0] && '6'==cp[1]) {
1183 output_conv = w_oconv16; cp+=2;
1185 unicode_bom_f=2; cp++;
1188 unicode_bom_f=1; cp++;
1190 } else if (cp[0] == 'B') {
1191 unicode_bom_f=2; cp++;
1193 unicode_bom_f=1; cp++;
1196 } else if (cp[0] == '8') {
1197 output_conv = w_oconv; cp++;
1200 unicode_bom_f=1; cp++;
1203 output_conv = w_oconv;
1206 #ifdef UTF8_INPUT_ENABLE
1207 case 'W': /* UTF-8 input */
1208 if ('1'== cp[0] && '6'==cp[1]) {
1209 input_f = UTF16BE_INPUT;
1210 utf16_mode = UTF16BE_INPUT;
1214 input_f = UTF16LE_INPUT;
1215 utf16_mode = UTF16LE_INPUT;
1216 } else if (cp[0] == 'B') {
1218 input_f = UTF16BE_INPUT;
1219 utf16_mode = UTF16BE_INPUT;
1221 } else if (cp[0] == '8') {
1223 input_f = UTF8_INPUT;
1225 input_f = UTF8_INPUT;
1228 /* Input code assumption */
1229 case 'J': /* JIS input */
1230 case 'E': /* AT&T EUC input */
1231 input_f = JIS_INPUT;
1233 case 'S': /* MS Kanji input */
1234 input_f = SJIS_INPUT;
1235 if (x0201_f==NO_X0201) x0201_f=TRUE;
1237 case 'Z': /* Convert X0208 alphabet to ASCII */
1238 /* bit:0 Convert X0208
1239 bit:1 Convert Kankaku to one space
1240 bit:2 Convert Kankaku to two spaces
1241 bit:3 Convert HTML Entity
1243 if ('9'>= *cp && *cp>='0')
1244 alpha_f |= 1<<(*cp++ -'0');
1248 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1249 x0201_f = FALSE; /* No X0201->X0208 conversion */
1251 ESC-(-I in JIS, EUC, MS Kanji
1252 SI/SO in JIS, EUC, MS Kanji
1253 SSO in EUC, JIS, not in MS Kanji
1254 MS Kanji (0xa0-0xdf)
1256 ESC-(-I in JIS (0x20-0x5f)
1257 SSO in EUC (0xa0-0xdf)
1258 0xa0-0xd in MS Kanji (0xa0-0xdf)
1261 case 'X': /* Assume X0201 kana */
1262 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1265 case 'F': /* preserve new lines */
1266 fold_preserve_f = TRUE;
1267 case 'f': /* folding -f60 or -f */
1270 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1272 fold_len += *cp++ - '0';
1274 if (!(0<fold_len && fold_len<BUFSIZ))
1275 fold_len = DEFAULT_FOLD;
1279 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1281 fold_margin += *cp++ - '0';
1285 case 'm': /* MIME support */
1286 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1287 if (*cp=='B'||*cp=='Q') {
1288 mime_decode_mode = *cp++;
1289 mimebuf_f = FIXED_MIME;
1290 } else if (*cp=='N') {
1291 mime_f = TRUE; cp++;
1292 } else if (*cp=='S') {
1293 mime_f = STRICT_MIME; cp++;
1294 } else if (*cp=='0') {
1295 mime_decode_f = FALSE;
1296 mime_f = FALSE; cp++;
1299 case 'M': /* MIME output */
1302 mimeout_f = FIXED_MIME; cp++;
1303 } else if (*cp=='Q') {
1305 mimeout_f = FIXED_MIME; cp++;
1310 case 'B': /* Broken JIS support */
1312 bit:1 allow any x on ESC-(-x or ESC-$-x
1313 bit:2 reset to ascii on NL
1315 if ('9'>= *cp && *cp>='0')
1316 broken_f |= 1<<(*cp++ -'0');
1321 case 'O':/* for Output file */
1325 case 'c':/* add cr code */
1328 case 'd':/* delete cr code */
1331 case 'I': /* ISO-2022-JP output */
1334 case 'L': /* line mode */
1335 if (*cp=='u') { /* unix */
1336 crmode_f = NL; cp++;
1337 } else if (*cp=='m') { /* mac */
1338 crmode_f = CR; cp++;
1339 } else if (*cp=='w') { /* windows */
1340 crmode_f = CRLF; cp++;
1341 } else if (*cp=='0') { /* no conversion */
1351 /* multiple options in one string are allowed for the Perl module */
1352 while(*cp && *cp++!='-');
1355 /* bogus option but ignored */
1361 #ifdef ANSI_C_PROTOTYPE
1362 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1364 struct input_code * find_inputcode_byfunc(iconv_func)
1365 int (*iconv_func)();
1369 struct input_code *p = input_code_list;
1371 if (iconv_func == p->iconv_func){
1381 static int (*iconv_for_check)() = 0;
1384 #ifdef ANSI_C_PROTOTYPE
1385 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1387 void set_iconv(f, iconv_func)
1389 int (*iconv_func)();
1392 #ifdef INPUT_CODE_FIX
1400 #ifdef INPUT_CODE_FIX
1401 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1407 if (estab_f && iconv_for_check != iconv){
1408 struct input_code *p = find_inputcode_byfunc(iconv);
1410 set_input_codename(p->name);
1411 debug(input_codename);
1413 iconv_for_check = iconv;
1418 #define SCORE_L2 (1) /*
\e$BBh
\e(B2
\e$B?e=`4A;z
\e(B */
1419 #define SCORE_KANA (SCORE_L2 << 1) /*
\e$B$$$o$f$kH>3Q%+%J
\e(B */
1420 #define SCORE_DEPEND (SCORE_KANA << 1) /*
\e$B5!<o0MB8J8;z
\e(B */
1421 #ifdef SHIFTJIS_CP932
1422 #define SCORE_CP932 (SCORE_DEPEND << 1) /* CP932
\e$B$K$h$kFI$_49$(
\e(B */
1423 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /*
\e$BB8:_$7$J$$J8;z
\e(B */
1425 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /*
\e$BB8:_$7$J$$J8;z
\e(B */
1427 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME
\e$B$K$h$k;XDj
\e(B */
1428 #define SCORE_ERROR (SCORE_iMIME << 1) /*
\e$B%(%i!<
\e(B */
1430 #define SCORE_INIT (SCORE_iMIME)
1432 int score_table_A0[] = {
1435 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1436 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1439 int score_table_F0[] = {
1440 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1441 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1442 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1443 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1446 void set_code_score(ptr, score)
1447 struct input_code *ptr;
1451 ptr->score |= score;
1455 void clr_code_score(ptr, score)
1456 struct input_code *ptr;
1460 ptr->score &= ~score;
1464 void code_score(ptr)
1465 struct input_code *ptr;
1467 int c2 = ptr->buf[0];
1468 int c1 = ptr->buf[1];
1470 set_code_score(ptr, SCORE_ERROR);
1471 }else if (c2 == SSO){
1472 set_code_score(ptr, SCORE_KANA);
1473 #ifdef UTF8_OUTPUT_ENABLE
1474 }else if (!e2w_conv(c2, c1)){
1475 set_code_score(ptr, SCORE_NO_EXIST);
1477 }else if ((c2 & 0x70) == 0x20){
1478 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1479 }else if ((c2 & 0x70) == 0x70){
1480 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1481 }else if ((c2 & 0x70) >= 0x50){
1482 set_code_score(ptr, SCORE_L2);
1486 void status_disable(ptr)
1487 struct input_code *ptr;
1492 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1495 void status_push_ch(ptr, c)
1496 struct input_code *ptr;
1499 ptr->buf[ptr->index++] = c;
1502 void status_clear(ptr)
1503 struct input_code *ptr;
1509 void status_reset(ptr)
1510 struct input_code *ptr;
1513 ptr->score = SCORE_INIT;
1516 void status_reinit(ptr)
1517 struct input_code *ptr;
1520 ptr->_file_stat = 0;
1523 void status_check(ptr, c)
1524 struct input_code *ptr;
1527 if (c <= DEL && estab_f){
1532 void s_status(ptr, c)
1533 struct input_code *ptr;
1538 status_check(ptr, c);
1543 #ifdef NUMCHAR_OPTION
1544 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1547 }else if (0xa1 <= c && c <= 0xdf){
1548 status_push_ch(ptr, SSO);
1549 status_push_ch(ptr, c);
1552 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1554 status_push_ch(ptr, c);
1555 #ifdef SHIFTJIS_CP932
1557 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1559 status_push_ch(ptr, c);
1560 #endif /* SHIFTJIS_CP932 */
1562 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1564 status_push_ch(ptr, c);
1565 #endif /* X0212_ENABLE */
1567 status_disable(ptr);
1571 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1572 status_push_ch(ptr, c);
1573 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1577 status_disable(ptr);
1581 #ifdef SHIFTJIS_CP932
1582 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1583 status_push_ch(ptr, c);
1584 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1585 set_code_score(ptr, SCORE_CP932);
1590 #endif /* SHIFTJIS_CP932 */
1591 #ifndef X0212_ENABLE
1592 status_disable(ptr);
1598 void e_status(ptr, c)
1599 struct input_code *ptr;
1604 status_check(ptr, c);
1609 #ifdef NUMCHAR_OPTION
1610 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1613 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1615 status_push_ch(ptr, c);
1617 }else if (0x8f == c){
1619 status_push_ch(ptr, c);
1620 #endif /* X0212_ENABLE */
1622 status_disable(ptr);
1626 if (0xa1 <= c && c <= 0xfe){
1627 status_push_ch(ptr, c);
1631 status_disable(ptr);
1636 if (0xa1 <= c && c <= 0xfe){
1638 status_push_ch(ptr, c);
1640 status_disable(ptr);
1642 #endif /* X0212_ENABLE */
1646 #ifdef UTF8_INPUT_ENABLE
1647 void w16_status(ptr, c)
1648 struct input_code *ptr;
1655 if (ptr->_file_stat == 0){
1656 if (c == 0xfe || c == 0xff){
1658 status_push_ch(ptr, c);
1659 ptr->_file_stat = 1;
1661 status_disable(ptr);
1662 ptr->_file_stat = -1;
1664 }else if (ptr->_file_stat > 0){
1666 status_push_ch(ptr, c);
1667 }else if (ptr->_file_stat < 0){
1668 status_disable(ptr);
1674 status_disable(ptr);
1675 ptr->_file_stat = -1;
1677 status_push_ch(ptr, c);
1684 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1685 status_push_ch(ptr, c);
1688 status_disable(ptr);
1689 ptr->_file_stat = -1;
1695 void w_status(ptr, c)
1696 struct input_code *ptr;
1701 status_check(ptr, c);
1706 #ifdef NUMCHAR_OPTION
1707 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1710 }else if (0xc0 <= c && c <= 0xdf){
1712 status_push_ch(ptr, c);
1713 }else if (0xe0 <= c && c <= 0xef){
1715 status_push_ch(ptr, c);
1717 status_disable(ptr);
1722 if (0x80 <= c && c <= 0xbf){
1723 status_push_ch(ptr, c);
1724 if (ptr->index > ptr->stat){
1725 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1726 && ptr->buf[2] == 0xbf);
1727 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1728 &ptr->buf[0], &ptr->buf[1]);
1735 status_disable(ptr);
1746 int action_flag = 1;
1747 struct input_code *result = 0;
1748 struct input_code *p = input_code_list;
1750 (p->status_func)(p, c);
1753 }else if(p->stat == 0){
1764 if (result && !estab_f){
1765 set_iconv(TRUE, result->iconv_func);
1766 }else if (c <= DEL){
1767 struct input_code *ptr = input_code_list;
1782 return std_gc_buf[--std_gc_ndx];
1793 if (std_gc_ndx == STD_GC_BUFSIZE){
1796 std_gc_buf[std_gc_ndx++] = c;
1816 while ((c = (*i_getc)(f)) != EOF)
1825 oconv = output_conv;
1828 /* replace continuation module, from output side */
1830 /* output redirection */
1832 if (noout_f || guess_f){
1839 if (mimeout_f == TRUE) {
1840 o_base64conv = oconv; oconv = base64_conv;
1842 /* base64_count = 0; */
1846 o_crconv = oconv; oconv = cr_conv;
1849 o_rot_conv = oconv; oconv = rot_conv;
1852 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1855 o_hira_conv = oconv; oconv = hira_conv;
1858 o_fconv = oconv; oconv = fold_conv;
1861 if (alpha_f || x0201_f) {
1862 o_zconv = oconv; oconv = z_conv;
1866 i_ungetc = std_ungetc;
1867 /* input redirection */
1870 i_cgetc = i_getc; i_getc = cap_getc;
1871 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1874 i_ugetc = i_getc; i_getc = url_getc;
1875 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1878 #ifdef NUMCHAR_OPTION
1880 i_ngetc = i_getc; i_getc = numchar_getc;
1881 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1884 #ifdef UNICODE_NORMALIZATION
1885 if (nfc_f && input_f == UTF8_INPUT){
1886 i_nfc_getc = i_getc; i_getc = nfc_getc;
1887 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
1890 if (mime_f && mimebuf_f==FIXED_MIME) {
1891 i_mgetc = i_getc; i_getc = mime_getc;
1892 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1895 i_bgetc = i_getc; i_getc = broken_getc;
1896 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1898 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1899 set_iconv(-TRUE, e_iconv);
1900 } else if (input_f == SJIS_INPUT) {
1901 set_iconv(-TRUE, s_iconv);
1902 #ifdef UTF8_INPUT_ENABLE
1903 } else if (input_f == UTF8_INPUT) {
1904 set_iconv(-TRUE, w_iconv);
1905 } else if (input_f == UTF16BE_INPUT) {
1906 set_iconv(-TRUE, w_iconv16);
1907 } else if (input_f == UTF16LE_INPUT) {
1908 set_iconv(-TRUE, w_iconv16);
1911 set_iconv(FALSE, e_iconv);
1915 struct input_code *p = input_code_list;
1923 Conversion main loop. Code detection only.
1932 int is_8bit = FALSE;
1934 module_connection();
1939 output_mode = ASCII;
1942 #define NEXT continue /* no output, get next */
1943 #define SEND ; /* output c1 and c2, get next */
1944 #define LAST break /* end of loop, go closing */
1946 while ((c1 = (*i_getc)(f)) != EOF) {
1951 /* in case of 8th bit is on */
1952 if (!estab_f&&!mime_decode_mode) {
1953 /* in case of not established yet */
1954 /* It is still ambiguous */
1955 if (h_conv(f, c2, c1)==EOF)
1961 /* in case of already established */
1963 /* ignore bogus code */
1969 /* second byte, 7 bit code */
1970 /* it might be kanji shifted */
1971 if ((c1 == DEL) || (c1 <= SPACE)) {
1972 /* ignore bogus first code */
1980 #ifdef UTF8_INPUT_ENABLE
1989 #ifdef NUMCHAR_OPTION
1990 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1993 } else if (c1 > DEL) {
1995 if (!estab_f && !iso8859_f) {
1996 /* not established yet */
1997 if (!is_8bit) is_8bit = TRUE;
2000 } else { /* estab_f==TRUE */
2005 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2006 /* SJIS X0201 Case... */
2007 if(iso2022jp_f && x0201_f==NO_X0201) {
2008 (*oconv)(GETA1, GETA2);
2015 } else if (c1==SSO && iconv != s_iconv) {
2016 /* EUC X0201 Case */
2017 c1 = (*i_getc)(f); /* skip SSO */
2019 if (SSP<=c1 && c1<0xe0) {
2020 if(iso2022jp_f && x0201_f==NO_X0201) {
2021 (*oconv)(GETA1, GETA2);
2028 } else { /* bogus code, skip SSO and one byte */
2032 /* already established */
2037 } else if ((c1 > SPACE) && (c1 != DEL)) {
2038 /* in case of Roman characters */
2040 /* output 1 shifted byte */
2044 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2045 /* output 1 shifted byte */
2046 if(iso2022jp_f && x0201_f==NO_X0201) {
2047 (*oconv)(GETA1, GETA2);
2054 /* look like bogus code */
2057 } else if (input_mode == X0208) {
2058 /* in case of Kanji shifted */
2061 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2062 /* Check MIME code */
2063 if ((c1 = (*i_getc)(f)) == EOF) {
2066 } else if (c1 == '?') {
2067 /* =? is mime conversion start sequence */
2068 if(mime_f == STRICT_MIME) {
2069 /* check in real detail */
2070 if (mime_begin_strict(f) == EOF)
2074 } else if (mime_begin(f) == EOF)
2084 /* normal ASCII code */
2087 } else if (c1 == SI) {
2090 } else if (c1 == SO) {
2093 } else if (c1 == ESC ) {
2094 if ((c1 = (*i_getc)(f)) == EOF) {
2095 /* (*oconv)(0, ESC); don't send bogus code */
2097 } else if (c1 == '$') {
2098 if ((c1 = (*i_getc)(f)) == EOF) {
2100 (*oconv)(0, ESC); don't send bogus code
2101 (*oconv)(0, '$'); */
2103 } else if (c1 == '@'|| c1 == 'B') {
2104 /* This is kanji introduction */
2107 set_input_codename("ISO-2022-JP");
2108 debug(input_codename);
2110 } else if (c1 == '(') {
2111 if ((c1 = (*i_getc)(f)) == EOF) {
2112 /* don't send bogus code
2118 } else if (c1 == '@'|| c1 == 'B') {
2119 /* This is kanji introduction */
2124 } else if (c1 == 'D'){
2128 #endif /* X0212_ENABLE */
2130 /* could be some special code */
2137 } else if (broken_f&0x2) {
2138 /* accept any ESC-(-x as broken code ... */
2148 } else if (c1 == '(') {
2149 if ((c1 = (*i_getc)(f)) == EOF) {
2150 /* don't send bogus code
2152 (*oconv)(0, '('); */
2156 /* This is X0201 kana introduction */
2157 input_mode = X0201; shift_mode = X0201;
2159 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2160 /* This is ASCII / JIS X 0201 Roman introduction (ESC ( B, J or H) */
2161 input_mode = ASCII; shift_mode = FALSE;
2163 } else if (broken_f&0x2) {
2164 input_mode = ASCII; shift_mode = FALSE;
2169 /* maintain various input_mode here */
2173 } else if ( c1 == 'N' || c1 == 'n' ){
2175 c3 = (*i_getc)(f); /* skip SS2 */
2176 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2191 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2192 input_mode = ASCII; set_iconv(FALSE, 0);
2194 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2195 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2203 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2204 if ((c1=(*i_getc)(f))!=EOF) {
2208 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2224 if (input_mode == X0208)
2225 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2227 else if (input_mode == X0212)
2228 (*oconv)((0x8f << 8) | c2, c1);
2229 #endif /* X0212_ENABLE */
2230 else if (input_mode)
2231 (*oconv)(input_mode, c1); /* other special case */
2232 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2233 int c0 = (*i_getc)(f);
2236 (*iconv)(c2, c1, c0);
2242 /* goto next_word */
2246 (*iconv)(EOF, 0, 0);
2247 if (!is_inputcode_set)
2250 struct input_code *p = input_code_list;
2251 struct input_code *result = p;
2253 if (p->score < result->score) result = p;
2256 set_input_codename(result->name);
2271 /** it must NOT be in the kanji shifted sequence */
2272 /** it must NOT be written in JIS7 */
2273 /** and it must be after 2 byte 8bit code */
2280 while ((c1 = (*i_getc)(f)) != EOF) {
2286 if (push_hold_buf(c1) == EOF || estab_f){
2292 struct input_code *p = input_code_list;
2293 struct input_code *result = p;
2298 if (p->score < result->score){
2303 set_iconv(FALSE, result->iconv_func);
2308 ** 1) EOF is detected, or
2309 ** 2) Code is established, or
2310 ** 3) Buffer is FULL (but last word is pushed)
2312 ** in 1) and 3) cases, we continue to use
2313 ** Kanji codes by oconv and leave estab_f unchanged.
2318 while (wc < hold_count){
2319 c2 = hold_buf[wc++];
2321 #ifdef NUMCHAR_OPTION
2322 || (c2 & CLASS_MASK) == CLASS_UTF16
2327 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2328 (*iconv)(X0201, c2, 0);
2331 if (wc < hold_count){
2332 c1 = hold_buf[wc++];
2341 if ((*iconv)(c2, c1, 0) < 0){
2343 if (wc < hold_count){
2344 c0 = hold_buf[wc++];
2353 (*iconv)(c2, c1, c0);
2366 if (hold_count >= HOLD_SIZE*2)
2368 hold_buf[hold_count++] = c2;
2369 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2372 int s2e_conv(c2, c1, p2, p1)
2377 #ifdef SHIFTJIS_CP932
2378 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2379 extern unsigned short shiftjis_cp932[3][189];
2380 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2386 #endif /* SHIFTJIS_CP932 */
2388 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2389 extern unsigned short shiftjis_x0212[3][189];
2390 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2393 c2 = (0x8f << 8) | (val >> 8);
2405 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2407 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2415 c2 = x0212_unshift(c2);
2430 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2433 int ret = s2e_conv(c2, c1, &c2, &c1);
2434 if (ret) return ret;
2448 }else if (c2 == 0x8f){
2452 c2 = (c2 << 8) | (c1 & 0x7f);
2454 #ifdef SHIFTJIS_CP932
2457 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2458 s2e_conv(s2, s1, &c2, &c1);
2459 if ((c2 & 0xff00) == 0){
2465 #endif /* SHIFTJIS_CP932 */
2466 #endif /* X0212_ENABLE */
2467 } else if (c2 == SSO){
2470 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2480 #ifdef UTF8_INPUT_ENABLE
2482 w2e_conv(c2, c1, c0, p2, p1)
2486 extern unsigned short * utf8_to_euc_2bytes[];
2487 extern unsigned short ** utf8_to_euc_3bytes[];
2490 if (0xc0 <= c2 && c2 <= 0xef) {
2491 unsigned short **pp;
2494 if (c0 == 0) return -1;
2495 pp = utf8_to_euc_3bytes[c2 - 0x80];
2496 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2498 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2500 #ifdef NUMCHAR_OPTION
2503 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2508 } else if (c2 == X0201) {
2522 unsigned short val = 0;
2525 if (c2 < 0x80 || (c2 & 0xc0) == 0xdf) /* 0x00-0x7f 0xc0-0xdf */
2527 else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
2528 return -1; /* 3bytes */
2529 /*else if (0xf0 <= c2)
2530 return 0; /* 4,5,6bytes */
2531 else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
2532 return 0; /* trail byte */
2536 else if (c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
2537 return 0; /* throw BOM */
2538 else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
2539 val = ww16_conv(c2, c1, c0);
2540 c2 = (val >> 8) & 0xff;
2543 ret = w2e_conv(c2, c1, c0, &c2, &c1);
2552 w16w_conv(val, p2, p1, p0)
2560 }else if (val < 0x800){
2561 *p2 = 0xc0 | (val >> 6);
2562 *p1 = 0x80 | (val & 0x3f);
2565 *p2 = 0xe0 | (val >> 12);
2566 *p1 = 0x80 | ((val >> 6) & 0x3f);
2567 *p0 = 0x80 | (val & 0x3f);
2572 ww16_conv(c2, c1, c0)
2577 val = (c2 & 0x0f) << 12;
2578 val |= (c1 & 0x3f) << 6;
2580 }else if (c2 >= 0xc0){
2581 val = (c2 & 0x1f) << 6;
2590 w16e_conv(val, p2, p1)
2594 extern unsigned short * utf8_to_euc_2bytes[];
2595 extern unsigned short ** utf8_to_euc_3bytes[];
2597 unsigned short **pp;
2601 w16w_conv(val, &c2, &c1, &c0);
2604 pp = utf8_to_euc_3bytes[c2 - 0x80];
2605 psize = sizeof_utf8_to_euc_C2;
2606 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2608 pp = utf8_to_euc_2bytes;
2609 psize = sizeof_utf8_to_euc_2bytes;
2610 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2612 #ifdef NUMCHAR_OPTION
2615 *p1 = CLASS_UTF16 | val;
2627 w_iconv16(c2, c1, c0)
2632 if (c2==0376 && c1==0377){
2633 utf16_mode = UTF16BE_INPUT;
2635 } else if (c2==0377 && c1==0376){
2636 utf16_mode = UTF16LE_INPUT;
2639 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
2641 tmp=c1; c1=c2; c2=tmp;
2643 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2647 if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16));
2648 else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2649 if (ret) return ret;
2655 w_iconv_common(c1, c0, pp, psize, p2, p1)
2657 unsigned short **pp;
2665 if (pp == 0) return 1;
2668 if (c1 < 0 || psize <= c1) return 1;
2670 if (p == 0) return 1;
2673 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2675 if (val == 0) return 1;
2682 if (c2 == SO) c2 = X0201;
2691 #ifdef UTF8_OUTPUT_ENABLE
2696 extern unsigned short euc_to_utf8_1byte[];
2697 extern unsigned short * euc_to_utf8_2bytes[];
2698 extern unsigned short * euc_to_utf8_2bytes_ms[];
2702 p = euc_to_utf8_1byte;
2704 } else if (c2 >> 8 == 0x8f){
2705 extern unsigned short * x0212_to_utf8_2bytes[];
2706 c2 = (c2&0x7f) - 0x21;
2707 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2708 p = x0212_to_utf8_2bytes[c2];
2714 c2 = (c2&0x7f) - 0x21;
2715 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2716 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2721 c1 = (c1 & 0x7f) - 0x21;
2722 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2739 if (unicode_bom_f==2) {
2746 #ifdef NUMCHAR_OPTION
2747 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2748 w16w_conv(c1, &c2, &c1, &c0);
2752 if (c0) (*o_putc)(c0);
2759 output_mode = ASCII;
2761 } else if (c2 == ISO8859_1) {
2762 output_mode = ISO8859_1;
2763 (*o_putc)(c1 | 0x080);
2766 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
2767 val = ((c2<<8)&0xff00) + c1;
2768 else val = e2w_conv(c2, c1);
2770 w16w_conv(val, &c2, &c1, &c0);
2774 if (c0) (*o_putc)(c0);
2790 if (unicode_bom_f==2) {
2792 (*o_putc)((unsigned char)'\377');
2796 (*o_putc)((unsigned char)'\377');
2801 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
2802 } else if (c2 == ISO8859_1) {
2805 #ifdef NUMCHAR_OPTION
2806 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2807 c2 = (c1 >> 8) & 0xff;
2811 unsigned short val = e2w_conv(c2, c1);
2812 c2 = (val >> 8) & 0xff;
2831 #ifdef NUMCHAR_OPTION
2832 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2833 w16e_conv(c1, &c2, &c1);
2834 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2842 } else if (c2 == 0) {
2843 output_mode = ASCII;
2845 } else if (c2 == X0201) {
2846 output_mode = JAPANESE_EUC;
2847 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2848 } else if (c2 == ISO8859_1) {
2849 output_mode = ISO8859_1;
2850 (*o_putc)(c1 | 0x080);
2852 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2853 output_mode = JAPANESE_EUC;
2854 #ifdef SHIFTJIS_CP932
2857 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2858 s2e_conv(s2, s1, &c2, &c1);
2862 if ((c2 & 0xff00) >> 8 == 0x8f){
2865 (*o_putc)((c2 & 0x7f) | 0x080);
2866 (*o_putc)(c1 | 0x080);
2869 (*o_putc)((c2 & 0x7f) | 0x080);
2870 (*o_putc)(c1 | 0x080);
2874 if ((c1<0x21 || 0x7e<c1) ||
2875 (c2<0x21 || 0x7e<c2)) {
2876 set_iconv(FALSE, 0);
2877 return; /* too late to rescue this char */
2879 output_mode = JAPANESE_EUC;
2880 (*o_putc)(c2 | 0x080);
2881 (*o_putc)(c1 | 0x080);
2891 if ((ret & 0xff00) == 0x8f00){
2892 if (0x75 <= c && c <= 0x7f){
2893 ret = c + (0x109 - 0x75);
2896 if (0x75 <= c && c <= 0x7f){
2897 ret = c + (0x113 - 0x75);
2904 int x0212_unshift(c)
2908 if (0x7f <= c && c <= 0x88){
2909 ret = c + (0x75 - 0x7f);
2910 }else if (0x89 <= c && c <= 0x92){
2911 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2915 #endif /* X0212_ENABLE */
2918 e2s_conv(c2, c1, p2, p1)
2919 int c2, c1, *p2, *p1;
2922 unsigned short *ptr;
2924 extern unsigned short *x0212_shiftjis[];
2926 if ((c2 & 0xff00) == 0x8f00){
2928 if (0x21 <= ndx && ndx <= 0x7e){
2929 ptr = x0212_shiftjis[ndx - 0x21];
2931 val = ptr[(c1 & 0x7f) - 0x21];
2941 c2 = x0212_shift(c2);
2943 #endif /* X0212_ENABLE */
2944 if ((c2 & 0xff00) == 0x8f00){
2947 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2948 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2957 #ifdef NUMCHAR_OPTION
2958 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2959 w16e_conv(c1, &c2, &c1);
2965 } else if (c2 == 0) {
2966 output_mode = ASCII;
2968 } else if (c2 == X0201) {
2969 output_mode = SHIFT_JIS;
2971 } else if (c2 == ISO8859_1) {
2972 output_mode = ISO8859_1;
2973 (*o_putc)(c1 | 0x080);
2975 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2976 output_mode = SHIFT_JIS;
2977 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2983 if ((c1<0x20 || 0x7e<c1) ||
2984 (c2<0x20 || 0x7e<c2)) {
2985 set_iconv(FALSE, 0);
2986 return; /* too late to rescue this char */
2988 output_mode = SHIFT_JIS;
2989 e2s_conv(c2, c1, &c2, &c1);
2991 #ifdef SHIFTJIS_CP932
2993 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2994 extern unsigned short cp932inv[2][189];
2995 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3001 #endif /* SHIFTJIS_CP932 */
3004 if (prefix_table[(unsigned char)c1]){
3005 (*o_putc)(prefix_table[(unsigned char)c1]);
3016 #ifdef NUMCHAR_OPTION
3017 if ((c1 & CLASS_MASK) == CLASS_UTF16){
3018 w16e_conv(c1, &c2, &c1);
3022 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3025 (*o_putc)(ascii_intro);
3026 output_mode = ASCII;
3030 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3031 if (output_mode!=X0212) {
3032 output_mode = X0212;
3038 (*o_putc)(c2 & 0x7f);
3041 } else if (c2==X0201) {
3042 if (output_mode!=X0201) {
3043 output_mode = X0201;
3049 } else if (c2==ISO8859_1) {
3050 /* iso8859 introduction, or 8th bit on */
3051 /* Can we convert in 7bit form using ESC-'-'-A ?
3053 output_mode = ISO8859_1;
3055 } else if (c2 == 0) {
3056 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3059 (*o_putc)(ascii_intro);
3060 output_mode = ASCII;
3064 if (output_mode != X0208) {
3065 output_mode = X0208;
3068 (*o_putc)(kanji_intro);
3070 if (c1<0x20 || 0x7e<c1)
3072 if (c2<0x20 || 0x7e<c2)
3084 mime_prechar(c2, c1);
3085 (*o_base64conv)(c2,c1);
3089 static int broken_buf[3];
3090 static int broken_counter = 0;
3091 static int broken_last = 0;
3098 if (broken_counter>0) {
3099 return broken_buf[--broken_counter];
3102 if (c=='$' && broken_last != ESC
3103 && (input_mode==ASCII || input_mode==X0201)) {
3106 if (c1=='@'|| c1=='B') {
3107 broken_buf[0]=c1; broken_buf[1]=c;
3114 } else if (c=='(' && broken_last != ESC
3115 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3118 if (c1=='J'|| c1=='B') {
3119 broken_buf[0]=c1; broken_buf[1]=c;
3137 if (broken_counter<2)
3138 broken_buf[broken_counter++]=c;
3142 static int prev_cr = 0;
3150 if (! (c2==0&&c1==NL) ) {
3156 } else if (c1=='\r') {
3158 } else if (c1=='\n') {
3159 if (crmode_f==CRLF) {
3160 (*o_crconv)(0,'\r');
3161 } else if (crmode_f==CR) {
3162 (*o_crconv)(0,'\r');
3166 } else if (c1!='\032' || crmode_f!=NL){
3172 Return value of fold_conv()
3174 \n add newline and output char
3175 \r add newline and output nothing
3178 1 (or else) normal output
3180 fold state in prev (previous character)
3182 >0x80 Japanese (X0208/X0201)
3187 This fold algorithm does not preserve leading spaces in a line.
3188 This is the main difference from fmt.
/*
 * Width of one character as counted by the folding code:
 * 2 when c2 is non-zero, otherwise 1.  c1 is accepted so call sites can
 * pass the (c2, c1) pair unchanged, but it does not affect the result.
 * The argument is parenthesized so compound expressions (e.g. a ?: or a
 * bitwise expression) are evaluated as a unit before the test.
 */
#define char_size(c2,c1) ((c2) ? 2 : 1)
3200 if (c1== '\r' && !fold_preserve_f) {
3201 fold_state=0; /* ignore cr */
3202 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3204 fold_state=0; /* ignore cr */
3205 } else if (c1== BS) {
3206 if (f_line>0) f_line--;
3208 } else if (c2==EOF && f_line != 0) { /* close open last line */
3210 } else if ((c1=='\n' && !fold_preserve_f)
3211 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3212 && fold_preserve_f)) {
3214 if (fold_preserve_f) {
3218 } else if ((f_prev == c1 && !fold_preserve_f)
3219 || (f_prev == '\n' && fold_preserve_f)
3220 ) { /* duplicate newline */
3223 fold_state = '\n'; /* output two newline */
3229 if (f_prev&0x80) { /* Japanese? */
3231 fold_state = 0; /* ignore given single newline */
3232 } else if (f_prev==' ') {
3236 if (++f_line<=fold_len)
3240 fold_state = '\r'; /* fold and output nothing */
3244 } else if (c1=='\f') {
3249 fold_state = '\n'; /* output newline and clear */
3250 } else if ( (c2==0 && c1==' ')||
3251 (c2==0 && c1=='\t')||
3252 (c2=='!'&& c1=='!')) {
3253 /* X0208 kankaku or ascii space */
3254 if (f_prev == ' ') {
3255 fold_state = 0; /* remove duplicate spaces */
3258 if (++f_line<=fold_len)
3259 fold_state = ' '; /* output ASCII space only */
3261 f_prev = ' '; f_line = 0;
3262 fold_state = '\r'; /* fold and output nothing */
3266 prev0 = f_prev; /* we still need this one... , but almost done */
3268 if (c2 || c2==X0201)
3269 f_prev |= 0x80; /* this is Japanese */
3270 f_line += char_size(c2,c1);
3271 if (f_line<=fold_len) { /* normal case */
3274 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3275 f_line = char_size(c2,c1);
3276 fold_state = '\n'; /* We can't wait, do fold now */
3277 } else if (c2==X0201) {
3278 /* simple kinsoku rules return 1 means no folding */
3279 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3280 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3281 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3282 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3283 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3284 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3285 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3287 fold_state = '\n';/* add one new f_line before this character */
3290 fold_state = '\n';/* add one new f_line before this character */
3293 /* kinsoku point in ASCII */
3294 if ( c1==')'|| /* { [ ( */
3305 /* just after special */
3306 } else if (!is_alnum(prev0)) {
3307 f_line = char_size(c2,c1);
3309 } else if ((prev0==' ') || /* ignored new f_line */
3310 (prev0=='\n')|| /* ignored new f_line */
3311 (prev0&0x80)) { /* X0208 - ASCII */
3312 f_line = char_size(c2,c1);
3313 fold_state = '\n';/* add one new f_line before this character */
3315 fold_state = 1; /* default no fold in ASCII */
3319 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3320 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3321 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3322 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3323 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3324 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3325 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3326 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3327 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3328 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3329 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3330 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3331 /* default no fold in kinsoku */
3334 f_line = char_size(c2,c1);
3335 /* add one new f_line before this character */
3338 f_line = char_size(c2,c1);
3340 /* add one new f_line before this character */
3345 /* terminator process */
3346 switch(fold_state) {
3365 int z_prev2=0,z_prev1=0;
3372 /* if (c2) c1 &= 0x7f; assertion */
3374 if (x0201_f && z_prev2==X0201) { /* X0201 */
3375 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3377 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3379 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3381 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3385 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3394 if (x0201_f && c2==X0201) {
3395 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3396 /* wait for dakuten or handakuten */
3397 z_prev1 = c1; z_prev2 = c2;
3400 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3405 /* JISX0208 Alphabet */
3406 if (alpha_f && c2 == 0x23 ) {
3408 } else if (alpha_f && c2 == 0x21 ) {
3409 /* JISX0208 Kigou */
3414 } else if (alpha_f&0x4) {
3419 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3425 case '>': entity = ">"; break;
3426 case '<': entity = "<"; break;
3427 case '\"': entity = """; break;
3428 case '&': entity = "&"; break;
3431 while (*entity) (*o_zconv)(0, *entity++);
3441 #define rot13(c) ( \
3443 (c <= 'M') ? (c + 13): \
3444 (c <= 'Z') ? (c - 13): \
3446 (c <= 'm') ? (c + 13): \
3447 (c <= 'z') ? (c - 13): \
3451 #define rot47(c) ( \
3453 ( c <= 'O' ) ? (c + 47) : \
3454 ( c <= '~' ) ? (c - 47) : \
3462 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3468 (*o_rot_conv)(c2,c1);
3475 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3477 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3480 (*o_hira_conv)(c2,c1);
3485 iso2022jp_check_conv(c2,c1)
3488 static int range[RANGE_NUM_MAX][2] = {
3511 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3515 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3520 for (i = 0; i < RANGE_NUM_MAX; i++) {
3521 start = range[i][0];
3524 if (c >= start && c <= end) {
3529 (*o_iso2022jp_check_conv)(c2,c1);
3533 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3535 unsigned char *mime_pattern[] = {
3536 (unsigned char *)"\075?EUC-JP?B?",
3537 (unsigned char *)"\075?SHIFT_JIS?B?",
3538 (unsigned char *)"\075?ISO-8859-1?Q?",
3539 (unsigned char *)"\075?ISO-8859-1?B?",
3540 (unsigned char *)"\075?ISO-2022-JP?B?",
3541 (unsigned char *)"\075?ISO-2022-JP?Q?",
3542 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3543 (unsigned char *)"\075?UTF-8?B?",
3544 (unsigned char *)"\075?UTF-8?Q?",
3546 (unsigned char *)"\075?US-ASCII?Q?",
3551 /* Markers used to raise the priority of the corresponding input code */
3552 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3553 e_iconv, s_iconv, 0, 0, 0, 0,
3554 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3560 int mime_encode[] = {
3561 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3562 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3569 int mime_encode_method[] = {
3570 'B', 'B','Q', 'B', 'B', 'Q',
3571 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3579 #define MAXRECOVER 20
/*
 * ASCII-only character classification helpers.
 * I don't trust portability of toupper, so these are spelled out as
 * plain range comparisons on the character value.
 *
 * Every use of the argument is parenthesized, so compound expressions
 * such as `c & 0xff` or `a ? b : c` are classified as a whole.
 * The argument may still be evaluated more than once: do not pass
 * expressions with side effects (for example `*p++`).
 *
 * nkf_isblank and nkf_isspace rely on the SPACE, TAB, CR and NL
 * constants defined elsewhere in this file.
 */
#define nkf_toupper(c)  (('a' <= (c) && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
#define nkf_isdigit(c)  ('0' <= (c) && (c) <= '9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
#define nkf_isblank(c)  ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
3593 if (i_getc!=mime_getc) {
3594 i_mgetc = i_getc; i_getc = mime_getc;
3595 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3596 if(mime_f==STRICT_MIME) {
3597 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3598 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3604 unswitch_mime_getc()
3606 if(mime_f==STRICT_MIME) {
3607 i_mgetc = i_mgetc_buf;
3608 i_mungetc = i_mungetc_buf;
3611 i_ungetc = i_mungetc;
3615 mime_begin_strict(f)
3620 unsigned char *p,*q;
3621 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3623 mime_decode_mode = FALSE;
3624 /* =? has been checked */
3626 p = mime_pattern[j];
3629 for(i=2;p[i]>' ';i++) { /* start at =? */
3630 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3631 /* pattern fails, try next one */
3633 while ((p = mime_pattern[++j])) {
3634 for(k=2;k<i;k++) /* assume length(p) > i */
3635 if (p[k]!=q[k]) break;
3636 if (k==i && nkf_toupper(c1)==p[k]) break;
3638 if (p) continue; /* found next one, continue */
3639 /* all fails, output from recovery buffer */
3647 mime_decode_mode = p[i-2];
3649 set_iconv(FALSE, mime_priority_func[j]);
3650 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3652 if (mime_decode_mode=='B') {
3653 mimebuf_f = unbuf_f;
3655 /* do MIME integrity check */
3656 return mime_integrity(f,mime_pattern[j]);
3668 /* we don't keep eof of Fifo, because it contains ?= as
3669 a terminator. It was checked in mime_integrity. */
3670 return ((mimebuf_f)?
3671 (*i_mgetc_buf)(f):Fifo(mime_input++));
3675 mime_ungetc_buf(c,f)
3680 (*i_mungetc_buf)(c,f);
3682 Fifo(--mime_input)=c;
3693 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3694 /* re-read and convert again from mime_buffer. */
3696 /* =? has been checked */
3698 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3699 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3700 /* We accept any character type even if it is broken up by new lines */
3701 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3702 if (c1=='\n'||c1==' '||c1=='\r'||
3703 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3705 /* Failed. But this could be another MIME preamble */
3713 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3714 if (!(++i<MAXRECOVER) || c1==EOF) break;
3715 if (c1=='b'||c1=='B') {
3716 mime_decode_mode = 'B';
3717 } else if (c1=='q'||c1=='Q') {
3718 mime_decode_mode = 'Q';
3722 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3723 if (!(++i<MAXRECOVER) || c1==EOF) break;
3725 mime_decode_mode = FALSE;
3731 if (!mime_decode_mode) {
3732 /* false MIME preamble, restart from mime_buffer */
3733 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3734 /* Since we are in MIME mode until buffer becomes empty, */
3735 /* we never go into mime_begin again for a while. */
3738 /* discard mime preamble, and goto MIME mode */
3740 /* do no MIME integrity check */
3741 return c1; /* used only for checking EOF */
3756 fprintf(stderr, "%s\n", str);
3762 set_input_codename (codename)
3767 strcmp(codename, "") != 0 &&