1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.81 2005/11/06 20:17:01 naruse Exp $ */
43 #define NKF_VERSION "2.0.5"
44 #define NKF_RELEASE_DATE "2005-11-07"
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse"
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
116 #if defined(MSDOS) || defined(__OS2__)
123 #define setbinmode(fp) fsetbin(fp)
124 #else /* Microsoft C, Turbo C */
125 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
127 #else /* UNIX,OS/2 */
128 #define setbinmode(fp)
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
134 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
147 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16BE_INPUT 14
212 #define UTF16LE_INPUT 15
/*
 * ASCII-only character classification and conversion helpers.
 *
 * These deliberately avoid <ctype.h> so results do not depend on the
 * locale or on the platform's toupper().
 *
 * Every use of a macro parameter is parenthesized so that expression
 * arguments (for example `c & 0xff` or `a ? b : c`) expand with the
 * intended precedence.  Arguments are still evaluated more than once,
 * so never pass an expression with side effects (e.g. `*p++`).
 *
 * nkf_isblank() and nkf_isspace() rely on SPACE, TAB, CR and NL, which
 * are expected to be defined elsewhere in this file.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c) (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c) ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c) ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
/* Value of one hexadecimal digit; the caller must have checked nkf_isxdigit(x). */
#define hex2bin(x) ( nkf_isdigit(x) ? (x) - '0' : nkf_toupper(x) - 'A' + 10)
246 #define HOLD_SIZE 1024 /* hold_buf is declared with HOLD_SIZE*2 bytes */
247 #define IOBUF_SIZE 16384 /* size of the stdibuf/stdobuf stdio buffers */
249 #define DEFAULT_J 'B' /* initial value of kanji_intro */
250 #define DEFAULT_R 'B' /* initial value of ascii_intro */
252 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
253 #define SJ6394 0x0161 /* 63 - 94 ku offset */
255 #define RANGE_NUM_MAX 18 /* NOTE(review): no use is visible in this part of the file -- confirm meaning */
260 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
261 #define sizeof_euc_utf8 94
262 #define sizeof_euc_to_utf8_1byte 94
263 #define sizeof_euc_to_utf8_2bytes 94
264 #define sizeof_utf8_to_euc_C2 64
265 #define sizeof_utf8_to_euc_E5B8 64
266 #define sizeof_utf8_to_euc_2bytes 112
267 #define sizeof_utf8_to_euc_3bytes 112
270 /* MIME preprocessor */
272 #ifdef EASYWIN /*Easy Win */
273 extern POINT _BufferSize;
276 /* function prototype */
278 #ifdef ANSI_C_PROTOTYPE
280 #define STATIC static
294 void (*status_func)PROTO((struct input_code *, int));
295 int (*iconv_func)PROTO((int c2, int c1, int c0));
299 STATIC char *input_codename = "";
302 STATIC const char *CopyRight = COPY_RIGHT;
304 #if !defined(PERL_XS) && !defined(WIN32DLL)
305 STATIC int noconvert PROTO((FILE *f));
307 STATIC int kanji_convert PROTO((FILE *f));
308 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
309 STATIC int push_hold_buf PROTO((int c2));
310 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
311 STATIC int s_iconv PROTO((int c2,int c1,int c0));
312 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
313 STATIC int e_iconv PROTO((int c2,int c1,int c0));
314 #ifdef UTF8_INPUT_ENABLE
315 STATIC void encode_fallback_html PROTO((int c));
316 STATIC void encode_fallback_xml PROTO((int c));
317 STATIC void encode_fallback_java PROTO((int c));
318 STATIC void encode_fallback_perl PROTO((int c));
319 STATIC void encode_fallback_subchar PROTO((int c));
320 STATIC void (*encode_fallback)PROTO((int c)) = NULL;
321 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
322 STATIC int w_iconv PROTO((int c2,int c1,int c0));
323 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
324 STATIC int w_iconv_common PROTO((int c1,int c0,const unsigned short *const *pp,int psize,int *p2,int *p1));
325 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
326 STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
328 #ifdef UTF8_OUTPUT_ENABLE
329 STATIC int e2w_conv PROTO((int c2,int c1));
330 STATIC void w_oconv PROTO((int c2,int c1));
331 STATIC void w_oconv16 PROTO((int c2,int c1));
333 STATIC void e_oconv PROTO((int c2,int c1));
334 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
335 STATIC void s_oconv PROTO((int c2,int c1));
336 STATIC void j_oconv PROTO((int c2,int c1));
337 STATIC void fold_conv PROTO((int c2,int c1));
338 STATIC void cr_conv PROTO((int c2,int c1));
339 STATIC void z_conv PROTO((int c2,int c1));
340 STATIC void rot_conv PROTO((int c2,int c1));
341 STATIC void hira_conv PROTO((int c2,int c1));
342 STATIC void base64_conv PROTO((int c2,int c1));
343 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
344 STATIC void no_connection PROTO((int c2,int c1));
345 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
347 STATIC void code_score PROTO((struct input_code *ptr));
348 STATIC void code_status PROTO((int c));
350 STATIC void std_putc PROTO((int c));
351 STATIC int std_getc PROTO((FILE *f));
352 STATIC int std_ungetc PROTO((int c,FILE *f));
354 STATIC int broken_getc PROTO((FILE *f));
355 STATIC int broken_ungetc PROTO((int c,FILE *f));
357 STATIC int mime_begin PROTO((FILE *f));
358 STATIC int mime_getc PROTO((FILE *f));
359 STATIC int mime_ungetc PROTO((int c,FILE *f));
361 STATIC int mime_begin_strict PROTO((FILE *f));
362 STATIC int mime_getc_buf PROTO((FILE *f));
363 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
364 STATIC int mime_integrity PROTO((FILE *f,const unsigned char *p));
366 STATIC int base64decode PROTO((int c));
367 STATIC void mime_prechar PROTO((int c2, int c1));
368 STATIC void mime_putc PROTO((int c));
369 STATIC void open_mime PROTO((int c));
370 STATIC void close_mime PROTO(());
372 STATIC void usage PROTO(());
373 STATIC void version PROTO(());
375 STATIC void options PROTO((unsigned char *c));
376 #if defined(PERL_XS) || defined(WIN32DLL)
377 STATIC void reinit PROTO(());
382 #if !defined(PERL_XS) && !defined(WIN32DLL)
383 STATIC unsigned char stdibuf[IOBUF_SIZE];
384 STATIC unsigned char stdobuf[IOBUF_SIZE];
386 STATIC unsigned char hold_buf[HOLD_SIZE*2];
387 STATIC int hold_count;
389 /* MIME preprocessor fifo: mime_buf is accessed as a ring buffer through Fifo() */
391 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
392 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) /* index mask; works because the size is a power of two */
393 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] /* slot for index n, wrapped by the mask */
394 STATIC unsigned char mime_buf[MIME_BUF_SIZE];
395 STATIC unsigned int mime_top = 0;
396 STATIC unsigned int mime_last = 0; /* decoded */
397 STATIC unsigned int mime_input = 0; /* undecoded */
398 STATIC int (*mime_iconv_back)PROTO((int c2,int c1,int c0)) = NULL;
/* Conversion option flags (file-scope state). */
401 STATIC int unbuf_f = FALSE; /* presumably the -u "unbuffered output" switch -- TODO confirm */
402 STATIC int estab_f = FALSE;
403 STATIC int nop_f = FALSE;
404 STATIC int binmode_f = TRUE; /* binary mode */
405 STATIC int rot_f = FALSE; /* ROT13/47 mode */
406 STATIC int hira_f = FALSE; /* hiragana/katakana conversion */
407 STATIC int input_f = FALSE; /* non fixed input code */
408 STATIC int alpha_f = FALSE; /* convert X0208 alphabet to ASCII */
409 STATIC int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
410 STATIC int mime_decode_f = FALSE; /* mime decode is explicitly on */
411 STATIC int mimebuf_f = FALSE; /* MIME buffered input */
412 STATIC int broken_f = FALSE; /* convert ESC-less broken JIS */
413 STATIC int iso8859_f = FALSE; /* ISO8859 through */
414 STATIC int mimeout_f = FALSE; /* base64 mode */
415 #if defined(MSDOS) || defined(__OS2__)
416 STATIC int x0201_f = TRUE; /* Assume JISX0201 kana */
418 STATIC int x0201_f = NO_X0201; /* Assume NO JISX0201 */
420 STATIC int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
421 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
422 STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
424 #ifdef UTF8_OUTPUT_ENABLE
425 STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
426 STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
427 STATIC int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
428 STATIC int unicode_subchar = '?'; /* the regular substitution character */
431 #ifdef UNICODE_NORMALIZATION
432 STATIC int nfc_f = FALSE;
433 STATIC int (*i_nfc_getc)PROTO((FILE *)) = std_getc; /* input of ugetc */
434 STATIC int (*i_nfc_ungetc)PROTO((int c ,FILE *f)) = std_ungetc;
435 STATIC int nfc_getc PROTO((FILE *f));
436 STATIC int nfc_ungetc PROTO((int c,FILE *f));
440 STATIC int cap_f = FALSE;
441 STATIC int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
442 STATIC int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
443 STATIC int cap_getc PROTO((FILE *f));
444 STATIC int cap_ungetc PROTO((int c,FILE *f));
446 STATIC int url_f = FALSE;
447 STATIC int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
448 STATIC int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
449 STATIC int url_getc PROTO((FILE *f));
450 STATIC int url_ungetc PROTO((int c,FILE *f));
453 #ifdef NUMCHAR_OPTION
454 #define CLASS_MASK 0x0f000000
455 #define CLASS_UTF16 0x01000000
456 STATIC int numchar_f = FALSE;
457 STATIC int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
458 STATIC int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
459 STATIC int numchar_getc PROTO((FILE *f));
460 STATIC int numchar_ungetc PROTO((int c,FILE *f));
464 STATIC int noout_f = FALSE;
465 STATIC void no_putc PROTO((int c));
466 STATIC int debug_f = FALSE;
467 STATIC void debug PROTO((const char *str));
468 STATIC int (*iconv_for_check)() = 0;
471 STATIC int guess_f = FALSE;
473 STATIC void print_guessed_code PROTO((char *filename));
475 STATIC void set_input_codename PROTO((char *codename));
476 STATIC int is_inputcode_mixed = FALSE;
477 STATIC int is_inputcode_set = FALSE;
480 STATIC int exec_f = 0;
483 #ifdef SHIFTJIS_CP932
484 /* invert IBM extended characters to others and controls some UCS mapping */
485 STATIC int cp51932_f = TRUE;
486 #define CP932_TABLE_BEGIN (0xfa)
487 #define CP932_TABLE_END (0xfc)
489 /* invert NEC-selected IBM extended characters to IBM extended characters */
490 STATIC int cp932inv_f = TRUE;
491 #define CP932INV_TABLE_BEGIN (0xed)
492 #define CP932INV_TABLE_END (0xee)
494 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
495 #endif /* SHIFTJIS_CP932 */
498 STATIC int x0212_f = FALSE;
499 STATIC int x0212_shift PROTO((int c));
500 STATIC int x0212_unshift PROTO((int c));
503 STATIC unsigned char prefix_table[256];
505 STATIC void e_status PROTO((struct input_code *, int));
506 STATIC void s_status PROTO((struct input_code *, int));
508 #ifdef UTF8_INPUT_ENABLE
509 STATIC void w_status PROTO((struct input_code *, int));
510 STATIC void w16_status PROTO((struct input_code *, int));
511 STATIC int utf16_mode = UTF16BE_INPUT;
514 struct input_code input_code_list[] = {
515 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
516 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
517 #ifdef UTF8_INPUT_ENABLE
518 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
519 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
524 STATIC int mimeout_mode = 0;
525 STATIC int base64_count = 0;
527 /* X0208 -> ASCII converter */
530 STATIC int f_line = 0; /* chars in line */
531 STATIC int f_prev = 0;
532 STATIC int fold_preserve_f = FALSE; /* preserve new lines */
533 STATIC int fold_f = FALSE;
534 STATIC int fold_len = 0;
537 STATIC unsigned char kanji_intro = DEFAULT_J;
538 STATIC unsigned char ascii_intro = DEFAULT_R;
542 #define FOLD_MARGIN 10
543 #define DEFAULT_FOLD 60
545 STATIC int fold_margin = FOLD_MARGIN;
549 #ifdef DEFAULT_CODE_JIS
550 # define DEFAULT_CONV j_oconv
552 #ifdef DEFAULT_CODE_SJIS
553 # define DEFAULT_CONV s_oconv
555 #ifdef DEFAULT_CODE_EUC
556 # define DEFAULT_CONV e_oconv
558 #ifdef DEFAULT_CODE_UTF8
559 # define DEFAULT_CONV w_oconv
562 /* process default */
563 STATIC void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
565 STATIC void (*oconv)PROTO((int c2,int c1)) = no_connection;
566 /* s_iconv or oconv */
567 STATIC int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
569 STATIC void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
570 STATIC void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
571 STATIC void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
572 STATIC void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
573 STATIC void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
574 STATIC void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
575 STATIC void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
577 /* STATIC redirections */
579 STATIC void (*o_putc)PROTO((int c)) = std_putc;
581 STATIC int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
582 STATIC int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
584 STATIC int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
585 STATIC int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
587 STATIC void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
589 STATIC int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
590 STATIC int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
592 /* for strict mime */
593 STATIC int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
594 STATIC int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
597 STATIC int output_mode = ASCII, /* output kanji mode */
598 input_mode = ASCII, /* input kanji mode */
599 shift_mode = FALSE; /* TRUE shift out, or X0201 */
600 STATIC int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
602 /* X0201 / X0208 conversion tables */
604 /* X0201 kana conversion table */
607 unsigned char cv[]= {
608 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
609 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
610 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
611 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
612 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
613 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
614 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
615 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
616 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
617 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
618 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
619 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
620 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
621 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
622 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
623 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
627 /* X0201 kana conversion table for dakuten (voiced sound mark) */
630 unsigned char dv[]= {
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
636 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
637 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
638 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
639 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
640 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
642 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
646 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
649 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
652 unsigned char ev[]= {
653 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
656 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
657 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
658 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
659 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
660 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
661 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
663 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
664 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
665 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672 /* X0208 kigou conversion table */
673 /* 0x8140 - 0x819e */
675 unsigned char fv[] = {
677 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
678 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
679 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
680 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
681 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
682 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
683 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
684 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
685 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
686 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
687 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
688 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
694 STATIC int file_out = FALSE;
696 STATIC int overwrite = FALSE;
699 STATIC int crmode_f = 0; /* CR, NL, CRLF */
700 #ifdef EASYWIN /*Easy Win */
701 STATIC int end_check;
704 #define STD_GC_BUFSIZE (256)
705 int std_gc_buf[STD_GC_BUFSIZE];
709 #include "nkf32dll.c"
710 #elif defined(PERL_XS)
720 char *outfname = NULL;
723 #ifdef EASYWIN /*Easy Win */
724 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
727 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
728 cp = (unsigned char *)*argv;
733 if (pipe(fds) < 0 || (pid = fork()) < 0){
744 execvp(argv[1], &argv[1]);
758 if(x0201_f == WISH_TRUE)
759 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
761 if (binmode_f == TRUE)
763 if (freopen("","wb",stdout) == NULL)
770 setbuf(stdout, (char *) NULL);
772 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
775 if (binmode_f == TRUE)
777 if (freopen("","rb",stdin) == NULL) return (-1);
781 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
785 kanji_convert(stdin);
786 if (guess_f) print_guessed_code(NULL);
791 is_inputcode_mixed = FALSE;
792 is_inputcode_set = FALSE;
797 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
806 /* reopen file for stdout */
807 if (file_out == TRUE) {
810 outfname = malloc(strlen(origfname)
811 + strlen(".nkftmpXXXXXX")
817 strcpy(outfname, origfname);
821 for (i = strlen(outfname); i; --i){
822 if (outfname[i - 1] == '/'
823 || outfname[i - 1] == '\\'){
829 strcat(outfname, "ntXXXXXX");
831 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
834 strcat(outfname, ".nkftmpXXXXXX");
835 fd = mkstemp(outfname);
838 || (fd_backup = dup(fileno(stdout))) < 0
839 || dup2(fd, fileno(stdout)) < 0
850 outfname = "nkf.out";
853 if(freopen(outfname, "w", stdout) == NULL) {
857 if (binmode_f == TRUE) {
859 if (freopen("","wb",stdout) == NULL)
866 if (binmode_f == TRUE)
868 if (freopen("","rb",fin) == NULL)
873 setvbuffer(fin, stdibuf, IOBUF_SIZE);
877 char *filename = NULL;
879 if (nfiles > 1) filename = origfname;
880 if (guess_f) print_guessed_code(filename);
886 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
894 if (dup2(fd_backup, fileno(stdout)) < 0){
897 if (stat(origfname, &sb)) {
898 fprintf(stderr, "Can't stat %s\n", origfname);
900 /*
\e$B%Q!<%_%C%7%g%s$rI|85
\e(B */
901 if (chmod(outfname, sb.st_mode)) {
902 fprintf(stderr, "Can't set permission %s\n", outfname);
905 /*
\e$B%?%$%`%9%?%s%W$rI|85
\e(B */
906 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
907 tb[0] = tb[1] = sb.st_mtime;
908 if (utime(outfname, tb)) {
909 fprintf(stderr, "Can't set timestamp %s\n", outfname);
912 tb.actime = sb.st_atime;
913 tb.modtime = sb.st_mtime;
914 if (utime(outfname, &tb)) {
915 fprintf(stderr, "Can't set timestamp %s\n", outfname);
919 if (unlink(origfname)){
923 if (rename(outfname, origfname)) {
925 fprintf(stderr, "Can't rename %s to %s\n",
926 outfname, origfname);
934 #ifdef EASYWIN /*Easy Win */
935 if (file_out == FALSE)
936 scanf("%d",&end_check);
939 #else /* for Other OS */
940 if (file_out == TRUE)
945 #endif /* WIN32DLL */
972 {"katakana-hiragana","h3"},
979 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
980 {"internal-unicode", ""},
982 #ifdef UTF8_OUTPUT_ENABLE
994 #ifdef UTF8_INPUT_ENABLE
996 {"utf16-input", "W16"},
998 #ifdef UNICODE_NORMALIZATION
999 {"utf8mac-input", ""},
1008 #ifdef NUMCHAR_OPTION
1009 {"numchar-input", ""},
1015 #ifdef SHIFTJIS_CP932
1025 STATIC int option_mode = 0;
1032 unsigned char *p = NULL;
1033 unsigned char *cp_back = NULL;
1034 unsigned char codeset[32];
1038 while(*cp && *cp++!='-');
1039 while (*cp || cp_back) {
1047 case '-': /* literal options */
1048 if (!*cp || *cp == SPACE) { /* ignore the rest of arguments */
1052 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1053 p = (unsigned char *)long_option[i].name;
1054 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1055 if (*p == cp[j] || cp[j] == ' '){
1062 while(*cp && *cp != SPACE && cp++);
1063 if (long_option[i].alias[0]){
1065 cp = (unsigned char *)long_option[i].alias;
1067 if (strcmp(long_option[i].name, "ic=") == 0){
1068 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1069 codeset[i] = nkf_toupper(p[i]);
1072 if(strcmp(codeset, "ISO-2022-JP") == 0){
1073 input_f = JIS_INPUT;
1074 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1075 input_f = SJIS_INPUT;
1076 if (x0201_f==NO_X0201) x0201_f=TRUE;
1077 }else if(strcmp(codeset, "CP932") == 0){
1078 input_f = SJIS_INPUT;
1080 #ifdef SHIFTJIS_CP932
1084 #ifdef UTF8_OUTPUT_ENABLE
1085 ms_ucs_map_f = TRUE;
1087 }else if(strcmp(codeset, "EUCJP") == 0 ||
1088 strcmp(codeset, "EUC-JP") == 0 ||
1089 strcmp(codeset, "CP51932") == 0){
1090 input_f = JIS_INPUT;
1092 #ifdef SHIFTJIS_CP932
1096 #ifdef UTF8_OUTPUT_ENABLE
1097 ms_ucs_map_f = TRUE;
1099 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1100 strcmp(codeset, "EUCJP-MS") == 0){
1101 input_f = JIS_INPUT;
1103 #ifdef SHIFTJIS_CP932
1107 #ifdef UTF8_OUTPUT_ENABLE
1108 ms_ucs_map_f = TRUE;
1110 #ifdef UTF8_INPUT_ENABLE
1111 }else if(strcmp(codeset, "UTF-8") == 0 ||
1112 strcmp(codeset, "UTF-8N") == 0 ||
1113 strcmp(codeset, "UTF-8-BOM") == 0){
1114 input_f = UTF8_INPUT;
1115 #ifdef UNICODE_NORMALIZATION
1116 }else if(strcmp(codeset, "UTF8-MAC") == 0){
1117 input_f = UTF8_INPUT;
1120 }else if(strcmp(codeset, "UTF-16") == 0){
1121 input_f = UTF16BE_INPUT;
1122 utf16_mode = UTF16BE_INPUT;
1123 }else if(strcmp(codeset, "UTF-16BE") == 0 ||
1124 strcmp(codeset, "UTF-16BE-BOM") == 0){
1125 input_f = UTF16BE_INPUT;
1126 utf16_mode = UTF16BE_INPUT;
1127 }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1128 strcmp(codeset, "UTF-16LE-BOM") == 0){
1129 input_f = UTF16LE_INPUT;
1130 utf16_mode = UTF16LE_INPUT;
1135 if (strcmp(long_option[i].name, "oc=") == 0){
1136 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1137 codeset[i] = nkf_toupper(p[i]);
1140 if(strcmp(codeset, "ISO-2022-JP") == 0){
1141 output_conv = j_oconv;
1142 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1143 output_conv = s_oconv;
1144 }else if(strcmp(codeset, "CP932") == 0){
1145 output_conv = s_oconv;
1147 #ifdef SHIFTJIS_CP932
1151 #ifdef UTF8_OUTPUT_ENABLE
1152 ms_ucs_map_f = TRUE;
1154 }else if(strcmp(codeset, "EUCJP") == 0 ||
1155 strcmp(codeset, "EUC-JP") == 0 ||
1156 strcmp(codeset, "CP51932") == 0){
1157 output_conv = e_oconv;
1159 #ifdef SHIFTJIS_CP932
1163 #ifdef UTF8_OUTPUT_ENABLE
1164 ms_ucs_map_f = TRUE;
1166 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1167 strcmp(codeset, "EUCJP-MS") == 0){
1168 output_conv = e_oconv;
1171 #ifdef SHIFTJIS_CP932
1174 #ifdef UTF8_OUTPUT_ENABLE
1175 ms_ucs_map_f = TRUE;
1177 #ifdef UTF8_OUTPUT_ENABLE
1178 }else if(strcmp(codeset, "UTF-8") == 0){
1179 output_conv = w_oconv;
1180 }else if(strcmp(codeset, "UTF-8N") == 0){
1181 output_conv = w_oconv;
1183 }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1184 output_conv = w_oconv;
1186 }else if(strcmp(codeset, "UTF-16") == 0){
1187 output_conv = w_oconv16;
1188 }else if(strcmp(codeset, "UTF-16BE") == 0){
1189 output_conv = w_oconv16;
1191 }else if(strcmp(codeset, "UTF-16BE-BOM") == 0){
1192 output_conv = w_oconv16;
1194 }else if(strcmp(codeset, "UTF-16LE") == 0){
1195 output_conv = w_oconv16;
1198 }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1199 output_conv = w_oconv16;
1207 if (strcmp(long_option[i].name, "overwrite") == 0){
1214 if (strcmp(long_option[i].name, "cap-input") == 0){
1218 if (strcmp(long_option[i].name, "url-input") == 0){
1223 #ifdef NUMCHAR_OPTION
1224 if (strcmp(long_option[i].name, "numchar-input") == 0){
1230 if (strcmp(long_option[i].name, "no-output") == 0){
1234 if (strcmp(long_option[i].name, "debug") == 0){
1239 if (strcmp(long_option[i].name, "cp932") == 0){
1240 #ifdef SHIFTJIS_CP932
1244 #ifdef UTF8_OUTPUT_ENABLE
1245 ms_ucs_map_f = TRUE;
1249 if (strcmp(long_option[i].name, "no-cp932") == 0){
1250 #ifdef SHIFTJIS_CP932
1254 #ifdef UTF8_OUTPUT_ENABLE
1255 ms_ucs_map_f = FALSE;
1259 #ifdef SHIFTJIS_CP932
1260 if (strcmp(long_option[i].name, "cp932inv") == 0){
1267 if (strcmp(long_option[i].name, "x0212") == 0){
1274 if (strcmp(long_option[i].name, "exec-in") == 0){
1278 if (strcmp(long_option[i].name, "exec-out") == 0){
1283 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1284 if (strcmp(long_option[i].name, "internal-unicode") == 0){
1285 internal_unicode_f = TRUE;
1288 if (strcmp(long_option[i].name, "fb-skip") == 0){
1289 encode_fallback = NULL;
1292 if (strcmp(long_option[i].name, "fb-html") == 0){
1293 encode_fallback = encode_fallback_html;
1296 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1297 encode_fallback = encode_fallback_xml;
1300 if (strcmp(long_option[i].name, "fb-java") == 0){
1301 encode_fallback = encode_fallback_java;
1304 if (strcmp(long_option[i].name, "fb-perl") == 0){
1305 encode_fallback = encode_fallback_perl;
1308 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1309 encode_fallback = encode_fallback_subchar;
1312 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1313 encode_fallback = encode_fallback_subchar;
1314 unicode_subchar = 0;
1316 /* decimal number */
1317 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1318 unicode_subchar *= 10;
1319 unicode_subchar += hex2bin(p[i]);
1321 }else if(p[1] == 'x' || p[1] == 'X'){
1322 /* hexadecimal number */
1323 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1324 unicode_subchar <<= 4;
1325 unicode_subchar |= hex2bin(p[i]);
1329 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1330 unicode_subchar *= 8;
1331 unicode_subchar += hex2bin(p[i]);
1334 w16e_conv(unicode_subchar, &i, &j);
1335 unicode_subchar = i<<8 | j;
1339 #ifdef UTF8_OUTPUT_ENABLE
1340 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1341 ms_ucs_map_f = TRUE;
1345 #ifdef UNICODE_NORMALIZATION
1346 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1347 input_f = UTF8_INPUT;
1352 if (strcmp(long_option[i].name, "prefix=") == 0){
1353 if (' ' < p[0] && p[0] < 128){
1354 for (i = 1; ' ' < p[i] && p[i] < 128; i++){
1355 prefix_table[p[i]] = p[0];
1362 case 'b': /* buffered mode */
1365 case 'u': /* non bufferd mode */
1368 case 't': /* transparent mode */
1371 case 'j': /* JIS output */
1373 output_conv = j_oconv;
1375 case 'e': /* AT&T EUC output */
1376 output_conv = e_oconv;
1378 case 's': /* SJIS output */
1379 output_conv = s_oconv;
1381 case 'l': /* ISO8859 Latin-1 support, no conversion */
1382 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1383 input_f = LATIN1_INPUT;
1385 case 'i': /* Kanji IN ESC-$-@/B */
1386 if (*cp=='@'||*cp=='B')
1387 kanji_intro = *cp++;
1389 case 'o': /* ASCII IN ESC-(-J/B */
1390 if (*cp=='J'||*cp=='B'||*cp=='H')
1391 ascii_intro = *cp++;
1395 bit:1 katakana->hiragana
1396 bit:2 hiragana->katakana
1398 if ('9'>= *cp && *cp>='0')
1399 hira_f |= (*cp++ -'0');
1406 #if defined(MSDOS) || defined(__OS2__)
1421 #ifdef UTF8_OUTPUT_ENABLE
1422 case 'w': /* UTF-8 output */
1423 if ('1'== cp[0] && '6'==cp[1]) {
1424 output_conv = w_oconv16; cp+=2;
1426 unicode_bom_f=2; cp++;
1429 unicode_bom_f=1; cp++;
1431 } else if (cp[0] == 'B') {
1432 unicode_bom_f=2; cp++;
1434 unicode_bom_f=1; cp++;
1437 } else if (cp[0] == '8') {
1438 output_conv = w_oconv; cp++;
1441 unicode_bom_f=1; cp++;
1444 output_conv = w_oconv;
1447 #ifdef UTF8_INPUT_ENABLE
1448 case 'W': /* UTF-8 input */
1449 if ('1'== cp[0] && '6'==cp[1]) {
1450 input_f = UTF16BE_INPUT;
1451 utf16_mode = UTF16BE_INPUT;
1455 input_f = UTF16LE_INPUT;
1456 utf16_mode = UTF16LE_INPUT;
1457 } else if (cp[0] == 'B') {
1459 input_f = UTF16BE_INPUT;
1460 utf16_mode = UTF16BE_INPUT;
1462 } else if (cp[0] == '8') {
1464 input_f = UTF8_INPUT;
1466 input_f = UTF8_INPUT;
1469 /* Input code assumption */
1470 case 'J': /* JIS input */
1471 case 'E': /* AT&T EUC input */
1472 input_f = JIS_INPUT;
1474 case 'S': /* MS Kanji input */
1475 input_f = SJIS_INPUT;
1476 if (x0201_f==NO_X0201) x0201_f=TRUE;
1478 case 'Z': /* Convert X0208 alphabet to ASCII */
1479 /* bit:0 Convert X0208
1480 bit:1 Convert Kankaku to one space
1481 bit:2 Convert Kankaku to two spaces
1482 bit:3 Convert HTML Entity
1484 if ('9'>= *cp && *cp>='0')
1485 alpha_f |= 1<<(*cp++ -'0');
1489 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1490 x0201_f = FALSE; /* No X0201->X0208 conversion */
1492 ESC-(-I in JIS, EUC, MS Kanji
1493 SI/SO in JIS, EUC, MS Kanji
1494 SSO in EUC, JIS, not in MS Kanji
1495 MS Kanji (0xa0-0xdf)
1497 ESC-(-I in JIS (0x20-0x5f)
1498 SSO in EUC (0xa0-0xdf)
1499 0xa0-0xd in MS Kanji (0xa0-0xdf)
1502 case 'X': /* Assume X0201 kana */
1503 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1506 case 'F': /* preserve new lines */
1507 fold_preserve_f = TRUE;
1508 case 'f': /* folding -f60 or -f */
1511 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1513 fold_len += *cp++ - '0';
1515 if (!(0<fold_len && fold_len<BUFSIZ))
1516 fold_len = DEFAULT_FOLD;
1520 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1522 fold_margin += *cp++ - '0';
1526 case 'm': /* MIME support */
1527 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1528 if (*cp=='B'||*cp=='Q') {
1529 mime_decode_mode = *cp++;
1530 mimebuf_f = FIXED_MIME;
1531 } else if (*cp=='N') {
1532 mime_f = TRUE; cp++;
1533 } else if (*cp=='S') {
1534 mime_f = STRICT_MIME; cp++;
1535 } else if (*cp=='0') {
1536 mime_decode_f = FALSE;
1537 mime_f = FALSE; cp++;
1540 case 'M': /* MIME output */
1543 mimeout_f = FIXED_MIME; cp++;
1544 } else if (*cp=='Q') {
1546 mimeout_f = FIXED_MIME; cp++;
1551 case 'B': /* Broken JIS support */
1553 bit:1 allow any x on ESC-(-x or ESC-$-x
1554 bit:2 reset to ascii on NL
1556 if ('9'>= *cp && *cp>='0')
1557 broken_f |= 1<<(*cp++ -'0');
1562 case 'O':/* for Output file */
1566 case 'c':/* add cr code */
1569 case 'd':/* delete cr code */
1572 case 'I': /* ISO-2022-JP output */
1575 case 'L': /* line mode */
1576 if (*cp=='u') { /* unix */
1577 crmode_f = NL; cp++;
1578 } else if (*cp=='m') { /* mac */
1579 crmode_f = CR; cp++;
1580 } else if (*cp=='w') { /* windows */
1581 crmode_f = CRLF; cp++;
1582 } else if (*cp=='0') { /* no conversion */
1592 /* module use: multiple options in a string are allowed, for the Perl module */
1593 while(*cp && *cp++!='-');
1596 /* bogus option but ignored */
/*
 * find_inputcode_byfunc: starting from input_code_list, compare the given
 * converter function pointer against the iconv_func member of an
 * input_code entry.  An ANSI-prototype header (under ANSI_C_PROTOTYPE)
 * and a K&R-style header are both provided.
 */
1602 #ifdef ANSI_C_PROTOTYPE
1603 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1605 struct input_code * find_inputcode_byfunc(iconv_func)
1606 int (*iconv_func)();
1610 struct input_code *p = input_code_list;
1612 if (iconv_func == p->iconv_func){
/*
 * set_iconv(f, iconv_func): an ANSI-prototype header (under
 * ANSI_C_PROTOTYPE) and a K&R-style header are both provided.
 * With INPUT_CODE_FIX, f == -TRUE is treated as a "force" request
 * (it is accepted even when input_f is set).
 * When estab_f is set and iconv differs from iconv_for_check, the
 * input_code entry for iconv is looked up with find_inputcode_byfunc(),
 * its name is handed to set_input_codename(), input_codename is passed
 * to debug(), and iconv_for_check is set to iconv.
 */
1621 #ifdef ANSI_C_PROTOTYPE
1622 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1624 void set_iconv(f, iconv_func)
1626 int (*iconv_func)();
1629 #ifdef INPUT_CODE_FIX
1637 #ifdef INPUT_CODE_FIX
1638 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1644 if (estab_f && iconv_for_check != iconv){
1645 struct input_code *p = find_inputcode_byfunc(iconv);
1647 set_input_codename(p->name);
1648 debug(input_codename);
1650 iconv_for_check = iconv;
1655 #define SCORE_L2 (1) /* level-2 (JIS second level) kanji */
1656 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1657 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1658 #ifdef SHIFTJIS_CP932
1659 #define SCORE_CP932 (SCORE_DEPEND << 1) /* remapped via CP932 */
1660 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1662 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1664 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1665 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1667 #define SCORE_INIT (SCORE_iMIME)
1669 const int score_table_A0[] = {
1672 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1673 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1676 const int score_table_F0[] = {
1677 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1678 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1679 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1680 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/* set_code_score: turn on the given score flag bit(s) in ptr->score. */
1683 void set_code_score(ptr, score)
1684 struct input_code *ptr;
1688 ptr->score |= score;
/* clr_code_score: turn off the given score flag bit(s) in ptr->score. */
1692 void clr_code_score(ptr, score)
1693 struct input_code *ptr;
1697 ptr->score &= ~score;
/*
 * code_score: classify the character held in ptr->buf and record the
 * result in ptr->score through set_code_score().  The branches shown
 * flag SCORE_ERROR, SCORE_KANA (first buffered value is SSO),
 * SCORE_NO_EXIST (UTF8_OUTPUT_ENABLE only, when e2w_conv() returns 0),
 * a table-driven value from score_table_A0 / score_table_F0 indexed by
 * the low nibble of c2, or SCORE_L2 when (c2 & 0x70) >= 0x50.
 */
1701 void code_score(ptr)
1702 struct input_code *ptr;
1704 int c2 = ptr->buf[0];
1705 #ifdef UTF8_OUTPUT_ENABLE
1706 int c1 = ptr->buf[1];
1709 set_code_score(ptr, SCORE_ERROR);
1710 }else if (c2 == SSO){
1711 set_code_score(ptr, SCORE_KANA);
1712 #ifdef UTF8_OUTPUT_ENABLE
1713 }else if (!e2w_conv(c2, c1)){
1714 set_code_score(ptr, SCORE_NO_EXIST);
1716 }else if ((c2 & 0x70) == 0x20){
1717 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1718 }else if ((c2 & 0x70) == 0x70){
1719 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1720 }else if ((c2 & 0x70) >= 0x50){
1721 set_code_score(ptr, SCORE_L2);
/*
 * status_disable: if this candidate's converter is the one currently
 * held in iconv, reset the selection with set_iconv(FALSE, 0).
 */
1725 void status_disable(ptr)
1726 struct input_code *ptr;
1731 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/* status_push_ch: store c at ptr->buf[ptr->index] and advance ptr->index. */
1734 void status_push_ch(ptr, c)
1735 struct input_code *ptr;
1738 ptr->buf[ptr->index++] = c;
1741 void status_clear(ptr)
1742 struct input_code *ptr;
/* status_reset: put ptr->score back to its starting value, SCORE_INIT. */
1748 void status_reset(ptr)
1749 struct input_code *ptr;
1752 ptr->score = SCORE_INIT;
/* status_reinit: clear the per-file state field ptr->_file_stat to 0. */
1755 void status_reinit(ptr)
1756 struct input_code *ptr;
1759 ptr->_file_stat = 0;
1762 void status_check(ptr, c)
1763 struct input_code *ptr;
1766 if (c <= DEL && estab_f){
1771 void s_status(ptr, c)
1772 struct input_code *ptr;
1777 status_check(ptr, c);
1782 #ifdef NUMCHAR_OPTION
1783 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1786 }else if (0xa1 <= c && c <= 0xdf){
1787 status_push_ch(ptr, SSO);
1788 status_push_ch(ptr, c);
1791 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1793 status_push_ch(ptr, c);
1794 #ifdef SHIFTJIS_CP932
1796 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1798 status_push_ch(ptr, c);
1799 #endif /* SHIFTJIS_CP932 */
1801 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1803 status_push_ch(ptr, c);
1804 #endif /* X0212_ENABLE */
1806 status_disable(ptr);
1810 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1811 status_push_ch(ptr, c);
1812 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1816 status_disable(ptr);
1820 #ifdef SHIFTJIS_CP932
1821 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1822 status_push_ch(ptr, c);
1823 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1824 set_code_score(ptr, SCORE_CP932);
1829 #endif /* SHIFTJIS_CP932 */
1830 #ifndef X0212_ENABLE
1831 status_disable(ptr);
1837 void e_status(ptr, c)
1838 struct input_code *ptr;
1843 status_check(ptr, c);
1848 #ifdef NUMCHAR_OPTION
1849 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1852 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1854 status_push_ch(ptr, c);
1856 }else if (0x8f == c){
1858 status_push_ch(ptr, c);
1859 #endif /* X0212_ENABLE */
1861 status_disable(ptr);
1865 if (0xa1 <= c && c <= 0xfe){
1866 status_push_ch(ptr, c);
1870 status_disable(ptr);
1875 if (0xa1 <= c && c <= 0xfe){
1877 status_push_ch(ptr, c);
1879 status_disable(ptr);
1881 #endif /* X0212_ENABLE */
1885 #ifdef UTF8_INPUT_ENABLE
1886 void w16_status(ptr, c)
1887 struct input_code *ptr;
1894 if (ptr->_file_stat == 0){
1895 if (c == 0xfe || c == 0xff){
1897 status_push_ch(ptr, c);
1898 ptr->_file_stat = 1;
1900 status_disable(ptr);
1901 ptr->_file_stat = -1;
1903 }else if (ptr->_file_stat > 0){
1905 status_push_ch(ptr, c);
1906 }else if (ptr->_file_stat < 0){
1907 status_disable(ptr);
1913 status_disable(ptr);
1914 ptr->_file_stat = -1;
1916 status_push_ch(ptr, c);
1923 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1924 status_push_ch(ptr, c);
1927 status_disable(ptr);
1928 ptr->_file_stat = -1;
1934 void w_status(ptr, c)
1935 struct input_code *ptr;
1940 status_check(ptr, c);
1945 #ifdef NUMCHAR_OPTION
1946 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1949 }else if (0xc0 <= c && c <= 0xdf){
1951 status_push_ch(ptr, c);
1952 }else if (0xe0 <= c && c <= 0xef){
1954 status_push_ch(ptr, c);
1956 status_disable(ptr);
1961 if (0x80 <= c && c <= 0xbf){
1962 status_push_ch(ptr, c);
1963 if (ptr->index > ptr->stat){
1964 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1965 && ptr->buf[2] == 0xbf);
1966 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1967 &ptr->buf[0], &ptr->buf[1]);
1974 status_disable(ptr);
1985 int action_flag = 1;
1986 struct input_code *result = 0;
1987 struct input_code *p = input_code_list;
1989 (p->status_func)(p, c);
1992 }else if(p->stat == 0){
2003 if (result && !estab_f){
2004 set_iconv(TRUE, result->iconv_func);
2005 }else if (c <= DEL){
2006 struct input_code *ptr = input_code_list;
2021 return std_gc_buf[--std_gc_ndx];
2032 if (std_gc_ndx == STD_GC_BUFSIZE){
2035 std_gc_buf[std_gc_ndx++] = c;
2049 #if !defined(PERL_XS) && !defined(WIN32DLL)
2056 while ((c = (*i_getc)(f)) != EOF)
2065 oconv = output_conv;
2068 /* replace continuation module, from output side */
2070 /* output redirection */
2072 if (noout_f || guess_f){
2079 if (mimeout_f == TRUE) {
2080 o_base64conv = oconv; oconv = base64_conv;
2082 /* base64_count = 0; */
2086 o_crconv = oconv; oconv = cr_conv;
2089 o_rot_conv = oconv; oconv = rot_conv;
2092 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2095 o_hira_conv = oconv; oconv = hira_conv;
2098 o_fconv = oconv; oconv = fold_conv;
2101 if (alpha_f || x0201_f) {
2102 o_zconv = oconv; oconv = z_conv;
2106 i_ungetc = std_ungetc;
2107 /* input redirection */
2110 i_cgetc = i_getc; i_getc = cap_getc;
2111 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2114 i_ugetc = i_getc; i_getc = url_getc;
2115 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2118 #ifdef NUMCHAR_OPTION
2120 i_ngetc = i_getc; i_getc = numchar_getc;
2121 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2124 #ifdef UNICODE_NORMALIZATION
2125 if (nfc_f && input_f == UTF8_INPUT){
2126 i_nfc_getc = i_getc; i_getc = nfc_getc;
2127 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2130 if (mime_f && mimebuf_f==FIXED_MIME) {
2131 i_mgetc = i_getc; i_getc = mime_getc;
2132 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2135 i_bgetc = i_getc; i_getc = broken_getc;
2136 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2138 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
2139 set_iconv(-TRUE, e_iconv);
2140 } else if (input_f == SJIS_INPUT) {
2141 set_iconv(-TRUE, s_iconv);
2142 #ifdef UTF8_INPUT_ENABLE
2143 } else if (input_f == UTF8_INPUT) {
2144 set_iconv(-TRUE, w_iconv);
2145 } else if (input_f == UTF16BE_INPUT) {
2146 set_iconv(-TRUE, w_iconv16);
2147 } else if (input_f == UTF16LE_INPUT) {
2148 set_iconv(-TRUE, w_iconv16);
2151 set_iconv(FALSE, e_iconv);
2155 struct input_code *p = input_code_list;
2163 Conversion main loop. Code detection only.
2172 int is_8bit = FALSE;
2174 module_connection();
2179 output_mode = ASCII;
2182 #define NEXT continue /* no output, get next */
2183 #define SEND ; /* output c1 and c2, get next */
2184 #define LAST break /* end of loop, go closing */
2186 while ((c1 = (*i_getc)(f)) != EOF) {
2191 /* in case of 8th bit is on */
2192 if (!estab_f&&!mime_decode_mode) {
2193 /* in case of not established yet */
2194 /* It is still ambiguous */
2195 if (h_conv(f, c2, c1)==EOF)
2201 /* in case of already established */
2203 /* ignore bogus code */
2209 /* second byte, 7 bit code */
2210 /* it might be kanji shifted */
2211 if ((c1 == DEL) || (c1 <= SPACE)) {
2212 /* ignore bogus first code */
2220 #ifdef UTF8_INPUT_ENABLE
2229 #ifdef NUMCHAR_OPTION
2230 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
2233 } else if (c1 > DEL) {
2235 if (!estab_f && !iso8859_f) {
2236 /* not established yet */
2237 if (!is_8bit) is_8bit = TRUE;
2240 } else { /* estab_f==TRUE */
2245 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2246 /* SJIS X0201 Case... */
2247 if(iso2022jp_f && x0201_f==NO_X0201) {
2248 (*oconv)(GETA1, GETA2);
2255 } else if (c1==SSO && iconv != s_iconv) {
2256 /* EUC X0201 Case */
2257 c1 = (*i_getc)(f); /* skip SSO */
2259 if (SSP<=c1 && c1<0xe0) {
2260 if(iso2022jp_f && x0201_f==NO_X0201) {
2261 (*oconv)(GETA1, GETA2);
2268 } else { /* bogus code, skip SSO and one byte */
2272 /* already established */
2277 } else if ((c1 > SPACE) && (c1 != DEL)) {
2278 /* in case of Roman characters */
2280 /* output 1 shifted byte */
2284 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2285 /* output 1 shifted byte */
2286 if(iso2022jp_f && x0201_f==NO_X0201) {
2287 (*oconv)(GETA1, GETA2);
2294 /* look like bogus code */
2297 } else if (input_mode == X0208) {
2298 /* in case of Kanji shifted */
2301 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2302 /* Check MIME code */
2303 if ((c1 = (*i_getc)(f)) == EOF) {
2306 } else if (c1 == '?') {
2307 /* =? is mime conversion start sequence */
2308 if(mime_f == STRICT_MIME) {
2309 /* check in real detail */
2310 if (mime_begin_strict(f) == EOF)
2314 } else if (mime_begin(f) == EOF)
2324 /* normal ASCII code */
2327 } else if (!is_8bit && c1 == SI) {
2330 } else if (!is_8bit && c1 == SO) {
2333 } else if (!is_8bit && c1 == ESC ) {
2334 if ((c1 = (*i_getc)(f)) == EOF) {
2335 /* (*oconv)(0, ESC); don't send bogus code */
2337 } else if (c1 == '$') {
2338 if ((c1 = (*i_getc)(f)) == EOF) {
2340 (*oconv)(0, ESC); don't send bogus code
2341 (*oconv)(0, '$'); */
2343 } else if (c1 == '@'|| c1 == 'B') {
2344 /* This is kanji introduction */
2347 set_input_codename("ISO-2022-JP");
2349 debug(input_codename);
2352 } else if (c1 == '(') {
2353 if ((c1 = (*i_getc)(f)) == EOF) {
2354 /* don't send bogus code
2360 } else if (c1 == '@'|| c1 == 'B') {
2361 /* This is kanji introduction */
2366 } else if (c1 == 'D'){
2370 #endif /* X0212_ENABLE */
2372 /* could be some special code */
2379 } else if (broken_f&0x2) {
2380 /* accept any ESC-(-x as broken code ... */
2390 } else if (c1 == '(') {
2391 if ((c1 = (*i_getc)(f)) == EOF) {
2392 /* don't send bogus code
2394 (*oconv)(0, '('); */
2398 /* This is X0201 kana introduction */
2399 input_mode = X0201; shift_mode = X0201;
2401 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2402 /* This is ASCII introduction (ESC-(-B/J/H), not X0208 kanji */
2403 input_mode = ASCII; shift_mode = FALSE;
2405 } else if (broken_f&0x2) {
2406 input_mode = ASCII; shift_mode = FALSE;
2411 /* maintain various input_mode here */
2415 } else if ( c1 == 'N' || c1 == 'n' ){
2417 c3 = (*i_getc)(f); /* skip SS2 */
2418 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2433 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2434 input_mode = ASCII; set_iconv(FALSE, 0);
2436 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2437 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2445 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2446 if ((c1=(*i_getc)(f))!=EOF) {
2450 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2466 if (input_mode == X0208)
2467 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2469 else if (input_mode == X0212)
2470 (*oconv)((0x8f << 8) | c2, c1);
2471 #endif /* X0212_ENABLE */
2472 else if (input_mode)
2473 (*oconv)(input_mode, c1); /* other special case */
2474 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2475 int c0 = (*i_getc)(f);
2478 (*iconv)(c2, c1, c0);
2484 /* goto next_word */
2488 (*iconv)(EOF, 0, 0);
2489 if (!is_inputcode_set)
2492 struct input_code *p = input_code_list;
2493 struct input_code *result = p;
2495 if (p->score < result->score) result = p;
2498 set_input_codename(result->name);
2513 /** it must NOT be in the kanji shifted sequence */
2514 /** it must NOT be written in JIS7 */
2515 /** and it must be after 2 byte 8bit code */
2522 while ((c1 = (*i_getc)(f)) != EOF) {
2528 if (push_hold_buf(c1) == EOF || estab_f){
2534 struct input_code *p = input_code_list;
2535 struct input_code *result = p;
2540 if (p->score < result->score){
2545 set_iconv(FALSE, result->iconv_func);
2550 ** 1) EOF is detected, or
2551 ** 2) Code is established, or
2552 ** 3) Buffer is FULL (but last word is pushed)
2554 ** in 1) and 3) cases, we continue to use
2555 ** Kanji codes by oconv and leave estab_f unchanged.
2560 while (wc < hold_count){
2561 c2 = hold_buf[wc++];
2563 #ifdef NUMCHAR_OPTION
2564 || (c2 & CLASS_MASK) == CLASS_UTF16
2569 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2570 (*iconv)(X0201, c2, 0);
2573 if (wc < hold_count){
2574 c1 = hold_buf[wc++];
2583 if ((*iconv)(c2, c1, 0) < 0){
2585 if (wc < hold_count){
2586 c0 = hold_buf[wc++];
2595 (*iconv)(c2, c1, c0);
2608 if (hold_count >= HOLD_SIZE*2)
2610 hold_buf[hold_count++] = c2;
2611 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2614 int s2e_conv(c2, c1, p2, p1)
2618 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
2621 #ifdef SHIFTJIS_CP932
2622 if (cp51932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2623 extern const unsigned short shiftjis_cp932[3][189];
2624 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2630 #endif /* SHIFTJIS_CP932 */
2632 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2633 extern const unsigned short shiftjis_x0212[3][189];
2634 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2637 c2 = (0x8f << 8) | (val >> 8);
2650 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2652 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2661 c2 = x0212_unshift(c2);
2676 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2679 int ret = s2e_conv(c2, c1, &c2, &c1);
2680 if (ret) return ret;
2694 }else if (c2 == 0x8f){
2698 c2 = (c2 << 8) | (c1 & 0x7f);
2700 #ifdef SHIFTJIS_CP932
2703 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2704 s2e_conv(s2, s1, &c2, &c1);
2705 if ((c2 & 0xff00) == 0){
2711 #endif /* SHIFTJIS_CP932 */
2712 #endif /* X0212_ENABLE */
2713 } else if (c2 == SSO){
2716 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2726 #ifdef UTF8_INPUT_ENABLE
2728 w2e_conv(c2, c1, c0, p2, p1)
2732 extern const unsigned short *const utf8_to_euc_2bytes[];
2733 extern const unsigned short *const *const utf8_to_euc_3bytes[];
2736 if (0xc0 <= c2 && c2 <= 0xef) {
2737 const unsigned short *const *pp;
2740 if (c0 == 0) return -1;
2741 pp = utf8_to_euc_3bytes[c2 - 0x80];
2742 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2744 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2746 #ifdef NUMCHAR_OPTION
2749 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2754 } else if (c2 == X0201) {
2770 if (c2 == 0) /* 0x00-0x7f */
2772 else if ((c2 & 0xe0) == 0xc0) /* 0xc0-0xdf */
2774 else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
2775 return -1; /* 3bytes */
2777 else if (0xf0 <= c2)
2778 return 0; /* 4,5,6bytes */
2779 else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
2780 return 0; /* trail byte */
2785 else if (c2 == 0xef && c1 == 0xbb && c0 == 0xbf) {
2786 return 0; /* throw BOM */
2787 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
2788 } else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
2789 unsigned short val = 0;
2794 val = ww16_conv(c2, c1, c0);
2795 c2 = (val >> 8) & 0xff;
2799 ret = w2e_conv(c2, c1, c0, &c2, &c1);
2808 w16w_conv(val, p2, p1, p0)
2816 }else if (val < 0x800){
2817 *p2 = 0xc0 | (val >> 6);
2818 *p1 = 0x80 | (val & 0x3f);
2821 *p2 = 0xe0 | (val >> 12);
2822 *p1 = 0x80 | ((val >> 6) & 0x3f);
2823 *p0 = 0x80 | (val & 0x3f);
2828 ww16_conv(c2, c1, c0)
2833 val = (c2 & 0x0f) << 12;
2834 val |= (c1 & 0x3f) << 6;
2836 }else if (c2 >= 0xc0){
2837 val = (c2 & 0x1f) << 6;
2846 w16e_conv(val, p2, p1)
2850 extern const unsigned short *const utf8_to_euc_2bytes[];
2851 extern const unsigned short *const *const utf8_to_euc_3bytes[];
2853 const unsigned short *const *pp;
2857 w16w_conv(val, &c2, &c1, &c0);
2860 pp = utf8_to_euc_3bytes[c2 - 0x80];
2861 psize = sizeof_utf8_to_euc_C2;
2862 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2864 pp = utf8_to_euc_2bytes;
2865 psize = sizeof_utf8_to_euc_2bytes;
2866 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2868 #ifdef NUMCHAR_OPTION
2871 *p1 = CLASS_UTF16 | val;
2883 w_iconv16(c2, c1, c0)
2888 if (c2==0376 && c1==0377){
2889 utf16_mode = UTF16BE_INPUT;
2891 } else if (c2==0377 && c1==0376){
2892 utf16_mode = UTF16LE_INPUT;
2895 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
2897 tmp=c1; c1=c2; c2=tmp;
2899 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2903 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
2904 if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16));
2906 else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2907 if (ret) return ret;
2913 w_iconv_common(c1, c0, pp, psize, p2, p1)
2915 const unsigned short *const *pp;
2920 const unsigned short *p;
2923 /* CP932/CP51932: U+00A6 (BROKEN BAR) -> not 0x8fa2c3, but 0x7c */
2924 if (cp51932_f && c1 == 0xC2 && c0 == 0xA6){
2930 if (pp == 0) return 1;
2933 if (c1 < 0 || psize <= c1) return 1;
2935 if (p == 0) return 1;
2938 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2940 if (val == 0) return 1;
2947 if (c2 == SO) c2 = X0201;
2956 #ifdef UTF8_OUTPUT_ENABLE
2958 nkf_each_char_to_hex(f, c)
2959 void (*f)PROTO((int c2,int c1));
2962 const char *hex = "0123456789ABCDEF";
2968 (*f)(0, hex[(c>>shift)&0xF]);
2979 encode_fallback_html(c)
2986 (*oconv)(0, 0x30+(c/1000000)%10);
2988 (*oconv)(0, 0x30+(c/100000 )%10);
2990 (*oconv)(0, 0x30+(c/10000 )%10);
2992 (*oconv)(0, 0x30+(c/1000 )%10);
2994 (*oconv)(0, 0x30+(c/100 )%10);
2996 (*oconv)(0, 0x30+(c/10 )%10);
2998 (*oconv)(0, 0x30+ c %10);
3004 encode_fallback_xml(c)
3010 nkf_each_char_to_hex(oconv, c);
3016 encode_fallback_java(c)
3019 const char *hex = "0123456789ABCDEF";
3021 if((c&0x00FFFFFF) > 0xFFFF){
3025 (*oconv)(0, hex[(c>>20)&0xF]);
3026 (*oconv)(0, hex[(c>>16)&0xF]);
3030 (*oconv)(0, hex[(c>>12)&0xF]);
3031 (*oconv)(0, hex[(c>> 8)&0xF]);
3032 (*oconv)(0, hex[(c>> 4)&0xF]);
3033 (*oconv)(0, hex[ c &0xF]);
3038 encode_fallback_perl(c)
3044 nkf_each_char_to_hex(oconv, c);
3050 encode_fallback_subchar(c)
3053 c = unicode_subchar;
3054 (*oconv)((c>>8)&0xFF, c&0xFF);
3060 (*oconv)(0, (c>>shift)&0xFF);
3074 extern const unsigned short euc_to_utf8_1byte[];
3075 extern const unsigned short *const euc_to_utf8_2bytes[];
3076 extern const unsigned short *const euc_to_utf8_2bytes_ms[];
3077 const unsigned short *p;
3080 p = euc_to_utf8_1byte;
3082 } else if (c2 >> 8 == 0x8f){
3083 extern const unsigned short *const x0212_to_utf8_2bytes[];
3084 c2 = (c2&0x7f) - 0x21;
3085 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3086 p = x0212_to_utf8_2bytes[c2];
3092 c2 = (c2&0x7f) - 0x21;
3093 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3094 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
3099 c1 = (c1 & 0x7f) - 0x21;
3100 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3117 if (unicode_bom_f==2) {
3124 #ifdef NUMCHAR_OPTION
3125 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3126 w16w_conv(c1, &c2, &c1, &c0);
3130 if (c0) (*o_putc)(c0);
3137 output_mode = ASCII;
3139 } else if (c2 == ISO8859_1) {
3140 output_mode = ISO8859_1;
3141 (*o_putc)(c1 | 0x080);
3144 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
3145 val = ((c2<<8)&0xff00) + c1;
3146 else val = e2w_conv(c2, c1);
3148 w16w_conv(val, &c2, &c1, &c0);
3152 if (c0) (*o_putc)(c0);
3168 if (unicode_bom_f==2) {
3170 (*o_putc)((unsigned char)'\377');
3174 (*o_putc)((unsigned char)'\377');
3179 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
3180 } else if (c2 == ISO8859_1) {
3183 #ifdef NUMCHAR_OPTION
3184 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
3185 c2 = (c1 >> 8) & 0xff;
3189 unsigned short val = e2w_conv(c2, c1);
3190 c2 = (val >> 8) & 0xff;
3209 #ifdef NUMCHAR_OPTION
3210 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3211 w16e_conv(c1, &c2, &c1);
3212 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3213 if(encode_fallback)(*encode_fallback)(c1);
3221 } else if (c2 == 0) {
3222 output_mode = ASCII;
3224 } else if (c2 == X0201) {
3225 output_mode = JAPANESE_EUC;
3226 (*o_putc)(SSO); (*o_putc)(c1|0x80);
3227 } else if (c2 == ISO8859_1) {
3228 output_mode = ISO8859_1;
3229 (*o_putc)(c1 | 0x080);
3231 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3232 output_mode = JAPANESE_EUC;
3233 #ifdef SHIFTJIS_CP932
3236 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3237 s2e_conv(s2, s1, &c2, &c1);
3242 output_mode = ASCII;
3244 }else if ((c2 & 0xff00) >> 8 == 0x8f){
3247 (*o_putc)((c2 & 0x7f) | 0x080);
3248 (*o_putc)(c1 | 0x080);
3251 (*o_putc)((c2 & 0x7f) | 0x080);
3252 (*o_putc)(c1 | 0x080);
3256 if ((c1<0x21 || 0x7e<c1) ||
3257 (c2<0x21 || 0x7e<c2)) {
3258 set_iconv(FALSE, 0);
3259 return; /* too late to rescue this char */
3261 output_mode = JAPANESE_EUC;
3262 (*o_putc)(c2 | 0x080);
3263 (*o_putc)(c1 | 0x080);
3273 if ((ret & 0xff00) == 0x8f00){
3274 if (0x75 <= c && c <= 0x7f){
3275 ret = c + (0x109 - 0x75);
3278 if (0x75 <= c && c <= 0x7f){
3279 ret = c + (0x113 - 0x75);
3286 int x0212_unshift(c)
3290 if (0x7f <= c && c <= 0x88){
3291 ret = c + (0x75 - 0x7f);
3292 }else if (0x89 <= c && c <= 0x92){
3293 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
3297 #endif /* X0212_ENABLE */
3300 e2s_conv(c2, c1, p2, p1)
3301 int c2, c1, *p2, *p1;
3305 const unsigned short *ptr;
3307 extern const unsigned short *const x0212_shiftjis[];
3308 if ((c2 & 0xff00) == 0x8f00){
3310 if (0x21 <= ndx && ndx <= 0x7e){
3311 ptr = x0212_shiftjis[ndx - 0x21];
3313 val = ptr[(c1 & 0x7f) - 0x21];
3323 c2 = x0212_shift(c2);
3325 #endif /* X0212_ENABLE */
3326 if ((c2 & 0xff00) == 0x8f00){
3329 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
3330 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3339 #ifdef NUMCHAR_OPTION
3340 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3341 w16e_conv(c1, &c2, &c1);
3342 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3343 if(encode_fallback)(*encode_fallback)(c1);
3351 } else if (c2 == 0) {
3352 output_mode = ASCII;
3354 } else if (c2 == X0201) {
3355 output_mode = SHIFT_JIS;
3357 } else if (c2 == ISO8859_1) {
3358 output_mode = ISO8859_1;
3359 (*o_putc)(c1 | 0x080);
3361 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3362 output_mode = SHIFT_JIS;
3363 if (e2s_conv(c2, c1, &c2, &c1) == 0){
3369 if ((c1<0x20 || 0x7e<c1) ||
3370 (c2<0x20 || 0x7e<c2)) {
3371 set_iconv(FALSE, 0);
3372 return; /* too late to rescue this char */
3374 output_mode = SHIFT_JIS;
3375 e2s_conv(c2, c1, &c2, &c1);
3377 #ifdef SHIFTJIS_CP932
3379 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3380 extern const unsigned short cp932inv[2][189];
3381 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3387 #endif /* SHIFTJIS_CP932 */
3390 if (prefix_table[(unsigned char)c1]){
3391 (*o_putc)(prefix_table[(unsigned char)c1]);
3402 #ifdef NUMCHAR_OPTION
3403 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3404 w16e_conv(c1, &c2, &c1);
3405 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3406 if(encode_fallback)(*encode_fallback)(c1);
3412 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3415 (*o_putc)(ascii_intro);
3416 output_mode = ASCII;
3420 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3421 if (output_mode!=X0212) {
3422 output_mode = X0212;
3428 (*o_putc)(c2 & 0x7f);
3431 } else if (c2==X0201) {
3432 if (output_mode!=X0201) {
3433 output_mode = X0201;
3439 } else if (c2==ISO8859_1) {
3440 /* iso8859 introduction, or 8th bit on */
3441 /* Can we convert in 7bit form using ESC-'-'-A ?
3443 output_mode = ISO8859_1;
3445 } else if (c2 == 0) {
3446 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3449 (*o_putc)(ascii_intro);
3450 output_mode = ASCII;
3454 if (output_mode != X0208) {
3455 output_mode = X0208;
3458 (*o_putc)(kanji_intro);
3460 if (c1<0x20 || 0x7e<c1)
3462 if (c2<0x20 || 0x7e<c2)
3474 mime_prechar(c2, c1);
3475 (*o_base64conv)(c2,c1);
3479 STATIC int broken_buf[3];
3480 STATIC int broken_counter = 0;
3481 STATIC int broken_last = 0;
3488 if (broken_counter>0) {
3489 return broken_buf[--broken_counter];
3492 if (c=='$' && broken_last != ESC
3493 && (input_mode==ASCII || input_mode==X0201)) {
3496 if (c1=='@'|| c1=='B') {
3497 broken_buf[0]=c1; broken_buf[1]=c;
3504 } else if (c=='(' && broken_last != ESC
3505 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3508 if (c1=='J'|| c1=='B') {
3509 broken_buf[0]=c1; broken_buf[1]=c;
3527 if (broken_counter<2)
3528 broken_buf[broken_counter++]=c;
3532 STATIC int prev_cr = 0;
3540 if (! (c2==0&&c1==NL) ) {
3546 } else if (c1=='\r') {
3548 } else if (c1=='\n') {
3549 if (crmode_f==CRLF) {
3550 (*o_crconv)(0,'\r');
3551 } else if (crmode_f==CR) {
3552 (*o_crconv)(0,'\r');
3556 } else if (c1!='\032' || crmode_f!=NL){
3562 Return value of fold_conv()
3564 \n add newline and output char
3565 \r add newline and output nothing
3568 1 (or else) normal output
3570 fold state in prev (previous character)
3572 >0x80 Japanese (X0208/X0201)
3577 This fold algorithm does not preserve leading space in a line.
3578 This is the main difference from fmt.
3581 #define char_size(c2,c1) (c2?2:1)
3590 if (c1== '\r' && !fold_preserve_f) {
3591 fold_state=0; /* ignore cr */
3592 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3594 fold_state=0; /* ignore cr */
3595 } else if (c1== BS) {
3596 if (f_line>0) f_line--;
3598 } else if (c2==EOF && f_line != 0) { /* close open last line */
3600 } else if ((c1=='\n' && !fold_preserve_f)
3601 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3602 && fold_preserve_f)) {
3604 if (fold_preserve_f) {
3608 } else if ((f_prev == c1 && !fold_preserve_f)
3609 || (f_prev == '\n' && fold_preserve_f)
3610 ) { /* duplicate newline */
3613 fold_state = '\n'; /* output two newline */
3619 if (f_prev&0x80) { /* Japanese? */
3621 fold_state = 0; /* ignore given single newline */
3622 } else if (f_prev==' ') {
3626 if (++f_line<=fold_len)
3630 fold_state = '\r'; /* fold and output nothing */
3634 } else if (c1=='\f') {
3639 fold_state = '\n'; /* output newline and clear */
3640 } else if ( (c2==0 && c1==' ')||
3641 (c2==0 && c1=='\t')||
3642 (c2=='!'&& c1=='!')) {
3643 /* X0208 kankaku or ascii space */
3644 if (f_prev == ' ') {
3645 fold_state = 0; /* remove duplicate spaces */
3648 if (++f_line<=fold_len)
3649 fold_state = ' '; /* output ASCII space only */
3651 f_prev = ' '; f_line = 0;
3652 fold_state = '\r'; /* fold and output nothing */
3656 prev0 = f_prev; /* we still need this one... , but almost done */
3658 if (c2 || c2==X0201)
3659 f_prev |= 0x80; /* this is Japanese */
3660 f_line += char_size(c2,c1);
3661 if (f_line<=fold_len) { /* normal case */
3664 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3665 f_line = char_size(c2,c1);
3666 fold_state = '\n'; /* We can't wait, do fold now */
3667 } else if (c2==X0201) {
3668 /* simple kinsoku rules return 1 means no folding */
3669 if (c1==(0xde&0x7f)) fold_state = 1; /* voiced sound mark (dakuten) */
3670 else if (c1==(0xdf&0x7f)) fold_state = 1; /* semi-voiced sound mark (handakuten) */
3671 else if (c1==(0xa4&0x7f)) fold_state = 1; /* ideographic full stop, per the original note */
3672 else if (c1==(0xa3&0x7f)) fold_state = 1; /* comma, per the original note */
3673 else if (c1==(0xa1&0x7f)) fold_state = 1; /* closing corner bracket, per the original note */
3674 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3675 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3677 fold_state = '\n';/* add one new f_line before this character */
3680 fold_state = '\n';/* add one new f_line before this character */
3683 /* kinsoku point in ASCII */
3684 if ( c1==')'|| /* { [ ( */
3695 /* just after special */
3696 } else if (!is_alnum(prev0)) {
3697 f_line = char_size(c2,c1);
3699 } else if ((prev0==' ') || /* ignored new f_line */
3700 (prev0=='\n')|| /* ignored new f_line */
3701 (prev0&0x80)) { /* X0208 - ASCII */
3702 f_line = char_size(c2,c1);
3703 fold_state = '\n';/* add one new f_line before this character */
3705 fold_state = 1; /* default no fold in ASCII */
3709 if (c1=='"') fold_state = 1; /* ideographic comma */
3710 else if (c1=='#') fold_state = 1; /* ideographic full stop */
3711 else if (c1=='W') fold_state = 1; /* closing corner bracket */
3712 else if (c1=='K') fold_state = 1; /* fullwidth closing parenthesis */
3713 else if (c1=='$') fold_state = 1; /* fullwidth comma */
3714 else if (c1=='%') fold_state = 1; /* fullwidth period */
3715 else if (c1=='\'') fold_state = 1; /* fullwidth plus sign in the original note -- TODO confirm */
3716 else if (c1=='(') fold_state = 1; /* fullwidth semicolon */
3717 else if (c1==')') fold_state = 1; /* fullwidth question mark */
3718 else if (c1=='*') fold_state = 1; /* fullwidth exclamation mark */
3719 else if (c1=='+') fold_state = 1; /* voiced sound mark (dakuten) */
3720 else if (c1==',') fold_state = 1; /* semi-voiced sound mark (handakuten) */
3721 /* default no fold in kinsoku */
3724 f_line = char_size(c2,c1);
3725 /* add one new f_line before this character */
3728 f_line = char_size(c2,c1);
3730 /* add one new f_line before this character */
3735 /* terminator process */
3736 switch(fold_state) {
3755 int z_prev2=0,z_prev1=0;
3762 /* if (c2) c1 &= 0x7f; assertion */
3764 if (x0201_f && z_prev2==X0201) { /* X0201 */
3765 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3767 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3769 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3771 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3775 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3784 if (x0201_f && c2==X0201) {
3785 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3786 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3787 z_prev1 = c1; z_prev2 = c2;
3790 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3795 /* JISX0208 Alphabet */
3796 if (alpha_f && c2 == 0x23 ) {
3798 } else if (alpha_f && c2 == 0x21 ) {
3799 /* JISX0208 Kigou */
3804 } else if (alpha_f&0x4) {
3809 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3815 case '>': entity = ">"; break;
3816 case '<': entity = "<"; break;
3817 case '\"': entity = """; break;
3818 case '&': entity = "&"; break;
3821 while (*entity) (*o_zconv)(0, *entity++);
3831 #define rot13(c) ( \
3833 (c <= 'M') ? (c + 13): \
3834 (c <= 'Z') ? (c - 13): \
3836 (c <= 'm') ? (c + 13): \
3837 (c <= 'z') ? (c - 13): \
3841 #define rot47(c) ( \
3843 ( c <= 'O' ) ? (c + 47) : \
3844 ( c <= '~' ) ? (c - 47) : \
3852 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3858 (*o_rot_conv)(c2,c1);
3865 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3867 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3870 (*o_hira_conv)(c2,c1);
3875 iso2022jp_check_conv(c2,c1)
3878 STATIC const int range[RANGE_NUM_MAX][2] = {
3901 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3905 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3910 for (i = 0; i < RANGE_NUM_MAX; i++) {
3911 start = range[i][0];
3914 if (c >= start && c <= end) {
3919 (*o_iso2022jp_check_conv)(c2,c1);
3923 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3925 const unsigned char *mime_pattern[] = {
3926 (const unsigned char *)"\075?EUC-JP?B?",
3927 (const unsigned char *)"\075?SHIFT_JIS?B?",
3928 (const unsigned char *)"\075?ISO-8859-1?Q?",
3929 (const unsigned char *)"\075?ISO-8859-1?B?",
3930 (const unsigned char *)"\075?ISO-2022-JP?B?",
3931 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3932 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3933 (const unsigned char *)"\075?UTF-8?B?",
3934 (const unsigned char *)"\075?UTF-8?Q?",
3936 (const unsigned char *)"\075?US-ASCII?Q?",
3941 /* Markers used to raise the priority of the corresponding input code */
3942 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3943 e_iconv, s_iconv, 0, 0, 0, 0,
3944 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3950 const int mime_encode[] = {
3951 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3952 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3959 const int mime_encode_method[] = {
3960 'B', 'B','Q', 'B', 'B', 'Q',
3961 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3969 #define MAXRECOVER 20
3974 if (i_getc!=mime_getc) {
3975 i_mgetc = i_getc; i_getc = mime_getc;
3976 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3977 if(mime_f==STRICT_MIME) {
3978 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3979 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3985 unswitch_mime_getc()
3987 if(mime_f==STRICT_MIME) {
3988 i_mgetc = i_mgetc_buf;
3989 i_mungetc = i_mungetc_buf;
3992 i_ungetc = i_mungetc;
3993 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3994 mime_iconv_back = NULL;
3998 mime_begin_strict(f)
4003 const unsigned char *p,*q;
4004 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
4006 mime_decode_mode = FALSE;
4007 /* =? has been checked */
4009 p = mime_pattern[j];
4012 for(i=2;p[i]>' ';i++) { /* start at =? */
4013 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
4014 /* pattern fails, try next one */
4016 while ((p = mime_pattern[++j])) {
4017 for(k=2;k<i;k++) /* assume length(p) > i */
4018 if (p[k]!=q[k]) break;
4019 if (k==i && nkf_toupper(c1)==p[k]) break;
4021 if (p) continue; /* found next one, continue */
4022 /* all fails, output from recovery buffer */
4030 mime_decode_mode = p[i-2];
4032 mime_iconv_back = iconv;
4033 set_iconv(FALSE, mime_priority_func[j]);
4034 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4036 if (mime_decode_mode=='B') {
4037 mimebuf_f = unbuf_f;
4039 /* do MIME integrity check */
4040 return mime_integrity(f,mime_pattern[j]);
4052 /* we don't keep eof of Fifo, because it contains ?= as
4053 a terminator. It was checked in mime_integrity. */
4054 return ((mimebuf_f)?
4055 (*i_mgetc_buf)(f):Fifo(mime_input++));
4059 mime_ungetc_buf(c,f)
4064 (*i_mungetc_buf)(c,f);
4066 Fifo(--mime_input)=c;
4077 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4078 /* re-read and convert again from mime_buffer. */
4080 /* =? has been checked */
4082 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
4083 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4084 /* We accept any character type even if it is broken by new lines */
4085 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
4086 if (c1=='\n'||c1==' '||c1=='\r'||
4087 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
4089 /* Failed. But this could be another MIME preamble */
4097 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
4098 if (!(++i<MAXRECOVER) || c1==EOF) break;
4099 if (c1=='b'||c1=='B') {
4100 mime_decode_mode = 'B';
4101 } else if (c1=='q'||c1=='Q') {
4102 mime_decode_mode = 'Q';
4106 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
4107 if (!(++i<MAXRECOVER) || c1==EOF) break;
4109 mime_decode_mode = FALSE;
4115 if (!mime_decode_mode) {
4116 /* false MIME preamble, restart from mime_buffer */
4117 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4118 /* Since we are in MIME mode until buffer becomes empty, */
4119 /* we never go into mime_begin again for a while. */
4122 /* discard mime preamble, and goto MIME mode */
4124 /* do no MIME integrity check */
4125 return c1; /* used only for checking EOF */
4140 fprintf(stderr, "%s\n", str);
4146 set_input_codename (codename)
4151 strcmp(codename, "") != 0 &&
4152 strcmp(codename, input_codename) != 0)
4154 is_inputcode_mixed = TRUE;
4156 input_codename = codename;
4157 is_inputcode_set = TRUE;
4160 #if !defined(PERL_XS) && !defined(WIN32DLL)
4162 print_guessed_code (filename)
4165 char *codename = "BINARY";
4166 if (!is_inputcode_mixed) {
4167 if (strcmp(input_codename, "") == 0) {
4170 codename = input_codename;
4173 if (filename != NULL) printf("%s:", filename);
4174 printf("%s\n", codename);
4180 #ifdef ANSI_C_PROTOTYPE
4181 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
4184 hex_getc(ch, f, g, u)
4197 if (!nkf_isxdigit(c2)){
4202 if (!nkf_isxdigit(c3)){
4207 return (hex2bin(c2) << 4) | hex2bin(c3);
4214 return hex_getc(':', f, i_cgetc, i_cungetc);
4222 return (*i_cungetc)(c, f);
4229 return hex_getc('%', f, i_ugetc, i_uungetc);
4237 return (*i_uungetc)(c, f);
4241 #ifdef NUMCHAR_OPTION
4246 int (*g)() = i_ngetc;
4247 int (*u)() = i_nungetc;
4258 if (buf[i] == 'x' || buf[i] == 'X'){
4259 for (j = 0; j < 5; j++){
4261 if (!nkf_isxdigit(buf[i])){
4268 c |= hex2bin(buf[i]);
4271 for (j = 0; j < 6; j++){
4275 if (!nkf_isdigit(buf[i])){
4282 c += hex2bin(buf[i]);
4288 return CLASS_UTF16 | c;
4298 numchar_ungetc(c, f)
4302 return (*i_nungetc)(c, f);
4306 #ifdef UNICODE_NORMALIZATION
4308 /* Normalization Form C */
4313 int (*g)() = i_nfc_getc;
4314 int (*u)() = i_nfc_ungetc;
4315 int i=0, j, k=1, lower, upper;
4317 const int *array = NULL;
4318 extern const struct normalization_pair normalization_table[];
4321 while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
4322 lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4323 while (upper >= lower) {
4324 j = (lower+upper) / 2;
4325 array = normalization_table[j].nfd;
4326 for (k=0; k < NORMALIZATION_TABLE_NFD_LENGTH && array[k]; k++){
4327 if (array[k] != buf[k]){
4328 array[k] < buf[k] ? (lower = j + 1) : (upper = j - 1);
4335 array = normalization_table[j].nfc;
4336 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4353 return (*i_nfc_ungetc)(c, f);
4355 #endif /* UNICODE_NORMALIZATION */
4362 int c1, c2, c3, c4, cc;
4363 int t1, t2, t3, t4, mode, exit_mode;
4367 int lwsp_size = 128;
4369 if (mime_top != mime_last) { /* Something is in FIFO */
4370 return Fifo(mime_top++);
4372 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4373 mime_decode_mode=FALSE;
4374 unswitch_mime_getc();
4375 return (*i_getc)(f);
4378 if (mimebuf_f == FIXED_MIME)
4379 exit_mode = mime_decode_mode;
4382 if (mime_decode_mode == 'Q') {
4383 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4385 if (c1=='_') return ' ';
4386 if (c1<=' ' || DEL<=c1) {
4387 mime_decode_mode = exit_mode; /* prepare for quit */
4390 if (c1!='=' && c1!='?') {
4394 mime_decode_mode = exit_mode; /* prepare for quit */
4395 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4396 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4397 /* end Q encoding */
4398 input_mode = exit_mode;
4400 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4401 if (lwsp_buf==NULL) {
4402 perror("can't malloc");
4405 while ((c1=(*i_getc)(f))!=EOF) {
4410 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4418 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
4419 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4434 lwsp_buf[lwsp_count] = c1;
4435 if (lwsp_count++>lwsp_size){
4437 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4438 if (lwsp_buf_new==NULL) {
4441 perror("can't realloc");
4444 lwsp_buf = lwsp_buf_new;
4450 if (lwsp_count > 0) {
4451 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4455 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4456 i_ungetc(lwsp_buf[lwsp_count],f);
4464 if (c1=='='&&c2<' ') { /* this is soft wrap */
4465 while((c1 = (*i_mgetc)(f)) <=' ') {
4466 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4468 mime_decode_mode = 'Q'; /* still in MIME */
4469 goto restart_mime_q;
4472 mime_decode_mode = 'Q'; /* still in MIME */
4476 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4477 if (c2<=' ') return c2;
4478 mime_decode_mode = 'Q'; /* still in MIME */
/*
 * hex(c): numeric value (0..15) of the hexadecimal digit character c;
 * yields 0 when c is not one of 0-9, A-F or a-f.
 * Every use of the parameter is parenthesized so that an argument which is
 * itself an expression (e.g. hex(x | 0x20)) is parsed as intended.
 * The argument is evaluated several times, so it must be free of side effects.
 */
#define hex(c) (('0'<=(c)&&(c)<='9')?((c)-'0'):\
    ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
4481 return ((hex(c2)<<4) + hex(c3));
4484 if (mime_decode_mode != 'B') {
4485 mime_decode_mode = FALSE;
4486 return (*i_mgetc)(f);
4490 /* Base64 encoding */
4492 MIME allows line breaks in the middle of
4493 Base64, but we are very pessimistic in decoding
4494 in unbuf mode because MIME encoded code may be broken by
4495 less or an editor's control sequence (such as ESC-[-K in unbuffered
4496 mode). Ignore incomplete MIME.
4498 mode = mime_decode_mode;
4499 mime_decode_mode = exit_mode; /* prepare for quit */
4501 while ((c1 = (*i_mgetc)(f))<=' ') {
4506 if ((c2 = (*i_mgetc)(f))<=' ') {
4509 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4510 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4513 if ((c1 == '?') && (c2 == '=')) {
4516 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4517 if (lwsp_buf==NULL) {
4518 perror("can't malloc");
4521 while ((c1=(*i_getc)(f))!=EOF) {
4526 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4534 if ((c1=(*i_getc)(f))!=EOF) {
4538 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4553 lwsp_buf[lwsp_count] = c1;
4554 if (lwsp_count++>lwsp_size){
4556 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4557 if (lwsp_buf_new==NULL) {
4560 perror("can't realloc");
4563 lwsp_buf = lwsp_buf_new;
4569 if (lwsp_count > 0) {
4570 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4574 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4575 i_ungetc(lwsp_buf[lwsp_count],f);
4584 if ((c3 = (*i_mgetc)(f))<=' ') {
4587 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4588 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4592 if ((c4 = (*i_mgetc)(f))<=' ') {
4595 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4596 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4600 mime_decode_mode = mode; /* still in MIME sigh... */
4602 /* BASE 64 decoding */
4604 t1 = 0x3f & base64decode(c1);
4605 t2 = 0x3f & base64decode(c2);
4606 t3 = 0x3f & base64decode(c3);
4607 t4 = 0x3f & base64decode(c4);
4608 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4610 Fifo(mime_last++) = cc;
4611 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4613 Fifo(mime_last++) = cc;
4614 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4616 Fifo(mime_last++) = cc;
4621 return Fifo(mime_top++);
4629 Fifo(--mime_top) = c;
4636 const unsigned char *p;
4640 /* In buffered mode, read until =? or NL or buffer full
4642 mime_input = mime_top;
4643 mime_last = mime_top;
4645 while(*p) Fifo(mime_input++) = *p++;
4648 while((c=(*i_getc)(f))!=EOF) {
4649 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4650 break; /* buffer full */
4652 if (c=='=' && d=='?') {
4653 /* checked. skip header, start decode */
4654 Fifo(mime_input++) = c;
4655 /* mime_last_input = mime_input; */
4660 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4662 /* Should we check length mod 4? */
4663 Fifo(mime_input++) = c;
4666 /* In case of Incomplete MIME, no MIME decode */
4667 Fifo(mime_input++) = c;
4668 mime_last = mime_input; /* point undecoded buffer */
4669 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4670 switch_mime_getc(); /* anyway we need buffered getc */
4681 i = c - 'A'; /* A..Z 0-25 */
4683 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4685 } else if (c > '/') {
4686 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4687 } else if (c == '+') {
4688 i = '>' /* 62 */ ; /* + 62 */
4690 i = '?' /* 63 */ ; /* / 63 */
4695 STATIC const char basis_64[] =
4696 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Number of bytes the MIME output code may queue before it must flush. */
#define MIMEOUT_BUF_LENGTH (60)
/*
 * Pending-output buffer for the MIME encoder.  It has one slot more than
 * MIMEOUT_BUF_LENGTH because writers store a byte first and only afterwards
 * test mimeout_buf_count > MIMEOUT_BUF_LENGTH to decide whether to flush.
 */
char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
/* Number of bytes currently held in mimeout_buf. */
int mimeout_buf_count = 0;
/* Flag tested and cleared while an encoded word is opened; when it is zero,
 * whitespace at the front of mimeout_buf is handled specially there.
 * NOTE(review): exact semantics depend on code outside this view. */
int mimeout_preserve_space = 0;
/*
 * itoh4(c): upper-case hexadecimal digit character for the value c (0..15).
 * The parameter is parenthesized so expression arguments such as
 * itoh4(c & 0xf) expand correctly.  The argument is evaluated more than
 * once, so it must be free of side effects.
 */
#define itoh4(c) ((c)>=10?(c)+'A'-10:(c)+'0')
4709 const unsigned char *p;
4712 p = mime_pattern[0];
4713 for(i=0;mime_encode[i];i++) {
4714 if (mode == mime_encode[i]) {
4715 p = mime_pattern[i];
4719 mimeout_mode = mime_encode_method[i];
4722 if (base64_count>45) {
4723 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
4724 (*o_mputc)(mimeout_buf[i]);
4730 if (!mimeout_preserve_space && mimeout_buf_count>0
4731 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4732 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4736 if (!mimeout_preserve_space) {
4737 for (;i<mimeout_buf_count;i++) {
4738 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4739 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4740 (*o_mputc)(mimeout_buf[i]);
4747 mimeout_preserve_space = FALSE;
4753 j = mimeout_buf_count;
4754 mimeout_buf_count = 0;
4756 mime_putc(mimeout_buf[i]);
4772 switch(mimeout_mode) {
4777 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4783 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4789 if (mimeout_f!=FIXED_MIME) {
4791 } else if (mimeout_mode != 'Q')
4800 switch(mimeout_mode) {
4805 } else if (c==CR||c==NL) {
4808 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4810 (*o_mputc)(itoh4(((c>>4)&0xf)));
4811 (*o_mputc)(itoh4((c&0xf)));
4820 (*o_mputc)(basis_64[c>>2]);
4825 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4831 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4832 (*o_mputc)(basis_64[c & 0x3F]);
4843 int mime_lastchar2, mime_lastchar1;
4845 void mime_prechar(c2, c1)
4850 if (base64_count + mimeout_buf_count/3*4> 66){
4851 (*o_base64conv)(EOF,0);
4852 (*o_base64conv)(0,NL);
4853 (*o_base64conv)(0,SPACE);
4855 }/*else if (mime_lastchar2){
4856 if (c1 <=DEL && !nkf_isspace(c1)){
4857 (*o_base64conv)(0,SPACE);
4861 if (c2 && mime_lastchar2 == 0
4862 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
4863 (*o_base64conv)(0,SPACE);
4866 mime_lastchar2 = c2;
4867 mime_lastchar1 = c1;
4878 if (mimeout_f == FIXED_MIME){
4879 if (mimeout_mode == 'Q'){
4880 if (base64_count > 71){
4881 if (c!=CR && c!=NL) {
4888 if (base64_count > 71){
4893 if (c == EOF) { /* c==EOF */
4897 if (c != EOF) { /* c==EOF */
4903 /* mimeout_f != FIXED_MIME */
4905 if (c == EOF) { /* c==EOF */
4906 j = mimeout_buf_count;
4907 mimeout_buf_count = 0;
4910 /*if (nkf_isspace(mimeout_buf[i])){
4913 mimeout_addchar(mimeout_buf[i]);
4917 (*o_mputc)(mimeout_buf[i]);
4923 if (mimeout_mode=='Q') {
4924 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4936 if (mimeout_buf_count > 0){
4937 lastchar = mimeout_buf[mimeout_buf_count - 1];
4942 if (!mimeout_mode) {
4943 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
4944 if (nkf_isspace(c)) {
4945 if (c==CR || c==NL) {
4948 for (i=0;i<mimeout_buf_count;i++) {
4949 (*o_mputc)(mimeout_buf[i]);
4950 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
4957 mimeout_buf_count = 1;
4959 if (base64_count > 1
4960 && base64_count + mimeout_buf_count > 76){
4963 if (!nkf_isspace(mimeout_buf[0])){
4968 mimeout_buf[mimeout_buf_count++] = c;
4969 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4970 open_mime(output_mode);
4975 if (lastchar==CR || lastchar == NL){
4976 for (i=0;i<mimeout_buf_count;i++) {
4977 (*o_mputc)(mimeout_buf[i]);
4980 mimeout_buf_count = 0;
4982 if (lastchar==SPACE) {
4983 for (i=0;i<mimeout_buf_count-1;i++) {
4984 (*o_mputc)(mimeout_buf[i]);
4987 mimeout_buf[0] = SPACE;
4988 mimeout_buf_count = 1;
4990 open_mime(output_mode);
4993 /* mimeout_mode == 'B', 1, 2 */
4994 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4995 if (lastchar == CR || lastchar == NL){
4996 if (nkf_isblank(c)) {
4997 for (i=0;i<mimeout_buf_count;i++) {
4998 mimeout_addchar(mimeout_buf[i]);
5000 mimeout_buf_count = 0;
5001 } else if (SPACE<c && c<DEL) {
5003 for (i=0;i<mimeout_buf_count;i++) {
5004 (*o_mputc)(mimeout_buf[i]);
5007 mimeout_buf_count = 0;
5010 if (c==SPACE || c==TAB || c==CR || c==NL) {
5011 for (i=0;i<mimeout_buf_count;i++) {
5012 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
5014 for (i=0;i<mimeout_buf_count;i++) {
5015 (*o_mputc)(mimeout_buf[i]);
5018 mimeout_buf_count = 0;
5021 mimeout_buf[mimeout_buf_count++] = c;
5022 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5024 for (i=0;i<mimeout_buf_count;i++) {
5025 (*o_mputc)(mimeout_buf[i]);
5028 mimeout_buf_count = 0;
5032 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
5033 mimeout_buf[mimeout_buf_count++] = c;
5034 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5035 j = mimeout_buf_count;
5036 mimeout_buf_count = 0;
5038 mimeout_addchar(mimeout_buf[i]);
5045 if (mimeout_buf_count>0) {
5046 j = mimeout_buf_count;
5047 mimeout_buf_count = 0;
5049 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
5051 mimeout_addchar(mimeout_buf[i]);
5057 (*o_mputc)(mimeout_buf[i]);
5059 open_mime(output_mode);
5066 #if defined(PERL_XS) || defined(WIN32DLL)
5071 struct input_code *p = input_code_list;
5084 mime_f = STRICT_MIME;
5085 mime_decode_f = FALSE;
5090 #if defined(MSDOS) || defined(__OS2__)
5095 iso2022jp_f = FALSE;
5096 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
5097 internal_unicode_f = FALSE;
5099 #ifdef UTF8_OUTPUT_ENABLE
5102 ms_ucs_map_f = FALSE;
5103 encode_fallback = NULL;
5104 unicode_subchar = '?';
5106 #ifdef UNICODE_NORMALIZATION
5119 is_inputcode_mixed = FALSE;
5120 is_inputcode_set = FALSE;
5124 #ifdef SHIFTJIS_CP932
5133 for (i = 0; i < 256; i++){
5134 prefix_table[i] = 0;
5137 #ifdef UTF8_INPUT_ENABLE
5138 utf16_mode = UTF16BE_INPUT;
5140 mimeout_buf_count = 0;
5145 fold_preserve_f = FALSE;
5148 kanji_intro = DEFAULT_J;
5149 ascii_intro = DEFAULT_R;
5150 fold_margin = FOLD_MARGIN;
5151 output_conv = DEFAULT_CONV;
5152 oconv = DEFAULT_CONV;
5153 o_zconv = no_connection;
5154 o_fconv = no_connection;
5155 o_crconv = no_connection;
5156 o_rot_conv = no_connection;
5157 o_hira_conv = no_connection;
5158 o_base64conv = no_connection;
5159 o_iso2022jp_check_conv = no_connection;
5162 i_ungetc = std_ungetc;
5164 i_bungetc = std_ungetc;
5167 i_mungetc = std_ungetc;
5168 i_mgetc_buf = std_getc;
5169 i_mungetc_buf = std_ungetc;
5170 output_mode = ASCII;
5173 mime_decode_mode = FALSE;
5179 z_prev2=0,z_prev1=0;
5181 iconv_for_check = 0;
5183 input_codename = "";
5191 no_connection(c2,c1)
5194 no_connection2(c2,c1,0);
5198 no_connection2(c2,c1,c0)
5201 fprintf(stderr,"nkf internal module connection failure.\n");
5203 return 0; /* LINT */
5208 #define fprintf dllprintf
5213 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
5214 fprintf(stderr,"Flags:\n");
5215 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
5216 #ifdef DEFAULT_CODE_SJIS
5217 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8N\n");
5219 #ifdef DEFAULT_CODE_JIS
5220 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8N\n");
5222 #ifdef DEFAULT_CODE_EUC
5223 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8N\n");
5225 #ifdef DEFAULT_CODE_UTF8
5226 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8N (DEFAULT)\n");
5228 #ifdef UTF8_OUTPUT_ENABLE
5229 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
5231 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
5232 #ifdef UTF8_INPUT_ENABLE
5233 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
5235 fprintf(stderr,"t no conversion\n");
5236 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
5237 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
5238 fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
5239 fprintf(stderr,"v Show this usage. V: show version\n");
5240 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
5241 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
5242 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
5243 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
5244 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
5245 fprintf(stderr," 3: Convert HTML Entity\n");
5246 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
5247 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
5249 fprintf(stderr,"T Text mode output\n");
5251 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
5252 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
5253 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
5254 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
5255 fprintf(stderr,"long name options\n");
5256 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
5257 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
5258 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
5259 fprintf(stderr," --x0212 Convert JISX0212\n");
5260 fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
5261 fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
5263 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
5265 #ifdef NUMCHAR_OPTION
5266 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
5268 #ifdef UNICODE_NORMALIZATION
5269 fprintf(stderr," --utf8mac-input UTF-8-MAC input\n");
5271 #ifdef UTF8_INPUT_ENABLE
5272 fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
5273 fprintf(stderr," set the way nkf handles unassigned characters\n");
5275 #ifdef UTF8_OUTPUT_ENABLE
5276 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
5279 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
5281 fprintf(stderr," -g, --guess Guess the input code\n");
5282 fprintf(stderr," --help,--version\n");
5289 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
5290 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
5293 #if defined(MSDOS) && defined(__WIN16__)
5296 #if defined(MSDOS) && defined(__WIN32__)
5302 ,NKF_VERSION,NKF_RELEASE_DATE);
5303 fprintf(stderr,"\n%s\n",CopyRight);
5308 ** Patch authors:
5309 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
5310 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
5311 ** ohta@src.ricoh.co.jp (Junn Ohta)
5312 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
5313 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
5314 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
5315 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
5316 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
5317 ** GHG00637@nifty-serve.or.jp (COW)