1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** 連絡先： （株）富士通研究所　ソフト３研　市川　至
5 ** （E-Mail Address: ichikawa@flab.fujitsu.co.jp）
6 ** Copyright (C) 1996,1998
8 ** 連絡先： 琉球大学情報工学科 河野 真治 mime/X0208 support
9 ** （E-Mail Address: kono@ie.u-ryukyu.ac.jp）
10 ** 連絡先： COW for DOS & Win16 & Win32 & OS/2
11 ** （E-Mail Address: GHG00637@niftyserve.or.p）
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8 サポートについて
31 ** 従来の nkf と入れかえてそのまま使えるようになっています
32 ** nkf -e などとして起動すると、自動判別で UTF-8 と判定されれば、
33 ** そのまま euc-jp に変換されます
35 ** まだバグがある可能性が高いです。
36 ** (特に自動判別、コード混在、エラー処理系)
38 ** 何か問題を見つけたら、
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 ** まで御連絡をお願いします。
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.89 2006/01/11 16:49:09 naruse Exp $ */
43 #define NKF_VERSION "2.0.5"
44 #define NKF_RELEASE_DATE "2006-01-12"
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2006 Kono, Furukawa, Naruse"
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
116 #if defined(MSDOS) || defined(__OS2__)
123 #define setbinmode(fp) fsetbin(fp)
124 #else /* Microsoft C, Turbo C */
125 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
127 #else /* UNIX,OS/2 */
128 #define setbinmode(fp)
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
134 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
147 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
/*
 * Mode identifiers for the two planes of JIS X 0213.
 * Each value packs the intermediate byte '(' (0x28) with the ISO-2022 final
 * byte of the designating escape sequence: ESC $ ( O selects plane 1 and
 * ESC $ ( P selects plane 2.  The two identifiers must differ, otherwise the
 * planes cannot be told apart.
 */
#define X0213_1 0x284F
#define X0213_2 0x2850
197 /* Input Assumption */
201 #define LATIN1_INPUT 6
203 #define STRICT_MIME 8
208 #define JAPANESE_EUC 10
212 #define UTF8_INPUT 13
213 #define UTF16BE_INPUT 14
214 #define UTF16LE_INPUT 15
/*
 * ASCII-only character classification and conversion helpers.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (for example `c & 0x7f`) is classified as a whole instead of being torn
 * apart by operator precedence.  The argument is still evaluated more than
 * once, so do not pass expressions with side effects.
 *
 * nkf_isblank / nkf_isspace rely on SPACE, TAB, CR and NL being defined
 * wherever the macros are expanded.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c) (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c) ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c) ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
/* Value of one hexadecimal digit; the caller must have checked nkf_isxdigit(x). */
#define hex2bin(x) (nkf_isdigit(x) ? (x) - '0' : nkf_toupper(x) - 'A' + 10)
248 #define HOLD_SIZE 1024
249 #define IOBUF_SIZE 16384
251 #define DEFAULT_J 'B'
252 #define DEFAULT_R 'B'
254 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
255 #define SJ6394 0x0161 /* 63 - 94 ku offset */
257 #define RANGE_NUM_MAX 18
262 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
263 #define sizeof_euc_utf8 94
264 #define sizeof_euc_to_utf8_1byte 94
265 #define sizeof_euc_to_utf8_2bytes 94
266 #define sizeof_utf8_to_euc_C2 64
267 #define sizeof_utf8_to_euc_E5B8 64
268 #define sizeof_utf8_to_euc_2bytes 112
269 #define sizeof_utf8_to_euc_3bytes 16
272 /* MIME preprocessor */
274 #ifdef EASYWIN /*Easy Win */
275 extern POINT _BufferSize;
278 /* function prototype */
280 #ifdef ANSI_C_PROTOTYPE
282 #define STATIC static
296 void (*status_func)PROTO((struct input_code *, int));
297 int (*iconv_func)PROTO((int c2, int c1, int c0));
301 STATIC char *input_codename = "";
304 STATIC const char *CopyRight = COPY_RIGHT;
306 #if !defined(PERL_XS) && !defined(WIN32DLL)
307 STATIC int noconvert PROTO((FILE *f));
309 STATIC int kanji_convert PROTO((FILE *f));
310 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
311 STATIC int push_hold_buf PROTO((int c2));
312 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
313 STATIC int s_iconv PROTO((int c2,int c1,int c0));
314 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
315 STATIC int e_iconv PROTO((int c2,int c1,int c0));
316 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
317 /* Microsoft UCS Mapping Compatible
318 * 0: Shift_JIS, eucJP-ascii
323 #define MS_CODEPAGE 2
324 STATIC int ms_ucs_map_f = 0;
326 #ifdef UTF8_INPUT_ENABLE
327 /* don't convert characters when the mapping is not defined in the standard */
328 STATIC int strict_mapping_f = TRUE;
329 /* disable NEC special, NEC-selected IBM extended and IBM extended characters */
330 STATIC int disable_cp932ext_f = FALSE;
331 /* ignore ZERO WIDTH NO-BREAK SPACE */
332 STATIC int ignore_zwnbsp_f = TRUE;
333 /* don't convert characters that can't secure round trip conversion */
334 STATIC int unicode_round_trip_f = FALSE;
335 STATIC int unicode_subchar = '?'; /* the regular substitution character */
336 STATIC void encode_fallback_html PROTO((int c));
337 STATIC void encode_fallback_xml PROTO((int c));
338 STATIC void encode_fallback_java PROTO((int c));
339 STATIC void encode_fallback_perl PROTO((int c));
340 STATIC void encode_fallback_subchar PROTO((int c));
341 STATIC void (*encode_fallback)PROTO((int c)) = NULL;
342 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
343 STATIC int w_iconv PROTO((int c2,int c1,int c0));
344 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
345 STATIC int unicode_to_jis_common PROTO((int c2,int c1,int c0,int *p2,int *p1));
346 STATIC int w_iconv_common PROTO((int c1,int c0,const unsigned short *const *pp,int psize,int *p2,int *p1));
347 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
348 STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
350 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
351 STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
353 #ifdef UTF8_OUTPUT_ENABLE
354 STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
355 STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
356 STATIC int e2w_conv PROTO((int c2,int c1));
357 STATIC void w_oconv PROTO((int c2,int c1));
358 STATIC void w_oconv16 PROTO((int c2,int c1));
360 STATIC void e_oconv PROTO((int c2,int c1));
361 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
362 STATIC void s_oconv PROTO((int c2,int c1));
363 STATIC void j_oconv PROTO((int c2,int c1));
364 STATIC void fold_conv PROTO((int c2,int c1));
365 STATIC void cr_conv PROTO((int c2,int c1));
366 STATIC void z_conv PROTO((int c2,int c1));
367 STATIC void rot_conv PROTO((int c2,int c1));
368 STATIC void hira_conv PROTO((int c2,int c1));
369 STATIC void base64_conv PROTO((int c2,int c1));
370 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
371 STATIC void no_connection PROTO((int c2,int c1));
372 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
374 STATIC void code_score PROTO((struct input_code *ptr));
375 STATIC void code_status PROTO((int c));
377 STATIC void std_putc PROTO((int c));
378 STATIC int std_getc PROTO((FILE *f));
379 STATIC int std_ungetc PROTO((int c,FILE *f));
381 STATIC int broken_getc PROTO((FILE *f));
382 STATIC int broken_ungetc PROTO((int c,FILE *f));
384 STATIC int mime_begin PROTO((FILE *f));
385 STATIC int mime_getc PROTO((FILE *f));
386 STATIC int mime_ungetc PROTO((int c,FILE *f));
388 STATIC int mime_begin_strict PROTO((FILE *f));
389 STATIC int mime_getc_buf PROTO((FILE *f));
390 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
391 STATIC int mime_integrity PROTO((FILE *f,const unsigned char *p));
393 STATIC int base64decode PROTO((int c));
394 STATIC void mime_prechar PROTO((int c2, int c1));
395 STATIC void mime_putc PROTO((int c));
396 STATIC void open_mime PROTO((int c));
397 STATIC void close_mime PROTO(());
399 STATIC void usage PROTO(());
400 STATIC void version PROTO(());
402 STATIC void options PROTO((unsigned char *c));
403 #if defined(PERL_XS) || defined(WIN32DLL)
404 STATIC void reinit PROTO(());
409 #if !defined(PERL_XS) && !defined(WIN32DLL)
410 STATIC unsigned char stdibuf[IOBUF_SIZE];
411 STATIC unsigned char stdobuf[IOBUF_SIZE];
413 STATIC unsigned char hold_buf[HOLD_SIZE*2];
414 STATIC int hold_count;
416 /* MIME preprocessor fifo */
418 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
419 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
420 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
421 STATIC unsigned char mime_buf[MIME_BUF_SIZE];
422 STATIC unsigned int mime_top = 0;
423 STATIC unsigned int mime_last = 0; /* decoded */
424 STATIC unsigned int mime_input = 0; /* undecoded */
425 STATIC int (*mime_iconv_back)PROTO((int c2,int c1,int c0)) = NULL;
428 STATIC int unbuf_f = FALSE; /* presumably unbuffered output (-u) -- TODO confirm in options() */
429 STATIC int estab_f = FALSE;
430 STATIC int nop_f = FALSE;
431 STATIC int binmode_f = TRUE; /* binary mode */
432 STATIC int rot_f = FALSE; /* rot13/47 mode */
433 STATIC int hira_f = FALSE; /* hira/kata henkan (hiragana/katakana conversion) */
434 STATIC int input_f = FALSE; /* non fixed input code */
435 STATIC int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
436 STATIC int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
437 STATIC int mime_decode_f = FALSE; /* mime decode is explicitly on */
438 STATIC int mimebuf_f = FALSE; /* MIME buffered input */
439 STATIC int broken_f = FALSE; /* convert ESC-less broken JIS */
440 STATIC int iso8859_f = FALSE; /* ISO8859 through */
441 STATIC int mimeout_f = FALSE; /* base64 mode */
442 #if defined(MSDOS) || defined(__OS2__)
443 STATIC int x0201_f = TRUE; /* Assume JISX0201 kana */
445 STATIC int x0201_f = NO_X0201; /* Assume NO JISX0201 */
447 STATIC int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
449 #ifdef UNICODE_NORMALIZATION
450 STATIC int nfc_f = FALSE;
451 STATIC int (*i_nfc_getc)PROTO((FILE *)) = std_getc; /* input of ugetc */
452 STATIC int (*i_nfc_ungetc)PROTO((int c ,FILE *f)) = std_ungetc;
453 STATIC int nfc_getc PROTO((FILE *f));
454 STATIC int nfc_ungetc PROTO((int c,FILE *f));
458 STATIC int cap_f = FALSE;
459 STATIC int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
460 STATIC int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
461 STATIC int cap_getc PROTO((FILE *f));
462 STATIC int cap_ungetc PROTO((int c,FILE *f));
464 STATIC int url_f = FALSE;
465 STATIC int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
466 STATIC int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
467 STATIC int url_getc PROTO((FILE *f));
468 STATIC int url_ungetc PROTO((int c,FILE *f));
471 #ifdef NUMCHAR_OPTION
472 #define CLASS_MASK 0x0f000000
473 #define CLASS_UTF16 0x01000000
474 STATIC int numchar_f = FALSE;
475 STATIC int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
476 STATIC int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
477 STATIC int numchar_getc PROTO((FILE *f));
478 STATIC int numchar_ungetc PROTO((int c,FILE *f));
482 STATIC int noout_f = FALSE;
483 STATIC void no_putc PROTO((int c));
484 STATIC int debug_f = FALSE;
485 STATIC void debug PROTO((const char *str));
486 STATIC int (*iconv_for_check)() = 0;
489 STATIC int guess_f = FALSE;
491 STATIC void print_guessed_code PROTO((char *filename));
493 STATIC void set_input_codename PROTO((char *codename));
494 STATIC int is_inputcode_mixed = FALSE;
495 STATIC int is_inputcode_set = FALSE;
498 STATIC int exec_f = 0;
501 #ifdef SHIFTJIS_CP932
502 /* invert IBM extended characters to others
503 and controls some UCS mapping for Microsoft Code Page */
504 STATIC int cp51932_f = TRUE;
505 #define CP932_TABLE_BEGIN (0xfa)
506 #define CP932_TABLE_END (0xfc)
508 /* invert NEC-selected IBM extended characters to IBM extended characters */
509 STATIC int cp932inv_f = TRUE;
510 #define CP932INV_TABLE_BEGIN (0xed)
511 #define CP932INV_TABLE_END (0xee)
513 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
514 #endif /* SHIFTJIS_CP932 */
517 STATIC int x0212_f = FALSE;
518 STATIC int x0212_shift PROTO((int c));
519 STATIC int x0212_unshift PROTO((int c));
521 STATIC int x0213_f = FALSE;
523 STATIC unsigned char prefix_table[256];
525 STATIC void e_status PROTO((struct input_code *, int));
526 STATIC void s_status PROTO((struct input_code *, int));
528 #ifdef UTF8_INPUT_ENABLE
529 STATIC void w_status PROTO((struct input_code *, int));
530 STATIC void w16_status PROTO((struct input_code *, int));
531 STATIC int utf16_mode = UTF16BE_INPUT;
534 struct input_code input_code_list[] = {
535 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
536 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
537 #ifdef UTF8_INPUT_ENABLE
538 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
539 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
544 STATIC int mimeout_mode = 0;
545 STATIC int base64_count = 0;
547 /* X0208 -> ASCII converter */
550 STATIC int f_line = 0; /* chars in line */
551 STATIC int f_prev = 0;
552 STATIC int fold_preserve_f = FALSE; /* preserve new lines */
553 STATIC int fold_f = FALSE;
554 STATIC int fold_len = 0;
557 STATIC unsigned char kanji_intro = DEFAULT_J;
558 STATIC unsigned char ascii_intro = DEFAULT_R;
562 #define FOLD_MARGIN 10
563 #define DEFAULT_FOLD 60
565 STATIC int fold_margin = FOLD_MARGIN;
569 #ifdef DEFAULT_CODE_JIS
570 # define DEFAULT_CONV j_oconv
572 #ifdef DEFAULT_CODE_SJIS
573 # define DEFAULT_CONV s_oconv
575 #ifdef DEFAULT_CODE_EUC
576 # define DEFAULT_CONV e_oconv
578 #ifdef DEFAULT_CODE_UTF8
579 # define DEFAULT_CONV w_oconv
582 /* process default */
583 STATIC void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
585 STATIC void (*oconv)PROTO((int c2,int c1)) = no_connection;
586 /* s_iconv or oconv */
587 STATIC int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
589 STATIC void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
590 STATIC void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
591 STATIC void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
592 STATIC void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
593 STATIC void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
594 STATIC void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
595 STATIC void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
597 /* STATIC redirections */
599 STATIC void (*o_putc)PROTO((int c)) = std_putc;
601 STATIC int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
602 STATIC int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
604 STATIC int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
605 STATIC int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
607 STATIC void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
609 STATIC int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
610 STATIC int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
612 /* for strict mime */
613 STATIC int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
614 STATIC int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
617 STATIC int output_mode = ASCII, /* output kanji mode */
618 input_mode = ASCII, /* input kanji mode */
619 shift_mode = FALSE; /* TRUE shift out, or X0201 */
620 STATIC int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
622 /* X0201 / X0208 conversion tables */
624 /* X0201 kana conversion table */
627 unsigned char cv[]= {
628 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
629 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
630 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
631 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
632 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
633 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
634 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
635 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
636 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
637 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
638 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
639 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
640 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
641 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
642 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
643 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
647 /* X0201 kana conversion table for dakuten */
650 unsigned char dv[]= {
651 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
652 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
653 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
656 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
657 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
658 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
659 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
660 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
661 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
662 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
663 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669 /* X0201 kana conversion table for han-dakuten */
672 unsigned char ev[]= {
673 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
675 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
677 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
678 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
679 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
680 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
681 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
682 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
683 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
684 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
685 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
686 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
687 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
688 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
692 /* X0208 kigou conversion table */
693 /* 0x8140 - 0x819e */
695 unsigned char fv[] = {
697 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
698 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
699 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
700 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
701 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
702 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
703 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
704 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
705 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
706 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
707 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
708 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
714 STATIC int file_out = FALSE;
716 STATIC int overwrite = FALSE;
719 STATIC int crmode_f = 0; /* CR, NL, CRLF */
720 #ifdef EASYWIN /*Easy Win */
721 STATIC int end_check;
724 #define STD_GC_BUFSIZE (256)
725 int std_gc_buf[STD_GC_BUFSIZE];
729 #include "nkf32dll.c"
730 #elif defined(PERL_XS)
740 char *outfname = NULL;
743 #ifdef EASYWIN /*Easy Win */
744 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
747 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
748 cp = (unsigned char *)*argv;
753 if (pipe(fds) < 0 || (pid = fork()) < 0){
764 execvp(argv[1], &argv[1]);
778 if(x0201_f == WISH_TRUE)
779 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
781 if (binmode_f == TRUE)
783 if (freopen("","wb",stdout) == NULL)
790 setbuf(stdout, (char *) NULL);
792 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
795 if (binmode_f == TRUE)
797 if (freopen("","rb",stdin) == NULL) return (-1);
801 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
805 kanji_convert(stdin);
806 if (guess_f) print_guessed_code(NULL);
811 is_inputcode_mixed = FALSE;
812 is_inputcode_set = FALSE;
817 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
826 /* reopen file for stdout */
827 if (file_out == TRUE) {
830 outfname = malloc(strlen(origfname)
831 + strlen(".nkftmpXXXXXX")
837 strcpy(outfname, origfname);
841 for (i = strlen(outfname); i; --i){
842 if (outfname[i - 1] == '/'
843 || outfname[i - 1] == '\\'){
849 strcat(outfname, "ntXXXXXX");
851 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
854 strcat(outfname, ".nkftmpXXXXXX");
855 fd = mkstemp(outfname);
858 || (fd_backup = dup(fileno(stdout))) < 0
859 || dup2(fd, fileno(stdout)) < 0
870 outfname = "nkf.out";
873 if(freopen(outfname, "w", stdout) == NULL) {
877 if (binmode_f == TRUE) {
879 if (freopen("","wb",stdout) == NULL)
886 if (binmode_f == TRUE)
888 if (freopen("","rb",fin) == NULL)
893 setvbuffer(fin, stdibuf, IOBUF_SIZE);
897 char *filename = NULL;
899 if (nfiles > 1) filename = origfname;
900 if (guess_f) print_guessed_code(filename);
906 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
914 if (dup2(fd_backup, fileno(stdout)) < 0){
917 if (stat(origfname, &sb)) {
918 fprintf(stderr, "Can't stat %s\n", origfname);
920 /* restore the permissions */
921 if (chmod(outfname, sb.st_mode)) {
922 fprintf(stderr, "Can't set permission %s\n", outfname);
925 /* restore the timestamp */
926 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
927 tb[0] = tb[1] = sb.st_mtime;
928 if (utime(outfname, tb)) {
929 fprintf(stderr, "Can't set timestamp %s\n", outfname);
932 tb.actime = sb.st_atime;
933 tb.modtime = sb.st_mtime;
934 if (utime(outfname, &tb)) {
935 fprintf(stderr, "Can't set timestamp %s\n", outfname);
939 if (unlink(origfname)){
943 if (rename(outfname, origfname)) {
945 fprintf(stderr, "Can't rename %s to %s\n",
946 outfname, origfname);
954 #ifdef EASYWIN /*Easy Win */
955 if (file_out == FALSE)
956 scanf("%d",&end_check);
959 #else /* for Other OS */
960 if (file_out == TRUE)
965 #endif /* WIN32DLL */
992 {"katakana-hiragana","h3"},
999 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1000 {"internal-unicode", ""},
1002 #ifdef UTF8_OUTPUT_ENABLE
1012 {"fb-subchar=", ""},
1014 #ifdef UTF8_INPUT_ENABLE
1015 {"utf8-input", "W"},
1016 {"utf16-input", "W16"},
1017 {"disable-cp932ext", ""},
1018 {"strict-mapping", ""},
1019 {"enable-round-trip",""},
1021 #ifdef UNICODE_NORMALIZATION
1022 {"utf8mac-input", ""},
1031 #ifdef NUMCHAR_OPTION
1032 {"numchar-input", ""},
1038 #ifdef SHIFTJIS_CP932
1048 STATIC int option_mode = 0;
1055 unsigned char *p = NULL;
1056 unsigned char *cp_back = NULL;
1057 unsigned char codeset[32];
1061 while(*cp && *cp++!='-');
1062 while (*cp || cp_back) {
1070 case '-': /* literal options */
1071 if (!*cp || *cp == SPACE) { /* ignore the rest of arguments */
1075 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1076 p = (unsigned char *)long_option[i].name;
1077 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1078 if (*p == cp[j] || cp[j] == ' '){
1085 while(*cp && *cp != SPACE && cp++);
1086 if (long_option[i].alias[0]){
1088 cp = (unsigned char *)long_option[i].alias;
1090 if (strcmp(long_option[i].name, "ic=") == 0){
1091 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1092 codeset[i] = nkf_toupper(p[i]);
1095 if(strcmp(codeset, "ISO-2022-JP") == 0){
1096 input_f = JIS_INPUT;
1097 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1098 input_f = SJIS_INPUT;
1099 if (x0201_f==NO_X0201) x0201_f=TRUE;
1100 }else if(strcmp(codeset, "CP932") == 0){
1101 input_f = SJIS_INPUT;
1103 #ifdef SHIFTJIS_CP932
1107 #ifdef UTF8_OUTPUT_ENABLE
1110 }else if(strcmp(codeset, "EUCJP") == 0 ||
1111 strcmp(codeset, "EUC-JP") == 0){
1112 input_f = JIS_INPUT;
1113 }else if(strcmp(codeset, "CP51932") == 0){
1114 input_f = JIS_INPUT;
1116 #ifdef SHIFTJIS_CP932
1120 #ifdef UTF8_OUTPUT_ENABLE
1123 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1124 strcmp(codeset, "EUCJP-MS") == 0){
1125 input_f = JIS_INPUT;
1127 #ifdef SHIFTJIS_CP932
1131 #ifdef UTF8_OUTPUT_ENABLE
1134 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1135 strcmp(codeset, "EUCJP-ASCII") == 0){
1136 input_f = JIS_INPUT;
1138 #ifdef SHIFTJIS_CP932
1142 #ifdef UTF8_OUTPUT_ENABLE
1145 }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){
1146 input_f = SJIS_INPUT;
1148 #ifdef SHIFTJIS_CP932
1152 if (x0201_f==NO_X0201) x0201_f=TRUE;
1153 }else if(strcmp(codeset, "EUC-JISX0213") == 0){
1154 input_f = JIS_INPUT;
1157 #ifdef SHIFTJIS_CP932
1161 #ifdef UTF8_INPUT_ENABLE
1162 }else if(strcmp(codeset, "UTF-8") == 0 ||
1163 strcmp(codeset, "UTF-8N") == 0 ||
1164 strcmp(codeset, "UTF-8-BOM") == 0){
1165 input_f = UTF8_INPUT;
1166 #ifdef UNICODE_NORMALIZATION
1167 }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1168 strcmp(codeset, "UTF-8-MAC") == 0){
1169 input_f = UTF8_INPUT;
1172 }else if(strcmp(codeset, "UTF-16") == 0){
1173 input_f = UTF16BE_INPUT;
1174 utf16_mode = UTF16BE_INPUT;
1175 }else if(strcmp(codeset, "UTF-16BE") == 0 ||
1176 strcmp(codeset, "UTF-16BE-BOM") == 0){
1177 input_f = UTF16BE_INPUT;
1178 utf16_mode = UTF16BE_INPUT;
1179 }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1180 strcmp(codeset, "UTF-16LE-BOM") == 0){
1181 input_f = UTF16LE_INPUT;
1182 utf16_mode = UTF16LE_INPUT;
1187 if (strcmp(long_option[i].name, "oc=") == 0){
1188 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1189 codeset[i] = nkf_toupper(p[i]);
1192 if(strcmp(codeset, "ISO-2022-JP") == 0){
1193 output_conv = j_oconv;
1194 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1195 output_conv = s_oconv;
1196 }else if(strcmp(codeset, "CP932") == 0){
1197 output_conv = s_oconv;
1199 #ifdef SHIFTJIS_CP932
1203 #ifdef UTF8_OUTPUT_ENABLE
1206 }else if(strcmp(codeset, "EUCJP") == 0 ||
1207 strcmp(codeset, "EUC-JP") == 0){
1208 output_conv = e_oconv;
1209 }else if(strcmp(codeset, "CP51932") == 0){
1210 output_conv = e_oconv;
1212 #ifdef SHIFTJIS_CP932
1216 #ifdef UTF8_OUTPUT_ENABLE
1219 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1220 strcmp(codeset, "EUCJP-MS") == 0){
1221 output_conv = e_oconv;
1226 #ifdef SHIFTJIS_CP932
1229 #ifdef UTF8_OUTPUT_ENABLE
1232 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1233 strcmp(codeset, "EUCJP-ASCII") == 0){
1234 output_conv = e_oconv;
1239 #ifdef SHIFTJIS_CP932
1242 #ifdef UTF8_OUTPUT_ENABLE
1245 }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){
1246 output_conv = s_oconv;
1248 }else if(strcmp(codeset, "EUC-JISX0213") == 0){
1249 output_conv = e_oconv;
1254 #ifdef UTF8_OUTPUT_ENABLE
1255 }else if(strcmp(codeset, "UTF-8") == 0){
1256 output_conv = w_oconv;
1257 }else if(strcmp(codeset, "UTF-8N") == 0){
1258 output_conv = w_oconv;
1260 }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1261 output_conv = w_oconv;
1263 }else if(strcmp(codeset, "UTF-16BE") == 0){
1264 output_conv = w_oconv16;
1266 }else if(strcmp(codeset, "UTF-16") == 0 ||
1267 strcmp(codeset, "UTF-16BE-BOM") == 0){
1268 output_conv = w_oconv16;
1270 }else if(strcmp(codeset, "UTF-16LE") == 0){
1271 output_conv = w_oconv16;
1274 }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1275 output_conv = w_oconv16;
1283 if (strcmp(long_option[i].name, "overwrite") == 0){
1290 if (strcmp(long_option[i].name, "cap-input") == 0){
1294 if (strcmp(long_option[i].name, "url-input") == 0){
1299 #ifdef NUMCHAR_OPTION
1300 if (strcmp(long_option[i].name, "numchar-input") == 0){
1306 if (strcmp(long_option[i].name, "no-output") == 0){
1310 if (strcmp(long_option[i].name, "debug") == 0){
1315 if (strcmp(long_option[i].name, "cp932") == 0){
1316 #ifdef SHIFTJIS_CP932
1320 #ifdef UTF8_OUTPUT_ENABLE
1325 if (strcmp(long_option[i].name, "no-cp932") == 0){
1326 #ifdef SHIFTJIS_CP932
1330 #ifdef UTF8_OUTPUT_ENABLE
1335 #ifdef SHIFTJIS_CP932
1336 if (strcmp(long_option[i].name, "cp932inv") == 0){
1343 if (strcmp(long_option[i].name, "x0212") == 0){
1350 if (strcmp(long_option[i].name, "exec-in") == 0){
1354 if (strcmp(long_option[i].name, "exec-out") == 0){
1359 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1360 if (strcmp(long_option[i].name, "internal-unicode") == 0){
1361 internal_unicode_f = TRUE;
1364 if (strcmp(long_option[i].name, "disable-cp932ext") == 0){
1365 disable_cp932ext_f = TRUE;
1368 if (strcmp(long_option[i].name, "enable-round-trip") == 0){
1369 unicode_round_trip_f = TRUE;
1372 if (strcmp(long_option[i].name, "fb-skip") == 0){
1373 encode_fallback = NULL;
1376 if (strcmp(long_option[i].name, "fb-html") == 0){
1377 encode_fallback = encode_fallback_html;
1380 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1381 encode_fallback = encode_fallback_xml;
1384 if (strcmp(long_option[i].name, "fb-java") == 0){
1385 encode_fallback = encode_fallback_java;
1388 if (strcmp(long_option[i].name, "fb-perl") == 0){
1389 encode_fallback = encode_fallback_perl;
1392 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1393 encode_fallback = encode_fallback_subchar;
1396 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1397 encode_fallback = encode_fallback_subchar;
1398 unicode_subchar = 0;
1400 /* decimal number */
1401 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1402 unicode_subchar *= 10;
1403 unicode_subchar += hex2bin(p[i]);
1405 }else if(p[1] == 'x' || p[1] == 'X'){
1406 /* hexadecimal number */
1407 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1408 unicode_subchar <<= 4;
1409 unicode_subchar |= hex2bin(p[i]);
1413 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1414 unicode_subchar *= 8;
1415 unicode_subchar += hex2bin(p[i]);
1418 w16e_conv(unicode_subchar, &i, &j);
1419 unicode_subchar = i<<8 | j;
1423 #ifdef UTF8_OUTPUT_ENABLE
1424 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1429 #ifdef UNICODE_NORMALIZATION
1430 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1431 input_f = UTF8_INPUT;
1436 if (strcmp(long_option[i].name, "prefix=") == 0){
1437 if (' ' < p[0] && p[0] < 128){
1438 for (i = 1; ' ' < p[i] && p[i] < 128; i++){
1439 prefix_table[p[i]] = p[0];
1446 case 'b': /* buffered mode */
1449 case 'u': /* non bufferd mode */
1452 case 't': /* transparent mode */
1455 case 'j': /* JIS output */
1457 output_conv = j_oconv;
1459 case 'e': /* AT&T EUC output */
1460 output_conv = e_oconv;
1462 case 's': /* SJIS output */
1463 output_conv = s_oconv;
1465 case 'l': /* ISO8859 Latin-1 support, no conversion */
1466 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1467 input_f = LATIN1_INPUT;
1469 case 'i': /* Kanji IN ESC-$-@/B */
1470 if (*cp=='@'||*cp=='B')
1471 kanji_intro = *cp++;
1473 case 'o': /* ASCII IN ESC-(-J/B */
1474 if (*cp=='J'||*cp=='B'||*cp=='H')
1475 ascii_intro = *cp++;
1479 bit:1 katakana->hiragana
1480 bit:2 hiragana->katakana
1482 if ('9'>= *cp && *cp>='0')
1483 hira_f |= (*cp++ -'0');
1490 #if defined(MSDOS) || defined(__OS2__)
1505 #ifdef UTF8_OUTPUT_ENABLE
1506 case 'w': /* UTF-8 output */
1507 if ('1'== cp[0] && '6'==cp[1]) {
1508 output_conv = w_oconv16; cp+=2;
1510 unicode_bom_f=2; cp++;
1513 unicode_bom_f=1; cp++;
1515 } else if (cp[0] == 'B') {
1516 unicode_bom_f=2; cp++;
1518 unicode_bom_f=1; cp++;
1521 } else if (cp[0] == '8') {
1522 output_conv = w_oconv; cp++;
1525 unicode_bom_f=1; cp++;
1528 output_conv = w_oconv;
1531 #ifdef UTF8_INPUT_ENABLE
1532 case 'W': /* UTF-8 input */
1533 if ('1'== cp[0] && '6'==cp[1]) {
1534 input_f = UTF16BE_INPUT;
1535 utf16_mode = UTF16BE_INPUT;
1539 input_f = UTF16LE_INPUT;
1540 utf16_mode = UTF16LE_INPUT;
1541 } else if (cp[0] == 'B') {
1543 input_f = UTF16BE_INPUT;
1544 utf16_mode = UTF16BE_INPUT;
1546 } else if (cp[0] == '8') {
1548 input_f = UTF8_INPUT;
1550 input_f = UTF8_INPUT;
1553 /* Input code assumption */
1554 case 'J': /* JIS input */
1555 case 'E': /* AT&T EUC input */
1556 input_f = JIS_INPUT;
1558 case 'S': /* MS Kanji input */
1559 input_f = SJIS_INPUT;
1560 if (x0201_f==NO_X0201) x0201_f=TRUE;
1562 case 'Z': /* Convert X0208 alphabet to asii */
1563 /* bit:0 Convert X0208
1564 bit:1 Convert Kankaku to one space
1565 bit:2 Convert Kankaku to two spaces
1566 bit:3 Convert HTML Entity
1568 if ('9'>= *cp && *cp>='0')
1569 alpha_f |= 1<<(*cp++ -'0');
1573 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1574 x0201_f = FALSE; /* No X0201->X0208 conversion */
1576 ESC-(-I in JIS, EUC, MS Kanji
1577 SI/SO in JIS, EUC, MS Kanji
1578 SSO in EUC, JIS, not in MS Kanji
1579 MS Kanji (0xa0-0xdf)
1581 ESC-(-I in JIS (0x20-0x5f)
1582 SSO in EUC (0xa0-0xdf)
1583 0xa0-0xd in MS Kanji (0xa0-0xdf)
1586 case 'X': /* Assume X0201 kana */
1587 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1590 case 'F': /* prserve new lines */
1591 fold_preserve_f = TRUE;
1592 case 'f': /* folding -f60 or -f */
1595 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1597 fold_len += *cp++ - '0';
1599 if (!(0<fold_len && fold_len<BUFSIZ))
1600 fold_len = DEFAULT_FOLD;
1604 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1606 fold_margin += *cp++ - '0';
1610 case 'm': /* MIME support */
1611 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1612 if (*cp=='B'||*cp=='Q') {
1613 mime_decode_mode = *cp++;
1614 mimebuf_f = FIXED_MIME;
1615 } else if (*cp=='N') {
1616 mime_f = TRUE; cp++;
1617 } else if (*cp=='S') {
1618 mime_f = STRICT_MIME; cp++;
1619 } else if (*cp=='0') {
1620 mime_decode_f = FALSE;
1621 mime_f = FALSE; cp++;
1624 case 'M': /* MIME output */
1627 mimeout_f = FIXED_MIME; cp++;
1628 } else if (*cp=='Q') {
1630 mimeout_f = FIXED_MIME; cp++;
1635 case 'B': /* Broken JIS support */
1637 bit:1 allow any x on ESC-(-x or ESC-$-x
1638 bit:2 reset to ascii on NL
1640 if ('9'>= *cp && *cp>='0')
1641 broken_f |= 1<<(*cp++ -'0');
1646 case 'O':/* for Output file */
1650 case 'c':/* add cr code */
1653 case 'd':/* delete cr code */
1656 case 'I': /* ISO-2022-JP output */
1659 case 'L': /* line mode */
1660 if (*cp=='u') { /* unix */
1661 crmode_f = NL; cp++;
1662 } else if (*cp=='m') { /* mac */
1663 crmode_f = CR; cp++;
1664 } else if (*cp=='w') { /* windows */
1665 crmode_f = CRLF; cp++;
1666 } else if (*cp=='0') { /* no conversion */
1676 /* module muliple options in a string are allowed for Perl moudle */
1677 while(*cp && *cp++!='-');
1680 /* bogus option but ignored */
/* Find the input_code entry registered for a given converter function.
   The visible code starts from input_code_list and compares the argument
   with the entry's iconv_func member; set_iconv() treats the result as a
   struct input_code pointer and reads its name. */
1686 #ifdef ANSI_C_PROTOTYPE
1687 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1689 struct input_code * find_inputcode_byfunc(iconv_func)
1690 int (*iconv_func)(); /* K&R-style declaration of the same parameter */
1694 struct input_code *p = input_code_list; /* scan starts at input_code_list */
1696 if (iconv_func == p->iconv_func){ /* this entry uses the requested converter */
/* Select the input converter.
   f: callers in this file pass TRUE, FALSE or -TRUE; under INPUT_CODE_FIX
      the value -TRUE means "force" (see the condition below).
   iconv_func: converter taking (c2, c1, c0).
   NOTE(review): presumably stores iconv_func into iconv; that assignment is
   not among the lines visible here -- confirm. */
1705 #ifdef ANSI_C_PROTOTYPE
1706 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1708 void set_iconv(f, iconv_func)
1710 int (*iconv_func)();
1713 #ifdef INPUT_CODE_FIX
1721 #ifdef INPUT_CODE_FIX
/* extra condition: the change is forced, or input_f is unset (zero) */
1722 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
/* When the code is established and the active converter differs from the one
   last reported, look up its entry, record the entry's name as the input
   code name and pass it to debug(). */
1728 if (estab_f && iconv_for_check != iconv){
1729 struct input_code *p = find_inputcode_byfunc(iconv);
1731 set_input_codename(p->name);
1732 debug(input_codename);
1734 iconv_for_check = iconv; /* remember which converter was last reported */
/* Score flag bits that set_code_score() ORs into an input_code's score.
   Each flag is the previous one shifted left by one bit. */
1739 #define SCORE_L2 (1) /* JIS level-2 kanji */
1740 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1741 #define SCORE_DEPEND (SCORE_KANA << 1) /* machine-dependent characters */
1742 #ifdef SHIFTJIS_CP932
1743 #define SCORE_CP932 (SCORE_DEPEND << 1) /* remapped via CP932 */
1744 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1746 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1748 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1749 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
/* value assigned to ptr->score by status_reset() */
1751 #define SCORE_INIT (SCORE_iMIME)
/* Score flags indexed by the low nibble of c2 (c2 & 0x0f); code_score()
   consults this table when (c2 & 0x70) == 0x20. */
1753 const int score_table_A0[] = {
1756 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1757 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
/* Score flags indexed by the low nibble of c2 (c2 & 0x0f); code_score()
   consults this table when (c2 & 0x70) == 0x70. */
1760 const int score_table_F0[] = {
1761 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1762 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1763 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1764 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/* Set the given SCORE_* flag bit(s) in ptr->score. */
1767 void set_code_score(ptr, score)
1768 struct input_code *ptr;
1772 ptr->score |= score; /* OR the flag(s) in; existing bits are kept */
/* Clear the given SCORE_* flag bit(s) in ptr->score. */
1776 void clr_code_score(ptr, score)
1777 struct input_code *ptr;
1781 ptr->score &= ~score; /* mask the flag(s) out; other bits are kept */
/* Classify the character held in ptr->buf[0] (and, with UTF8_OUTPUT_ENABLE,
   ptr->buf[1]) and add the matching SCORE_* flag to ptr->score.
   The first condition of the chain is not among the visible lines. */
1785 void code_score(ptr)
1786 struct input_code *ptr;
1788 int c2 = ptr->buf[0];
1789 #ifdef UTF8_OUTPUT_ENABLE
1790 int c1 = ptr->buf[1];
1793 set_code_score(ptr, SCORE_ERROR);
1794 }else if (c2 == SSO){
1795 set_code_score(ptr, SCORE_KANA); /* SSO marks the kana case */
1796 #ifdef UTF8_OUTPUT_ENABLE
1797 }else if (!e2w_conv(c2, c1)){ /* e2w_conv() returned 0 for this pair */
1798 set_code_score(ptr, SCORE_NO_EXIST);
1800 }else if ((c2 & 0x70) == 0x20){ /* per-row flags from score_table_A0 */
1801 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1802 }else if ((c2 & 0x70) == 0x70){ /* per-row flags from score_table_F0 */
1803 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1804 }else if ((c2 & 0x70) >= 0x50){ /* remaining case here: 0x50 or 0x60 */
1805 set_code_score(ptr, SCORE_L2);
/* Disable a detection candidate.  The only statement visible here: if the
   active converter is this candidate's iconv_func, reset it with
   set_iconv(FALSE, 0). */
1809 void status_disable(ptr)
1810 struct input_code *ptr;
1815 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/* Append c to ptr->buf and advance ptr->index.
   No bounds check is visible here. */
1818 void status_push_ch(ptr, c)
1819 struct input_code *ptr;
1822 ptr->buf[ptr->index++] = c;
1825 void status_clear(ptr)
1826 struct input_code *ptr;
/* Reset a candidate; the visible statement restores its score to SCORE_INIT. */
1832 void status_reset(ptr)
1833 struct input_code *ptr;
1836 ptr->score = SCORE_INIT;
/* Re-initialize a candidate; the visible statement zeroes _file_stat, the
   state that w16_status() tests against 0, >0 and <0. */
1839 void status_reinit(ptr)
1840 struct input_code *ptr;
1843 ptr->_file_stat = 0;
/* Shared check called by the s_/e_/w_status routines.  It acts only when c is
   at most DEL and a code is already established (estab_f); the action itself
   is not among the visible lines. */
1846 void status_check(ptr, c)
1847 struct input_code *ptr;
1850 if (c <= DEL && estab_f){
/* Byte-by-byte status tracker for the candidate that pairs with s2e_conv()
   (NOTE(review): presumably Shift_JIS / MS Kanji -- confirm against
   input_code_list).  Bytes that fit are pushed into ptr->buf; a byte that
   does not fit disables the candidate via status_disable(). */
1855 void s_status(ptr, c)
1856 struct input_code *ptr;
1861 status_check(ptr, c);
1866 #ifdef NUMCHAR_OPTION
1867 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1870 }else if (0xa1 <= c && c <= 0xdf){
/* single byte 0xa1-0xdf: stored as the pair (SSO, c) */
1871 status_push_ch(ptr, SSO);
1872 status_push_ch(ptr, c);
1875 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
/* first byte of a two-byte character; the second is checked further below */
1877 status_push_ch(ptr, c);
1878 #ifdef SHIFTJIS_CP932
1880 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1882 status_push_ch(ptr, c);
1883 #endif /* SHIFTJIS_CP932 */
1885 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1887 status_push_ch(ptr, c);
1888 #endif /* X0212_ENABLE */
1890 status_disable(ptr);
/* second byte: accepted in 0x40-0x7e or 0x80-0xfc, then the buffered pair is
   converted in place by s2e_conv() */
1894 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1895 status_push_ch(ptr, c);
1896 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1900 status_disable(ptr);
1904 #ifdef SHIFTJIS_CP932
1905 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1906 status_push_ch(ptr, c);
/* s2e_conv() returning 0 here flags the candidate with SCORE_CP932 */
1907 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1908 set_code_score(ptr, SCORE_CP932);
1913 #endif /* SHIFTJIS_CP932 */
1914 #ifndef X0212_ENABLE
1915 status_disable(ptr);
/* Byte-by-byte status tracker for the candidate whose bytes are SSO, 0x8f
   or 0xa1-0xfe (NOTE(review): presumably EUC-JP -- confirm against
   input_code_list).  Bytes that fit are pushed into ptr->buf; a byte that
   does not fit disables the candidate via status_disable(). */
1921 void e_status(ptr, c)
1922 struct input_code *ptr;
1927 status_check(ptr, c);
1932 #ifdef NUMCHAR_OPTION
1933 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1936 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
/* SSO or a byte in 0xa1-0xfe starts a multi-byte character */
1938 status_push_ch(ptr, c);
1940 }else if (0x8f == c){
/* 0x8f prefix, handled under X0212_ENABLE */
1942 status_push_ch(ptr, c);
1943 #endif /* X0212_ENABLE */
1945 status_disable(ptr);
/* following byte must lie in 0xa1-0xfe */
1949 if (0xa1 <= c && c <= 0xfe){
1950 status_push_ch(ptr, c);
1954 status_disable(ptr);
/* same range check for the further state handled under X0212_ENABLE */
1959 if (0xa1 <= c && c <= 0xfe){
1961 status_push_ch(ptr, c);
1963 status_disable(ptr);
1965 #endif /* X0212_ENABLE */
1969 #ifdef UTF8_INPUT_ENABLE
/* Byte-by-byte status tracker for the UTF-16 candidate, driven by
   ptr->_file_stat: 0 = nothing seen yet, >0 = accepted, <0 = rejected. */
1970 void w16_status(ptr, c)
1971 struct input_code *ptr;
1978 if (ptr->_file_stat == 0){
/* very first byte: only 0xfe or 0xff keeps the candidate alive */
1979 if (c == 0xfe || c == 0xff){
1981 status_push_ch(ptr, c);
1982 ptr->_file_stat = 1;
1984 status_disable(ptr);
1985 ptr->_file_stat = -1;
1987 }else if (ptr->_file_stat > 0){
1989 status_push_ch(ptr, c);
1990 }else if (ptr->_file_stat < 0){
/* already rejected: stay disabled */
1991 status_disable(ptr);
1997 status_disable(ptr);
1998 ptr->_file_stat = -1;
2000 status_push_ch(ptr, c);
/* accept 0xfe/0xff only when it differs from ptr->stat
   (NOTE(review): ptr->stat presumably holds the previous byte -- confirm) */
2007 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
2008 status_push_ch(ptr, c);
2011 status_disable(ptr);
2012 ptr->_file_stat = -1;
/* Byte-by-byte status tracker for the UTF-8 candidate.  Lead bytes
   0xc0-0xdf and 0xe0-0xef are buffered, following bytes must lie in
   0x80-0xbf; anything else disables the candidate via status_disable(). */
2018 void w_status(ptr, c)
2019 struct input_code *ptr;
2024 status_check(ptr, c);
2029 #ifdef NUMCHAR_OPTION
2030 }else if ((c & CLASS_MASK) == CLASS_UTF16){
2033 }else if (0xc0 <= c && c <= 0xdf){
2035 status_push_ch(ptr, c);
2036 }else if (0xe0 <= c && c <= 0xef){
2038 status_push_ch(ptr, c);
2040 status_disable(ptr);
2045 if (0x80 <= c && c <= 0xbf){
2046 status_push_ch(ptr, c);
/* sequence complete once index exceeds stat */
2047 if (ptr->index > ptr->stat){
/* EF BB BF is noted in bom before the buffer is overwritten */
2048 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2049 && ptr->buf[2] == 0xbf);
/* convert the buffered bytes in place with w2e_conv() */
2050 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2051 &ptr->buf[0], &ptr->buf[1]);
2058 status_disable(ptr);
2069 int action_flag = 1;
2070 struct input_code *result = 0;
2071 struct input_code *p = input_code_list;
2073 (p->status_func)(p, c);
2076 }else if(p->stat == 0){
2087 if (result && !estab_f){
2088 set_iconv(TRUE, result->iconv_func);
2089 }else if (c <= DEL){
2090 struct input_code *ptr = input_code_list;
2105 return std_gc_buf[--std_gc_ndx];
2116 if (std_gc_ndx == STD_GC_BUFSIZE){
2119 std_gc_buf[std_gc_ndx++] = c;
2133 #if !defined(PERL_XS) && !defined(WIN32DLL)
2140 while ((c = (*i_getc)(f)) != EOF)
/* Wire up the conversion pipeline.  Output side: each enabled filter saves
   the current oconv in its own o_* hook and then becomes the new oconv.
   Input side: each enabled filter saves i_getc/i_ungetc in its own hooks and
   takes their place.  Most enabling conditions are not among the visible
   lines. */
2149 oconv = output_conv;
2152 /* replace continuation module, from output side */
2154 /* output redirection */
2156 if (noout_f || guess_f){
2163 if (mimeout_f == TRUE) {
2164 o_base64conv = oconv; oconv = base64_conv;
2166 /* base64_count = 0; */
2170 o_crconv = oconv; oconv = cr_conv;
2173 o_rot_conv = oconv; oconv = rot_conv;
2176 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2179 o_hira_conv = oconv; oconv = hira_conv;
2182 o_fconv = oconv; oconv = fold_conv;
2185 if (alpha_f || x0201_f) {
2186 o_zconv = oconv; oconv = z_conv;
2190 i_ungetc = std_ungetc;
2191 /* input redirection */
2194 i_cgetc = i_getc; i_getc = cap_getc;
2195 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2198 i_ugetc = i_getc; i_getc = url_getc;
2199 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2202 #ifdef NUMCHAR_OPTION
2204 i_ngetc = i_getc; i_getc = numchar_getc;
2205 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2208 #ifdef UNICODE_NORMALIZATION
2209 if (nfc_f && input_f == UTF8_INPUT){
2210 i_nfc_getc = i_getc; i_getc = nfc_getc;
2211 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2214 if (mime_f && mimebuf_f==FIXED_MIME) {
2215 i_mgetc = i_getc; i_getc = mime_getc;
2216 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2219 i_bgetc = i_getc; i_getc = broken_getc;
2220 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
/* An explicitly requested input code forces its converter (-TRUE);
   otherwise e_iconv is set without forcing. */
2222 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
2223 set_iconv(-TRUE, e_iconv);
2224 } else if (input_f == SJIS_INPUT) {
2225 set_iconv(-TRUE, s_iconv);
2226 #ifdef UTF8_INPUT_ENABLE
2227 } else if (input_f == UTF8_INPUT) {
2228 set_iconv(-TRUE, w_iconv);
2229 } else if (input_f == UTF16BE_INPUT) {
2230 set_iconv(-TRUE, w_iconv16);
2231 } else if (input_f == UTF16LE_INPUT) {
2232 set_iconv(-TRUE, w_iconv16);
2235 set_iconv(FALSE, e_iconv);
2239 struct input_code *p = input_code_list;
2247 Conversion main loop. Code detection only.
2256 int is_8bit = FALSE;
2258 module_connection();
2261 if(input_f == SJIS_INPUT
2262 #ifdef UTF8_INPUT_ENABLE
2263 || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT
2271 output_mode = ASCII;
2274 #define NEXT continue /* no output, get next */
2275 #define SEND ; /* output c1 and c2, get next */
2276 #define LAST break /* end of loop, go closing */
2278 while ((c1 = (*i_getc)(f)) != EOF) {
2283 /* in case of 8th bit is on */
2284 if (!estab_f&&!mime_decode_mode) {
2285 /* in case of not established yet */
2286 /* It is still ambiguious */
2287 if (h_conv(f, c2, c1)==EOF)
2293 /* in case of already established */
2295 /* ignore bogus code */
2301 /* second byte, 7 bit code */
2302 /* it might be kanji shitfted */
2303 if ((c1 == DEL) || (c1 <= SPACE)) {
2304 /* ignore bogus first code */
2312 #ifdef UTF8_INPUT_ENABLE
2321 #ifdef NUMCHAR_OPTION
2322 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
2325 } else if (c1 > DEL) {
2327 if (!estab_f && !iso8859_f) {
2328 /* not established yet */
2329 if (!is_8bit) is_8bit = TRUE;
2332 } else { /* estab_f==TRUE */
2337 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2338 /* SJIS X0201 Case... */
2339 if(iso2022jp_f && x0201_f==NO_X0201) {
2340 (*oconv)(GETA1, GETA2);
2347 } else if (c1==SSO && iconv != s_iconv) {
2348 /* EUC X0201 Case */
2349 c1 = (*i_getc)(f); /* skip SSO */
2351 if (SSP<=c1 && c1<0xe0) {
2352 if(iso2022jp_f && x0201_f==NO_X0201) {
2353 (*oconv)(GETA1, GETA2);
2360 } else { /* bogus code, skip SSO and one byte */
2364 /* already established */
2369 } else if ((c1 > SPACE) && (c1 != DEL)) {
2370 /* in case of Roman characters */
2372 /* output 1 shifted byte */
2376 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2377 /* output 1 shifted byte */
2378 if(iso2022jp_f && x0201_f==NO_X0201) {
2379 (*oconv)(GETA1, GETA2);
2386 /* look like bogus code */
2389 } else if (input_mode == X0208) {
2390 /* in case of Kanji shifted */
2393 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2394 /* Check MIME code */
2395 if ((c1 = (*i_getc)(f)) == EOF) {
2398 } else if (c1 == '?') {
2399 /* =? is mime conversion start sequence */
2400 if(mime_f == STRICT_MIME) {
2401 /* check in real detail */
2402 if (mime_begin_strict(f) == EOF)
2406 } else if (mime_begin(f) == EOF)
2416 /* normal ASCII code */
2419 } else if (!is_8bit && c1 == SI) {
2422 } else if (!is_8bit && c1 == SO) {
2425 } else if (!is_8bit && c1 == ESC ) {
2426 if ((c1 = (*i_getc)(f)) == EOF) {
2427 /* (*oconv)(0, ESC); don't send bogus code */
2429 } else if (c1 == '$') {
2430 if ((c1 = (*i_getc)(f)) == EOF) {
2432 (*oconv)(0, ESC); don't send bogus code
2433 (*oconv)(0, '$'); */
2435 } else if (c1 == '@'|| c1 == 'B') {
2436 /* This is kanji introduction */
2439 set_input_codename("ISO-2022-JP");
2441 debug(input_codename);
2444 } else if (c1 == '(') {
2445 if ((c1 = (*i_getc)(f)) == EOF) {
2446 /* don't send bogus code
2452 } else if (c1 == '@'|| c1 == 'B') {
2453 /* This is kanji introduction */
2458 } else if (c1 == 'D'){
2462 #endif /* X0212_ENABLE */
2464 /* could be some special code */
2471 } else if (broken_f&0x2) {
2472 /* accept any ESC-(-x as broken code ... */
2482 } else if (c1 == '(') {
2483 if ((c1 = (*i_getc)(f)) == EOF) {
2484 /* don't send bogus code
2486 (*oconv)(0, '('); */
2490 /* This is X0201 kana introduction */
2491 input_mode = X0201; shift_mode = X0201;
2493 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2494 /* This is X0208 kanji introduction */
2495 input_mode = ASCII; shift_mode = FALSE;
2497 } else if (broken_f&0x2) {
2498 input_mode = ASCII; shift_mode = FALSE;
2503 /* maintain various input_mode here */
2507 } else if ( c1 == 'N' || c1 == 'n' ){
2509 c3 = (*i_getc)(f); /* skip SS2 */
2510 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2525 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2526 input_mode = ASCII; set_iconv(FALSE, 0);
2528 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2529 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2537 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2538 if ((c1=(*i_getc)(f))!=EOF) {
2542 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2558 if (input_mode == X0208)
2559 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2561 else if (input_mode == X0212)
2562 (*oconv)((0x8f << 8) | c2, c1);
2563 #endif /* X0212_ENABLE */
2564 else if (input_mode)
2565 (*oconv)(input_mode, c1); /* other special case */
2566 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2567 int c0 = (*i_getc)(f);
2570 (*iconv)(c2, c1, c0);
2576 /* goto next_word */
2580 (*iconv)(EOF, 0, 0);
2581 if (!is_inputcode_set)
2584 struct input_code *p = input_code_list;
2585 struct input_code *result = p;
2587 if (p->score < result->score) result = p;
2590 set_input_codename(result->name);
2605 /** it must NOT be in the kanji shifte sequence */
2606 /** it must NOT be written in JIS7 */
2607 /** and it must be after 2 byte 8bit code */
2614 while ((c1 = (*i_getc)(f)) != EOF) {
2620 if (push_hold_buf(c1) == EOF || estab_f){
2626 struct input_code *p = input_code_list;
2627 struct input_code *result = p;
2632 if (p->score < result->score){
2637 set_iconv(FALSE, result->iconv_func);
2642 ** 1) EOF is detected, or
2643 ** 2) Code is established, or
2644 ** 3) Buffer is FULL (but last word is pushed)
2646 ** in 1) and 3) cases, we continue to use
2647 ** Kanji codes by oconv and leave estab_f unchanged.
2652 while (wc < hold_count){
2653 c2 = hold_buf[wc++];
2655 #ifdef NUMCHAR_OPTION
2656 || (c2 & CLASS_MASK) == CLASS_UTF16
2661 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2662 (*iconv)(X0201, c2, 0);
2665 if (wc < hold_count){
2666 c1 = hold_buf[wc++];
2675 if ((*iconv)(c2, c1, 0) < 0){
2677 if (wc < hold_count){
2678 c0 = hold_buf[wc++];
2687 (*iconv)(c2, c1, c0);
2700 if (hold_count >= HOLD_SIZE*2)
2702 hold_buf[hold_count++] = c2;
2703 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2706 const int shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
2708 int s2e_conv(c2, c1, p2, p1)
2712 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
2715 #ifdef SHIFTJIS_CP932
2716 if (cp51932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2717 extern const unsigned short shiftjis_cp932[3][189];
2718 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2724 #endif /* SHIFTJIS_CP932 */
2726 if (!x0213_f && x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2727 extern const unsigned short shiftjis_x0212[3][189];
2728 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2731 c2 = (0x8f << 8) | (val >> 8);
2744 if(x0213_f && c2 >= 0xF0){
2745 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
2746 c2 = 0x8F20 + shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
2747 }else{ /* 78<=k<=94 */
2748 c2 = 0x8F00 | (c2 * 2 - 0x17B);
2749 if (0x9E < c1) c2++;
2752 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
2753 if (0x9E < c1) c2++;
2756 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
2764 c2 = x0212_unshift(c2);
2779 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2782 int ret = s2e_conv(c2, c1, &c2, &c1);
2783 if (ret) return ret;
2797 }else if (c2 == 0x8f){
2801 c2 = (c2 << 8) | (c1 & 0x7f);
2803 #ifdef SHIFTJIS_CP932
2806 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2807 s2e_conv(s2, s1, &c2, &c1);
2808 if ((c2 & 0xff00) == 0){
2814 #endif /* SHIFTJIS_CP932 */
2815 #endif /* X0212_ENABLE */
2816 } else if (c2 == SSO){
2819 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2829 #ifdef UTF8_INPUT_ENABLE
2831 w2e_conv(c2, c1, c0, p2, p1)
2840 }else if (0xc0 <= c2 && c2 <= 0xef) {
2841 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2842 #ifdef NUMCHAR_OPTION
2845 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2860 /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
2861 if(ignore_zwnbsp_f){
2862 ignore_zwnbsp_f = FALSE;
2863 if(c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
2867 if (c2 == 0) /* 0x00-0x7f */
2868 c1 &= 0x7F; /* 1byte */
2870 if ((c2 & 0xe0) == 0xc0){ /* 0xc0-0xdf */
2872 if((c2 & 0xFE) == 0xC0 || c1 < 0x80 || 0xBF < c1) return 0;
2873 }else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
2874 return -1; /* 3bytes */
2876 else if (0xf0 <= c2)
2877 return 0; /* 4,5,6bytes */
2878 else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
2879 return 0; /* trail byte */
2883 /* must be 3bytes */
2885 if(c1 < 0xA0 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
2887 }else if(c2 == 0xED){
2888 if(c1 < 0x80 || 0x9F < c1 || c0 < 0x80 || 0xBF < c0)
2890 }else if((c2 & 0xf0) == 0xe0){
2891 if(c1 < 0x80 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
2895 if (c2 == 0 || c2 == EOF){
2896 #ifdef UTF8_OUTPUT_ENABLE
2897 } else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
2898 unsigned short val = 0;
2903 val = ww16_conv(c2, c1, c0);
2904 c2 = (val >> 8) & 0xff;
2908 ret = w2e_conv(c2, c1, c0, &c2, &c1);
2917 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/* Encode the code value val as UTF-8 bytes stored through p2, p1 and p0.
   The branch before "val < 0x800" is not among the visible lines. */
2919 w16w_conv(val, p2, p1, p0)
2927 }else if (val < 0x800){
/* two-byte form: 110xxxxx 10xxxxxx */
2928 *p2 = 0xc0 | (val >> 6);
2929 *p1 = 0x80 | (val & 0x3f);
/* three-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
2932 *p2 = 0xe0 | (val >> 12);
2933 *p1 = 0x80 | ((val >> 6) & 0x3f);
2934 *p0 = 0x80 | (val & 0x3f);
2939 #ifdef UTF8_INPUT_ENABLE
/* Decode the UTF-8 bytes c2, c1, c0 into a code value val.  The first branch
   and the handling of the last byte are not among the visible lines. */
2941 ww16_conv(c2, c1, c0)
2947 }else if (c2 >= 0xe0){
/* three-byte lead: low 4 bits go to bits 12-15, c1's low 6 bits to bits 6-11 */
2948 val = (c2 & 0x0f) << 12;
2949 val |= (c1 & 0x3f) << 6;
2951 }else if (c2 >= 0xc0){
/* two-byte lead: low 5 bits go to bits 6-10 */
2952 val = (c2 & 0x1f) << 6;
2961 w16e_conv(val, p2, p1)
2987 w16w_conv(val, &c2, &c1, &c0);
2988 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2989 #ifdef NUMCHAR_OPTION
2992 *p1 = CLASS_UTF16 | val;
3001 #ifdef UTF8_INPUT_ENABLE
3003 w_iconv16(c2, c1, c0)
3008 /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
3009 if(ignore_zwnbsp_f){
3010 ignore_zwnbsp_f = FALSE;
3011 if (c2==0376 && c1==0377){
3012 utf16_mode = UTF16BE_INPUT;
3014 }else if(c2==0377 && c1==0376){
3015 utf16_mode = UTF16LE_INPUT;
3019 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
3021 tmp=c1; c1=c2; c2=tmp;
3023 if ((c2==0 && c1 < 0x80) || c2==EOF) {
3026 }else if((c2>>3)==27){ /* surrogate pair */
3028 #ifdef UTF8_OUTPUT_ENABLE
3029 }else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
3031 }else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
3032 if (ret) return ret;
3038 unicode_to_jis_common(c2, c1, c0, p2, p1)
3042 extern const unsigned short *const utf8_to_euc_2bytes[];
3043 extern const unsigned short *const *const utf8_to_euc_3bytes[];
3047 if (ms_ucs_map_f == 2){
3048 /* CP932/CP51932: U+00A6 (BROKEN BAR) -> not 0x8fa2c3, but 0x7c */
3061 }else if(strict_mapping_f){
3065 case 0xAB: case 0xAD: case 0xB2: case 0xB3:
3066 case 0xB5: case 0xB7: case 0xB9: case 0xBB:
3078 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
3079 if(!ret && !ms_ucs_map_f
3084 if(*p2 == 0 && *p1 < 0x80){
3086 }else if(*p2 > 0xFF){
3088 if (e2s_conv(*p2, *p1, &s2, &s1) == 0){
3089 s2e_conv(s2, s1, p2, p1);
3090 if(*p2 == 0 && *p1 < 0x80)
3096 if(unicode_round_trip_f){
3101 if(c0 == 0x95) return 1;
3104 if(c0 == 0xA5) return 1;
3111 if(c0 == 0xBF) return 1;
3114 if(c0 == 0x8D) return 1;
3117 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3125 if(c2 == 0xE2 && c1 == 0x80 && c0 == 0xBE){
3129 }else if(c2 == 0xEF && c1 == 0xBD && c0 == 0x9E){
3130 if (p2) *p2 = 0x8F22;
3135 if(!strict_mapping_f);
3136 else if(ms_ucs_map_f == 2){
3137 /* Microsoft Code Page */
3143 case 0x94: case 0x96: case 0xBE:
3164 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
3167 ret = w_iconv_common(c1, c0, utf8_to_euc_3bytes[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3173 w_iconv_common(c1, c0, pp, psize, p2, p1)
3175 const unsigned short *const *pp;
3180 const unsigned short *p;
3183 if (pp == 0) return 1;
3186 if (c1 < 0 || psize <= c1) return 1;
3188 if (p == 0) return 1;
3191 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
3193 if (val == 0) return 1;
3194 if (disable_cp932ext_f && (
3195 (val>>8) == 0x2D || /* disable NEC special characters */
3196 val > 0xF300 /* disable NEC special characters */
3204 if (c2 == SO) c2 = X0201;
/* Feed hexadecimal digits of c to the output callback f, one call per digit
   as f(0, digit).  Digits are upper-case; the loop that steps `shift` is not
   among the visible lines. */
3212 nkf_each_char_to_hex(f, c)
3213 void (*f)PROTO((int c2,int c1));
3216 const char *hex = "0123456789ABCDEF";
3222 (*f)(0, hex[(c>>shift)&0xF]); /* nibble of c selected by shift */
/* Fallback that writes c through oconv as decimal digits, most significant
   first (0x30 is ASCII '0').  The conditions guarding each digit and any
   surrounding prefix/suffix output are not among the visible lines. */
3233 encode_fallback_html(c)
3240 (*oconv)(0, 0x30+(c/1000000)%10);
3242 (*oconv)(0, 0x30+(c/100000 )%10);
3244 (*oconv)(0, 0x30+(c/10000 )%10);
3246 (*oconv)(0, 0x30+(c/1000 )%10);
3248 (*oconv)(0, 0x30+(c/100 )%10);
3250 (*oconv)(0, 0x30+(c/10 )%10);
3252 (*oconv)(0, 0x30+ c %10);
3258 encode_fallback_xml(c)
3264 nkf_each_char_to_hex(oconv, c);
/* Fallback that writes c through oconv as upper-case hexadecimal digits.
   Values whose low 24 bits exceed 0xFFFF get two extra leading digits
   (bits 20-23 and 16-19); the four low nibbles are emitted after that.
   Any prefix characters are not among the visible lines. */
3270 encode_fallback_java(c)
3273 const char *hex = "0123456789ABCDEF";
3275 if((c&0x00FFFFFF) > 0xFFFF){
3279 (*oconv)(0, hex[(c>>20)&0xF]);
3280 (*oconv)(0, hex[(c>>16)&0xF]);
3284 (*oconv)(0, hex[(c>>12)&0xF]);
3285 (*oconv)(0, hex[(c>> 8)&0xF]);
3286 (*oconv)(0, hex[(c>> 4)&0xF]);
3287 (*oconv)(0, hex[ c &0xF]);
3292 encode_fallback_perl(c)
3298 nkf_each_char_to_hex(oconv, c);
/* Fallback that ignores the incoming value and outputs the configured
   substitution character unicode_subchar instead. */
3304 encode_fallback_subchar(c)
3307 c = unicode_subchar; /* replace the unmappable value */
3308 (*oconv)((c>>8)&0xFF, c&0xFF); /* emitted as a (high byte, low byte) pair */
3314 (*oconv)(0, (c>>shift)&0xFF); /* byte-wise output; surrounding control flow not visible */
3325 #ifdef UTF8_OUTPUT_ENABLE
3330 extern const unsigned short euc_to_utf8_1byte[];
3331 extern const unsigned short *const euc_to_utf8_2bytes[];
3332 extern const unsigned short *const euc_to_utf8_2bytes_ms[];
3333 const unsigned short *p;
3336 p = euc_to_utf8_1byte;
3338 } else if (c2 >> 8 == 0x8f){
3339 if(!ms_ucs_map_f && c2 == 0x8F22 && c1 == 0x43){
3342 extern const unsigned short *const x0212_to_utf8_2bytes[];
3343 c2 = (c2&0x7f) - 0x21;
3344 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3345 p = x0212_to_utf8_2bytes[c2];
3351 c2 = (c2&0x7f) - 0x21;
3352 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3353 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
3358 c1 = (c1 & 0x7f) - 0x21;
3359 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3376 if (unicode_bom_f==2) {
3383 #ifdef NUMCHAR_OPTION
3384 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3385 w16w_conv(c1, &c2, &c1, &c0);
3389 if (c0) (*o_putc)(c0);
3396 output_mode = ASCII;
3398 } else if (c2 == ISO8859_1) {
3399 output_mode = ISO8859_1;
3400 (*o_putc)(c1 | 0x080);
3403 #ifdef UTF8_INPUT_ENABLE
3404 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
3405 val = ((c2<<8)&0xff00) + c1;
3408 val = e2w_conv(c2, c1);
3410 w16w_conv(val, &c2, &c1, &c0);
3414 if (c0) (*o_putc)(c0);
3430 if (unicode_bom_f==2) {
3432 (*o_putc)((unsigned char)'\377');
3436 (*o_putc)((unsigned char)'\377');
3441 #ifdef UTF8_INPUT_ENABLE
3442 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
3445 if (c2 == ISO8859_1) {
3448 #ifdef NUMCHAR_OPTION
3449 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
3450 c2 = (c1 >> 8) & 0xff;
3454 unsigned short val = e2w_conv(c2, c1);
3455 c2 = (val >> 8) & 0xff;
3474 #ifdef NUMCHAR_OPTION
3475 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3476 w16e_conv(c1, &c2, &c1);
3477 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3478 if(encode_fallback)(*encode_fallback)(c1);
3486 } else if (c2 == 0) {
3487 output_mode = ASCII;
3489 } else if (c2 == X0201) {
3490 output_mode = JAPANESE_EUC;
3491 (*o_putc)(SSO); (*o_putc)(c1|0x80);
3492 } else if (c2 == ISO8859_1) {
3493 output_mode = ISO8859_1;
3494 (*o_putc)(c1 | 0x080);
3496 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3497 output_mode = JAPANESE_EUC;
3498 #ifdef SHIFTJIS_CP932
3501 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3502 s2e_conv(s2, s1, &c2, &c1);
3507 output_mode = ASCII;
3509 }else if ((c2 & 0xff00) >> 8 == 0x8f){
3512 (*o_putc)((c2 & 0x7f) | 0x080);
3513 (*o_putc)(c1 | 0x080);
3516 (*o_putc)((c2 & 0x7f) | 0x080);
3517 (*o_putc)(c1 | 0x080);
3521 if ((c1<0x21 || 0x7e<c1) ||
3522 (c2<0x21 || 0x7e<c2)) {
3523 set_iconv(FALSE, 0);
3524 return; /* too late to rescue this char */
3526 output_mode = JAPANESE_EUC;
3527 (*o_putc)(c2 | 0x080);
3528 (*o_putc)(c1 | 0x080);
3538 if ((ret & 0xff00) == 0x8f00){
3539 if (0x75 <= c && c <= 0x7f){
3540 ret = c + (0x109 - 0x75);
3543 if (0x75 <= c && c <= 0x7f){
3544 ret = c + (0x113 - 0x75);
/* Map a shifted row value back:
     0x7f-0x88 -> 0x75-0x7e
     0x89-0x92 -> 0x8ff5-0x8ffe (0x8f prefix, row with bit 7 set)
   Handling of other inputs is not among the visible lines. */
3551 int x0212_unshift(c)
3555 if (0x7f <= c && c <= 0x88){
3556 ret = c + (0x75 - 0x7f);
3557 }else if (0x89 <= c && c <= 0x92){
3558 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
3562 #endif /* X0212_ENABLE */
/* Convert the pair (c2, c1) into two bytes stored through p2 and p1; each
   pointer is written only when it is non-NULL.  Among the visible lines the
   function returns 1 when c2 is still above 0x7F after the 0x8f-prefixed
   handling.  (NOTE(review): presumably EUC-JP to Shift_JIS, judging by the
   name -- confirm.) */
3565 e2s_conv(c2, c1, p2, p1)
3566 int c2, c1, *p2, *p1;
3569 if ((c2 & 0xff00) == 0x8f00){ /* code carrying the 0x8f prefix */
3572 if((0x21 <= ndx && ndx <= 0x2F)){
3573 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
3574 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3576 }else if(0x6E <= ndx && ndx <= 0x7E){
3577 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
3578 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3584 else if(0x21 <= ndx && ndx <= 0x7e){
/* table lookup: row ndx, column from the low 7 bits of c1 */
3586 const unsigned short *ptr;
3587 extern const unsigned short *const x0212_shiftjis[];
3589 ptr = x0212_shiftjis[ndx - 0x21];
3591 val = ptr[(c1 & 0x7f) - 0x21];
3600 c2 = x0212_shift(c2);
3602 #endif /* X0212_ENABLE */
3604 if(0x7F < c2) return 1; /* cannot be expressed by the formula below */
/* first byte: two rows share one lead byte; base 0x71 up to row 0x5e, else 0xb1 */
3605 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
/* second byte: odd rows add 0x1f (c1 < 0x60) or 0x20, even rows add 0x7e */
3606 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3615 #ifdef NUMCHAR_OPTION
3616 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3617 w16e_conv(c1, &c2, &c1);
3618 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3619 if(encode_fallback)(*encode_fallback)(c1);
3627 } else if (c2 == 0) {
3628 output_mode = ASCII;
3630 } else if (c2 == X0201) {
3631 output_mode = SHIFT_JIS;
3633 } else if (c2 == ISO8859_1) {
3634 output_mode = ISO8859_1;
3635 (*o_putc)(c1 | 0x080);
3637 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3638 output_mode = SHIFT_JIS;
3639 if (e2s_conv(c2, c1, &c2, &c1) == 0){
3645 if ((c1<0x20 || 0x7e<c1) ||
3646 (c2<0x20 || 0x7e<c2)) {
3647 set_iconv(FALSE, 0);
3648 return; /* too late to rescue this char */
3650 output_mode = SHIFT_JIS;
3651 e2s_conv(c2, c1, &c2, &c1);
3653 #ifdef SHIFTJIS_CP932
3655 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3656 extern const unsigned short cp932inv[2][189];
3657 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3663 #endif /* SHIFTJIS_CP932 */
3666 if (prefix_table[(unsigned char)c1]){
3667 (*o_putc)(prefix_table[(unsigned char)c1]);
3678 #ifdef NUMCHAR_OPTION
3679 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3680 w16e_conv(c1, &c2, &c1);
3681 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3682 if(encode_fallback)(*encode_fallback)(c1);
3688 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3691 (*o_putc)(ascii_intro);
3692 output_mode = ASCII;
3696 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3698 if(output_mode!=X0213_2){
3699 output_mode = X0213_2;
3702 if(output_mode!=X0212){
3703 output_mode = X0212;
3709 (*o_putc)(output_mode & 0x7F);
3710 (*o_putc)(c2 & 0x7f);
3713 } else if (c2==X0201) {
3714 if (output_mode!=X0201) {
3715 output_mode = X0201;
3721 } else if (c2==ISO8859_1) {
3722 /* iso8859 introduction, or 8th bit on */
3723 /* Can we convert in 7bit form using ESC-'-'-A ?
3725 output_mode = ISO8859_1;
3727 } else if (c2 == 0) {
3728 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3731 (*o_putc)(ascii_intro);
3732 output_mode = ASCII;
3737 if (output_mode!=X0213_1) {
3738 output_mode = X0213_1;
3742 (*o_putc)(output_mode & 0x7F);
3744 }else if (output_mode != X0208) {
3745 output_mode = X0208;
3748 (*o_putc)(kanji_intro);
3750 if (c1<0x20 || 0x7e<c1)
3752 if (c2<0x20 || 0x7e<c2)
3764 mime_prechar(c2, c1);
3765 (*o_base64conv)(c2,c1);
/* Push-back store: characters are saved with broken_buf[broken_counter++]
   and handed back in reverse order via broken_buf[--broken_counter]. */
3769 STATIC int broken_buf[3];
/* Number of characters currently held in broken_buf. */
3770 STATIC int broken_counter = 0;
/* Compared against ESC before a bare '$' or '(' is examined further;
   presumably the previously returned character -- TODO confirm. */
3771 STATIC int broken_last = 0;
3778 if (broken_counter>0) {
3779 return broken_buf[--broken_counter];
3782 if (c=='$' && broken_last != ESC
3783 && (input_mode==ASCII || input_mode==X0201)) {
3786 if (c1=='@'|| c1=='B') {
3787 broken_buf[0]=c1; broken_buf[1]=c;
3794 } else if (c=='(' && broken_last != ESC
3795 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3798 if (c1=='J'|| c1=='B') {
3799 broken_buf[0]=c1; broken_buf[1]=c;
3817 if (broken_counter<2)
3818 broken_buf[broken_counter++]=c;
3822 STATIC int prev_cr = 0;
3830 if (! (c2==0&&c1==NL) ) {
3836 } else if (c1=='\r') {
3838 } else if (c1=='\n') {
3839 if (crmode_f==CRLF) {
3840 (*o_crconv)(0,'\r');
3841 } else if (crmode_f==CR) {
3842 (*o_crconv)(0,'\r');
3846 } else if (c1!='\032' || crmode_f!=NL){
3852 Return value of fold_conv()
3854 \n add newline and output char
3855 \r add newline and output nothing
3858 1 (or else) normal output
3860 fold state in prev (previous character)
3862 >0x80 Japanese (X0208/X0201)
3867 This fold algorithm does not preserve leading spaces in a line.
3868 This is the main difference from fmt.
3871 #define char_size(c2,c1) (c2?2:1)
3880 if (c1== '\r' && !fold_preserve_f) {
3881 fold_state=0; /* ignore cr */
3882 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3884 fold_state=0; /* ignore cr */
3885 } else if (c1== BS) {
3886 if (f_line>0) f_line--;
3888 } else if (c2==EOF && f_line != 0) { /* close open last line */
3890 } else if ((c1=='\n' && !fold_preserve_f)
3891 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3892 && fold_preserve_f)) {
3894 if (fold_preserve_f) {
3898 } else if ((f_prev == c1 && !fold_preserve_f)
3899 || (f_prev == '\n' && fold_preserve_f)
3900 ) { /* duplicate newline */
3903 fold_state = '\n'; /* output two newline */
3909 if (f_prev&0x80) { /* Japanese? */
3911 fold_state = 0; /* ignore given single newline */
3912 } else if (f_prev==' ') {
3916 if (++f_line<=fold_len)
3920 fold_state = '\r'; /* fold and output nothing */
3924 } else if (c1=='\f') {
3929 fold_state = '\n'; /* output newline and clear */
3930 } else if ( (c2==0 && c1==' ')||
3931 (c2==0 && c1=='\t')||
3932 (c2=='!'&& c1=='!')) {
3933 /* X0208 kankaku or ascii space */
3934 if (f_prev == ' ') {
3935 fold_state = 0; /* remove duplicate spaces */
3938 if (++f_line<=fold_len)
3939 fold_state = ' '; /* output ASCII space only */
3941 f_prev = ' '; f_line = 0;
3942 fold_state = '\r'; /* fold and output nothing */
3946 prev0 = f_prev; /* we still need this one... , but almost done */
3948 if (c2 || c2==X0201)
3949 f_prev |= 0x80; /* this is Japanese */
3950 f_line += char_size(c2,c1);
3951 if (f_line<=fold_len) { /* normal case */
3954 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3955 f_line = char_size(c2,c1);
3956 fold_state = '\n'; /* We can't wait, do fold now */
3957 } else if (c2==X0201) {
3958 /* simple kinsoku rules return 1 means no folding */
/* NOTE(review): the glyphs below are decoded from the original ISO-2022-JP
   comments; the ones on the 0xa4/0xa3/0xa1 lines do not appear to line up
   with the JIS X 0201 code assignments -- confirm against the code chart. */
3959 if (c1==(0xde&0x7f)) fold_state = 1; /* ゛ (dakuten) */
3960 else if (c1==(0xdf&0x7f)) fold_state = 1; /* ゜ (handakuten) */
3961 else if (c1==(0xa4&0x7f)) fold_state = 1; /* 。 (ideographic full stop) */
3962 else if (c1==(0xa3&0x7f)) fold_state = 1; /* ， (comma) */
3963 else if (c1==(0xa1&0x7f)) fold_state = 1; /* 」 (closing corner bracket) */
3964 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3965 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3967 fold_state = '\n';/* add one new f_line before this character */
3970 fold_state = '\n';/* add one new f_line before this character */
3973 /* kinsoku point in ASCII */
3974 if ( c1==')'|| /* { [ ( */
3985 /* just after special */
3986 } else if (!is_alnum(prev0)) {
3987 f_line = char_size(c2,c1);
3989 } else if ((prev0==' ') || /* ignored new f_line */
3990 (prev0=='\n')|| /* ignored new f_line */
3991 (prev0&0x80)) { /* X0208 - ASCII */
3992 f_line = char_size(c2,c1);
3993 fold_state = '\n';/* add one new f_line before this character */
3995 fold_state = 1; /* default no fold in ASCII */
/* Characters that must not start a line (kinsoku): fold_state = 1 means
   "no fold here". Glyphs are decoded from the original ISO-2022-JP comments. */
3999 if (c1=='"') fold_state = 1; /* 、 (ideographic comma) */
4000 else if (c1=='#') fold_state = 1; /* 。 (ideographic full stop) */
4001 else if (c1=='W') fold_state = 1; /* 」 (closing corner bracket) */
4002 else if (c1=='K') fold_state = 1; /* ） (closing parenthesis) */
4003 else if (c1=='$') fold_state = 1; /* ， (comma) */
4004 else if (c1=='%') fold_state = 1; /* ． (full stop) */
4005 else if (c1=='\'') fold_state = 1; /* ＋ (plus sign, as written in the original comment) */
4006 else if (c1=='(') fold_state = 1; /* ； (semicolon) */
4007 else if (c1==')') fold_state = 1; /* ？ (question mark) */
4008 else if (c1=='*') fold_state = 1; /* ！ (exclamation mark) */
4009 else if (c1=='+') fold_state = 1; /* ゛ (dakuten) */
4010 else if (c1==',') fold_state = 1; /* ゜ (handakuten) */
4011 /* default no fold in kinsoku */
4014 f_line = char_size(c2,c1);
4015 /* add one new f_line before this character */
4018 f_line = char_size(c2,c1);
4020 /* add one new f_line before this character */
4025 /* terminator process */
4026 switch(fold_state) {
4045 int z_prev2=0,z_prev1=0;
4052 /* if (c2) c1 &= 0x7f; assertion */
4054 if (x0201_f && z_prev2==X0201) { /* X0201 */
4055 if (c1==(0xde&0x7f)) { /* dakuten (voiced sound mark) follows the held kana */
/* emit the pair looked up in dv[] for the held character z_prev1 */
4057 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
4059 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /* handakuten (semi-voiced sound mark), only if ev[] has an entry */
4061 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
4065 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
4074 if (x0201_f && c2==X0201) {
4075 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
4076 /* wait for a following dakuten or handakuten */
4077 z_prev1 = c1; z_prev2 = c2;
4080 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
4085 /* JISX0208 Alphabet */
4086 if (alpha_f && c2 == 0x23 ) {
4088 } else if (alpha_f && c2 == 0x21 ) {
4089 /* JISX0208 Kigou */
4094 } else if (alpha_f&0x4) {
4099 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4105 case '>': entity = ">"; break;
4106 case '<': entity = "<"; break;
4107 case '\"': entity = """; break;
4108 case '&': entity = "&"; break;
4111 while (*entity) (*o_zconv)(0, *entity++);
4121 #define rot13(c) ( \
4123 (c <= 'M') ? (c + 13): \
4124 (c <= 'Z') ? (c - 13): \
4126 (c <= 'm') ? (c + 13): \
4127 (c <= 'z') ? (c - 13): \
4131 #define rot47(c) ( \
4133 ( c <= 'O' ) ? (c + 47) : \
4134 ( c <= '~' ) ? (c - 47) : \
4142 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
4148 (*o_rot_conv)(c2,c1);
4155 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
4157 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
4160 (*o_hira_conv)(c2,c1);
4165 iso2022jp_check_conv(c2,c1)
4168 STATIC const int range[RANGE_NUM_MAX][2] = {
4191 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4195 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4200 for (i = 0; i < RANGE_NUM_MAX; i++) {
4201 start = range[i][0];
4204 if (c >= start && c <= end) {
4209 (*o_iso2022jp_check_conv)(c2,c1);
4213 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
4215 const unsigned char *mime_pattern[] = {
4216 (const unsigned char *)"\075?EUC-JP?B?",
4217 (const unsigned char *)"\075?SHIFT_JIS?B?",
4218 (const unsigned char *)"\075?ISO-8859-1?Q?",
4219 (const unsigned char *)"\075?ISO-8859-1?B?",
4220 (const unsigned char *)"\075?ISO-2022-JP?B?",
4221 (const unsigned char *)"\075?ISO-2022-JP?Q?",
4222 #if defined(UTF8_INPUT_ENABLE)
4223 (const unsigned char *)"\075?UTF-8?B?",
4224 (const unsigned char *)"\075?UTF-8?Q?",
4226 (const unsigned char *)"\075?US-ASCII?Q?",
4231 /* Marker used to raise the priority of the corresponding input code */
4232 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
4233 e_iconv, s_iconv, 0, 0, 0, 0,
4234 #if defined(UTF8_INPUT_ENABLE)
4240 const int mime_encode[] = {
4241 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
4242 #if defined(UTF8_INPUT_ENABLE)
4249 const int mime_encode_method[] = {
4250 'B', 'B','Q', 'B', 'B', 'Q',
4251 #if defined(UTF8_INPUT_ENABLE)
4259 #define MAXRECOVER 20
4264 if (i_getc!=mime_getc) {
4265 i_mgetc = i_getc; i_getc = mime_getc;
4266 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4267 if(mime_f==STRICT_MIME) {
4268 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4269 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4275 unswitch_mime_getc()
4277 if(mime_f==STRICT_MIME) {
4278 i_mgetc = i_mgetc_buf;
4279 i_mungetc = i_mungetc_buf;
4282 i_ungetc = i_mungetc;
4283 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
4284 mime_iconv_back = NULL;
4288 mime_begin_strict(f)
4293 const unsigned char *p,*q;
4294 int r[MAXRECOVER]; /* recovery buffer, sized for the longest MIME pattern */
4296 mime_decode_mode = FALSE;
4297 /* =? has been checked */
4299 p = mime_pattern[j];
4302 for(i=2;p[i]>' ';i++) { /* start at =? */
4303 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
4304 /* pattern fails, try next one */
4306 while ((p = mime_pattern[++j])) {
4307 for(k=2;k<i;k++) /* assume length(p) > i */
4308 if (p[k]!=q[k]) break;
4309 if (k==i && nkf_toupper(c1)==p[k]) break;
4311 if (p) continue; /* found next one, continue */
4312 /* all fails, output from recovery buffer */
4320 mime_decode_mode = p[i-2];
4322 mime_iconv_back = iconv;
4323 set_iconv(FALSE, mime_priority_func[j]);
4324 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4326 if (mime_decode_mode=='B') {
4327 mimebuf_f = unbuf_f;
4329 /* do MIME integrity check */
4330 return mime_integrity(f,mime_pattern[j]);
4342 /* We don't keep EOF in the Fifo, because it contains ?= as
4343 a terminator. It was checked in mime_integrity. */
4344 return ((mimebuf_f)?
4345 (*i_mgetc_buf)(f):Fifo(mime_input++));
4349 mime_ungetc_buf(c,f)
4354 (*i_mungetc_buf)(c,f);
4356 Fifo(--mime_input)=c;
4367 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4368 /* re-read and convert again from mime_buffer. */
4370 /* =? has been checked */
4372 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
4373 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4374 /* We accept any character type even if it is broken up by newlines */
4375 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
4376 if (c1=='\n'||c1==' '||c1=='\r'||
4377 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
4379 /* Failed. But this could be another MIME preamble */
4387 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
4388 if (!(++i<MAXRECOVER) || c1==EOF) break;
4389 if (c1=='b'||c1=='B') {
4390 mime_decode_mode = 'B';
4391 } else if (c1=='q'||c1=='Q') {
4392 mime_decode_mode = 'Q';
4396 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
4397 if (!(++i<MAXRECOVER) || c1==EOF) break;
4399 mime_decode_mode = FALSE;
4405 if (!mime_decode_mode) {
4406 /* false MIME preamble, restart from mime_buffer */
4407 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4408 /* Since we are in MIME mode until buffer becomes empty, */
4409 /* we never go into mime_begin again for a while. */
4412 /* discard MIME preamble, and go to MIME mode */
4414 /* do no MIME integrity check */
4415 return c1; /* used only for checking EOF */
4430 fprintf(stderr, "%s\n", str);
4436 set_input_codename (codename)
4441 strcmp(codename, "") != 0 &&
4442 strcmp(codename, input_codename) != 0)
4444 is_inputcode_mixed = TRUE;
4446 input_codename = codename;
4447 is_inputcode_set = TRUE;
4450 #if !defined(PERL_XS) && !defined(WIN32DLL)
4452 print_guessed_code (filename)
4455 char *codename = "BINARY";
4456 if (!is_inputcode_mixed) {
4457 if (strcmp(input_codename, "") == 0) {
4460 codename = input_codename;
4463 if (filename != NULL) printf("%s:", filename);
4464 printf("%s\n", codename);
4470 #ifdef ANSI_C_PROTOTYPE
4471 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
4474 hex_getc(ch, f, g, u)
4487 if (!nkf_isxdigit(c2)){
4492 if (!nkf_isxdigit(c3)){
4497 return (hex2bin(c2) << 4) | hex2bin(c3);
4504 return hex_getc(':', f, i_cgetc, i_cungetc);
4512 return (*i_cungetc)(c, f);
4519 return hex_getc('%', f, i_ugetc, i_uungetc);
4527 return (*i_uungetc)(c, f);
4531 #ifdef NUMCHAR_OPTION
4536 int (*g)() = i_ngetc;
4537 int (*u)() = i_nungetc;
4548 if (buf[i] == 'x' || buf[i] == 'X'){
4549 for (j = 0; j < 5; j++){
4551 if (!nkf_isxdigit(buf[i])){
4558 c |= hex2bin(buf[i]);
4561 for (j = 0; j < 6; j++){
4565 if (!nkf_isdigit(buf[i])){
4572 c += hex2bin(buf[i]);
4578 return CLASS_UTF16 | c;
4588 numchar_ungetc(c, f)
4592 return (*i_nungetc)(c, f);
4596 #ifdef UNICODE_NORMALIZATION
4598 /* Normalization Form C */
4603 int (*g)() = i_nfc_getc;
4604 int (*u)() = i_nfc_ungetc;
4605 int i=0, j, k=1, lower, upper;
4607 const int *array = NULL;
4608 extern const struct normalization_pair normalization_table[];
4611 while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
4612 lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4613 while (upper >= lower) {
4614 j = (lower+upper) / 2;
4615 array = normalization_table[j].nfd;
4616 for (k=0; k < NORMALIZATION_TABLE_NFD_LENGTH && array[k]; k++){
4617 if (array[k] != buf[k]){
4618 array[k] < buf[k] ? (lower = j + 1) : (upper = j - 1);
4625 array = normalization_table[j].nfc;
4626 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4643 return (*i_nfc_ungetc)(c, f);
4645 #endif /* UNICODE_NORMALIZATION */
4652 int c1, c2, c3, c4, cc;
4653 int t1, t2, t3, t4, mode, exit_mode;
4657 int lwsp_size = 128;
4659 if (mime_top != mime_last) { /* Something is in FIFO */
4660 return Fifo(mime_top++);
4662 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4663 mime_decode_mode=FALSE;
4664 unswitch_mime_getc();
4665 return (*i_getc)(f);
4668 if (mimebuf_f == FIXED_MIME)
4669 exit_mode = mime_decode_mode;
4672 if (mime_decode_mode == 'Q') {
4673 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4675 if (c1=='_') return ' ';
4676 if (c1<=' ' || DEL<=c1) {
4677 mime_decode_mode = exit_mode; /* prepare for quit */
4680 if (c1!='=' && c1!='?') {
4684 mime_decode_mode = exit_mode; /* prepare for quit */
4685 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4686 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4687 /* end Q encoding */
4688 input_mode = exit_mode;
4690 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4691 if (lwsp_buf==NULL) {
4692 perror("can't malloc");
4695 while ((c1=(*i_getc)(f))!=EOF) {
4700 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4708 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
4709 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4724 lwsp_buf[lwsp_count] = c1;
4725 if (lwsp_count++>lwsp_size){
4727 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4728 if (lwsp_buf_new==NULL) {
4731 perror("can't realloc");
4734 lwsp_buf = lwsp_buf_new;
4740 if (lwsp_count > 0) {
4741 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4745 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4746 i_ungetc(lwsp_buf[lwsp_count],f);
4754 if (c1=='='&&c2<' ') { /* this is soft wrap */
4755 while((c1 = (*i_mgetc)(f)) <=' ') {
4756 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4758 mime_decode_mode = 'Q'; /* still in MIME */
4759 goto restart_mime_q;
4762 mime_decode_mode = 'Q'; /* still in MIME */
4766 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4767 if (c2<=' ') return c2;
4768 mime_decode_mode = 'Q'; /* still in MIME */
4769 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
4770 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
4771 return ((hex(c2)<<4) + hex(c3));
4774 if (mime_decode_mode != 'B') {
4775 mime_decode_mode = FALSE;
4776 return (*i_mgetc)(f);
4780 /* Base64 encoding */
4782 MIME allows line breaks in the middle of
4783 Base64, but we are very pessimistic when decoding
4784 in unbuf mode, because MIME-encoded text may be broken by
4785 control sequences from less or an editor (such as ESC-[-K) in unbuffered
4786 mode. Incomplete MIME is ignored.
4788 mode = mime_decode_mode;
4789 mime_decode_mode = exit_mode; /* prepare for quit */
4791 while ((c1 = (*i_mgetc)(f))<=' ') {
4796 if ((c2 = (*i_mgetc)(f))<=' ') {
4799 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4800 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4803 if ((c1 == '?') && (c2 == '=')) {
4806 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4807 if (lwsp_buf==NULL) {
4808 perror("can't malloc");
4811 while ((c1=(*i_getc)(f))!=EOF) {
4816 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4824 if ((c1=(*i_getc)(f))!=EOF) {
4828 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4843 lwsp_buf[lwsp_count] = c1;
4844 if (lwsp_count++>lwsp_size){
4846 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4847 if (lwsp_buf_new==NULL) {
4850 perror("can't realloc");
4853 lwsp_buf = lwsp_buf_new;
4859 if (lwsp_count > 0) {
4860 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4864 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4865 i_ungetc(lwsp_buf[lwsp_count],f);
4874 if ((c3 = (*i_mgetc)(f))<=' ') {
4877 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4878 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4882 if ((c4 = (*i_mgetc)(f))<=' ') {
4885 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4886 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4890 mime_decode_mode = mode; /* still in MIME sigh... */
4892 /* BASE 64 decoding */
4894 t1 = 0x3f & base64decode(c1);
4895 t2 = 0x3f & base64decode(c2);
4896 t3 = 0x3f & base64decode(c3);
4897 t4 = 0x3f & base64decode(c4);
4898 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4900 Fifo(mime_last++) = cc;
4901 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4903 Fifo(mime_last++) = cc;
4904 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4906 Fifo(mime_last++) = cc;
4911 return Fifo(mime_top++);
4919 Fifo(--mime_top) = c;
4926 const unsigned char *p;
4930 /* In buffered mode, read until =? or NL or buffer full
4932 mime_input = mime_top;
4933 mime_last = mime_top;
4935 while(*p) Fifo(mime_input++) = *p++;
4938 while((c=(*i_getc)(f))!=EOF) {
4939 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4940 break; /* buffer full */
4942 if (c=='=' && d=='?') {
4943 /* checked. skip header, start decode */
4944 Fifo(mime_input++) = c;
4945 /* mime_last_input = mime_input; */
4950 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4952 /* Should we check length mod 4? */
4953 Fifo(mime_input++) = c;
4956 /* In case of Incomplete MIME, no MIME decode */
4957 Fifo(mime_input++) = c;
4958 mime_last = mime_input; /* point undecoded buffer */
4959 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4960 switch_mime_getc(); /* anyway we need buffered getc */
4971 i = c - 'A'; /* A..Z 0-25 */
4973 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4975 } else if (c > '/') {
4976 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4977 } else if (c == '+') {
4978 i = '>' /* 62 */ ; /* + 62 */
4980 i = '?' /* 63 */ ; /* / 63 */
/* Base64 alphabet: the character at index v encodes the 6-bit value v. */
4985 STATIC const char basis_64[] =
4986 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Capacity of the MIME output look-ahead buffer below. */
4989 #define MIMEOUT_BUF_LENGTH (60)
/* Characters held back before they are written plain or MIME-encoded;
   one spare slot beyond MIMEOUT_BUF_LENGTH. */
4990 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
/* Number of characters currently stored in mimeout_buf. */
4991 int mimeout_buf_count = 0;
/* Flag tested when a MIME word is opened to decide how buffered leading
   whitespace is treated; presumably set by the caller that wants the space
   kept -- TODO confirm. */
4992 int mimeout_preserve_space = 0;
/* Maps 0..15 to an uppercase hexadecimal digit character.
   NOTE(review): the argument is not parenthesized and is evaluated more than
   once, so pass only simple or already-parenthesized expressions. */
4993 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
4999 const unsigned char *p;
5002 p = mime_pattern[0];
5003 for(i=0;mime_encode[i];i++) {
5004 if (mode == mime_encode[i]) {
5005 p = mime_pattern[i];
5009 mimeout_mode = mime_encode_method[i];
5012 if (base64_count>45) {
5013 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
5014 (*o_mputc)(mimeout_buf[i]);
5020 if (!mimeout_preserve_space && mimeout_buf_count>0
5021 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
5022 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
5026 if (!mimeout_preserve_space) {
5027 for (;i<mimeout_buf_count;i++) {
5028 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
5029 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
5030 (*o_mputc)(mimeout_buf[i]);
5037 mimeout_preserve_space = FALSE;
5043 j = mimeout_buf_count;
5044 mimeout_buf_count = 0;
5046 mime_putc(mimeout_buf[i]);
5062 switch(mimeout_mode) {
5067 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
5073 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
5079 if (mimeout_f!=FIXED_MIME) {
5081 } else if (mimeout_mode != 'Q')
5090 switch(mimeout_mode) {
5095 } else if (c==CR||c==NL) {
5098 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
5100 (*o_mputc)(itoh4(((c>>4)&0xf)));
5101 (*o_mputc)(itoh4((c&0xf)));
5110 (*o_mputc)(basis_64[c>>2]);
5115 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5121 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
5122 (*o_mputc)(basis_64[c & 0x3F]);
5133 int mime_lastchar2, mime_lastchar1;
5135 void mime_prechar(c2, c1)
5140 if (base64_count + mimeout_buf_count/3*4> 66){
5141 (*o_base64conv)(EOF,0);
5142 (*o_base64conv)(0,NL);
5143 (*o_base64conv)(0,SPACE);
5145 }/*else if (mime_lastchar2){
5146 if (c1 <=DEL && !nkf_isspace(c1)){
5147 (*o_base64conv)(0,SPACE);
5151 if (c2 && mime_lastchar2 == 0
5152 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
5153 (*o_base64conv)(0,SPACE);
5156 mime_lastchar2 = c2;
5157 mime_lastchar1 = c1;
5168 if (mimeout_f == FIXED_MIME){
5169 if (mimeout_mode == 'Q'){
5170 if (base64_count > 71){
5171 if (c!=CR && c!=NL) {
5178 if (base64_count > 71){
5183 if (c == EOF) { /* c==EOF */
5187 if (c != EOF) { /* c is a real character, not EOF */
5193 /* mimeout_f != FIXED_MIME */
5195 if (c == EOF) { /* c==EOF */
5196 j = mimeout_buf_count;
5197 mimeout_buf_count = 0;
5200 /*if (nkf_isspace(mimeout_buf[i])){
5203 mimeout_addchar(mimeout_buf[i]);
5207 (*o_mputc)(mimeout_buf[i]);
5213 if (mimeout_mode=='Q') {
5214 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5226 if (mimeout_buf_count > 0){
5227 lastchar = mimeout_buf[mimeout_buf_count - 1];
5232 if (!mimeout_mode) {
5233 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
5234 if (nkf_isspace(c)) {
5235 if (c==CR || c==NL) {
5238 for (i=0;i<mimeout_buf_count;i++) {
5239 (*o_mputc)(mimeout_buf[i]);
5240 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
5247 mimeout_buf_count = 1;
5249 if (base64_count > 1
5250 && base64_count + mimeout_buf_count > 76){
5253 if (!nkf_isspace(mimeout_buf[0])){
5258 mimeout_buf[mimeout_buf_count++] = c;
5259 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5260 open_mime(output_mode);
5265 if (lastchar==CR || lastchar == NL){
5266 for (i=0;i<mimeout_buf_count;i++) {
5267 (*o_mputc)(mimeout_buf[i]);
5270 mimeout_buf_count = 0;
5272 if (lastchar==SPACE) {
5273 for (i=0;i<mimeout_buf_count-1;i++) {
5274 (*o_mputc)(mimeout_buf[i]);
5277 mimeout_buf[0] = SPACE;
5278 mimeout_buf_count = 1;
5280 open_mime(output_mode);
5283 /* mimeout_mode == 'B', 1, 2 */
5284 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5285 if (lastchar == CR || lastchar == NL){
5286 if (nkf_isblank(c)) {
5287 for (i=0;i<mimeout_buf_count;i++) {
5288 mimeout_addchar(mimeout_buf[i]);
5290 mimeout_buf_count = 0;
5291 } else if (SPACE<c && c<DEL) {
5293 for (i=0;i<mimeout_buf_count;i++) {
5294 (*o_mputc)(mimeout_buf[i]);
5297 mimeout_buf_count = 0;
5300 if (c==SPACE || c==TAB || c==CR || c==NL) {
5301 for (i=0;i<mimeout_buf_count;i++) {
5302 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
5304 for (i=0;i<mimeout_buf_count;i++) {
5305 (*o_mputc)(mimeout_buf[i]);
5308 mimeout_buf_count = 0;
5311 mimeout_buf[mimeout_buf_count++] = c;
5312 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5314 for (i=0;i<mimeout_buf_count;i++) {
5315 (*o_mputc)(mimeout_buf[i]);
5318 mimeout_buf_count = 0;
5322 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
5323 mimeout_buf[mimeout_buf_count++] = c;
5324 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5325 j = mimeout_buf_count;
5326 mimeout_buf_count = 0;
5328 mimeout_addchar(mimeout_buf[i]);
5335 if (mimeout_buf_count>0) {
5336 j = mimeout_buf_count;
5337 mimeout_buf_count = 0;
5339 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
5341 mimeout_addchar(mimeout_buf[i]);
5347 (*o_mputc)(mimeout_buf[i]);
5349 open_mime(output_mode);
5356 #if defined(PERL_XS) || defined(WIN32DLL)
5361 struct input_code *p = input_code_list;
5374 mime_f = STRICT_MIME;
5375 mime_decode_f = FALSE;
5380 #if defined(MSDOS) || defined(__OS2__)
5385 iso2022jp_f = FALSE;
5386 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5389 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
5390 internal_unicode_f = FALSE;
5392 #ifdef UTF8_INPUT_ENABLE
5393 strict_mapping_f = TRUE;
5394 disable_cp932ext_f = FALSE;
5395 ignore_zwnbsp_f = TRUE;
5396 unicode_round_trip_f = FALSE;
5397 encode_fallback = NULL;
5398 unicode_subchar = '?';
5400 #ifdef UTF8_OUTPUT_ENABLE
5404 #ifdef UNICODE_NORMALIZATION
5417 is_inputcode_mixed = FALSE;
5418 is_inputcode_set = FALSE;
5422 #ifdef SHIFTJIS_CP932
5432 for (i = 0; i < 256; i++){
5433 prefix_table[i] = 0;
5436 #ifdef UTF8_INPUT_ENABLE
5437 utf16_mode = UTF16BE_INPUT;
5439 mimeout_buf_count = 0;
5444 fold_preserve_f = FALSE;
5447 kanji_intro = DEFAULT_J;
5448 ascii_intro = DEFAULT_R;
5449 fold_margin = FOLD_MARGIN;
5450 output_conv = DEFAULT_CONV;
5451 oconv = DEFAULT_CONV;
5452 o_zconv = no_connection;
5453 o_fconv = no_connection;
5454 o_crconv = no_connection;
5455 o_rot_conv = no_connection;
5456 o_hira_conv = no_connection;
5457 o_base64conv = no_connection;
5458 o_iso2022jp_check_conv = no_connection;
5461 i_ungetc = std_ungetc;
5463 i_bungetc = std_ungetc;
5466 i_mungetc = std_ungetc;
5467 i_mgetc_buf = std_getc;
5468 i_mungetc_buf = std_ungetc;
5469 output_mode = ASCII;
5472 mime_decode_mode = FALSE;
5478 z_prev2=0,z_prev1=0;
5480 iconv_for_check = 0;
5482 input_codename = "";
5490 no_connection(c2,c1)
5493 no_connection2(c2,c1,0);
5497 no_connection2(c2,c1,c0)
5500 fprintf(stderr,"nkf internal module connection failure.\n");
5502 return 0; /* LINT */
5507 #define fprintf dllprintf
5512 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
5513 fprintf(stderr,"Flags:\n");
5514 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
5515 #ifdef DEFAULT_CODE_SJIS
5516 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8N\n");
5518 #ifdef DEFAULT_CODE_JIS
5519 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8N\n");
5521 #ifdef DEFAULT_CODE_EUC
5522 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8N\n");
5524 #ifdef DEFAULT_CODE_UTF8
5525 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8N (DEFAULT)\n");
5527 #ifdef UTF8_OUTPUT_ENABLE
5528 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
5530 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
5531 #ifdef UTF8_INPUT_ENABLE
5532 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
5534 fprintf(stderr,"t no conversion\n");
5535 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
5536 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
5537 fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
5538 fprintf(stderr,"v Show this usage. V: show version\n");
5539 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
5540 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
5541 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
5542 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
5543 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
5544 fprintf(stderr," 3: Convert HTML Entity\n");
5545 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
5546 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
5548 fprintf(stderr,"T Text mode output\n");
5550 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
5551 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
5552 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
5553 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
5554 fprintf(stderr,"long name options\n");
5555 fprintf(stderr," --ic=<input codeset> --oc=<output codeset> set the input or output codeset\n");
5556 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
5557 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
5558 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
5559 fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
5561 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
5563 #ifdef NUMCHAR_OPTION
5564 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
5566 #ifdef UTF8_INPUT_ENABLE
5567 fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
5568 fprintf(stderr," set the way nkf handles unassigned characters\n");
5571 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
5573 fprintf(stderr," -g, --guess Guess the input code\n");
5574 fprintf(stderr," --help,--version\n");
5581 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
5582 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
5585 #if defined(MSDOS) && defined(__WIN16__)
5588 #if defined(MSDOS) && defined(__WIN32__)
5594 ,NKF_VERSION,NKF_RELEASE_DATE);
5595 fprintf(stderr,"\n%s\n",CopyRight);
5600 ** Patch authors
5601 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
5602 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
5603 ** ohta@src.ricoh.co.jp (Junn Ohta)
5604 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
5605 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
5606 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
5607 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
5608 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
5609 ** GHG00637@nifty-serve.or.jp (COW)