1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.87 2006/01/05 08:45:32 naruse Exp $ */
43 #define NKF_VERSION "2.0.5"
44 #define NKF_RELEASE_DATE "2005-12-08"
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse"
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP characters to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
116 #if defined(MSDOS) || defined(__OS2__)
123 #define setbinmode(fp) fsetbin(fp)
124 #else /* Microsoft C, Turbo C */
125 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
127 #else /* UNIX,OS/2 */
128 #define setbinmode(fp)
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
134 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
147 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16BE_INPUT 14
212 #define UTF16LE_INPUT 15
/*
 * Locale-independent ASCII character classification helpers.
 *
 * Every macro parameter is parenthesized so that a compound argument
 * (e.g. `a ? b : c` or `x & 0x7f`) expands with the intended precedence.
 * Arguments are still evaluated more than once, so never pass an
 * expression with side effects such as `*cp++`.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c) (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c) ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c) ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
/* SPACE, TAB, CR and NL are constants defined elsewhere in this file. */
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
/* Numeric value of one digit character; x must already satisfy nkf_isxdigit(). */
#define hex2bin(x) ( nkf_isdigit(x) ? (x) - '0' : nkf_toupper(x) - 'A' + 10)
246 #define HOLD_SIZE 1024
247 #define IOBUF_SIZE 16384
249 #define DEFAULT_J 'B'
250 #define DEFAULT_R 'B'
252 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
253 #define SJ6394 0x0161 /* 63 - 94 ku offset */
255 #define RANGE_NUM_MAX 18
260 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
261 #define sizeof_euc_utf8 94
262 #define sizeof_euc_to_utf8_1byte 94
263 #define sizeof_euc_to_utf8_2bytes 94
264 #define sizeof_utf8_to_euc_C2 64
265 #define sizeof_utf8_to_euc_E5B8 64
266 #define sizeof_utf8_to_euc_2bytes 112
267 #define sizeof_utf8_to_euc_3bytes 16
270 /* MIME preprocessor */
272 #ifdef EASYWIN /*Easy Win */
273 extern POINT _BufferSize;
276 /* function prototype */
278 #ifdef ANSI_C_PROTOTYPE
280 #define STATIC static
294 void (*status_func)PROTO((struct input_code *, int));
295 int (*iconv_func)PROTO((int c2, int c1, int c0));
299 STATIC char *input_codename = "";
302 STATIC const char *CopyRight = COPY_RIGHT;
304 #if !defined(PERL_XS) && !defined(WIN32DLL)
305 STATIC int noconvert PROTO((FILE *f));
307 STATIC int kanji_convert PROTO((FILE *f));
308 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
309 STATIC int push_hold_buf PROTO((int c2));
310 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
311 STATIC int s_iconv PROTO((int c2,int c1,int c0));
312 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
313 STATIC int e_iconv PROTO((int c2,int c1,int c0));
314 #ifdef UTF8_INPUT_ENABLE
315 /* don't convert characters when the mapping is not defined in the standard */
316 STATIC int strict_mapping_f = TRUE;
317 /* disable NEC special, NEC-selected IBM extended and IBM extended characters */
318 STATIC int disable_cp932ext_f = FALSE;
319 /* ignore ZERO WIDTH NO-BREAK SPACE */
320 STATIC int ignore_zwnbsp_f = TRUE;
321 /* don't convert characters that can't secure round trip conversion */
322 STATIC int unicode_round_trip_f = FALSE;
323 STATIC void encode_fallback_html PROTO((int c));
324 STATIC void encode_fallback_xml PROTO((int c));
325 STATIC void encode_fallback_java PROTO((int c));
326 STATIC void encode_fallback_perl PROTO((int c));
327 STATIC void encode_fallback_subchar PROTO((int c));
328 STATIC void (*encode_fallback)PROTO((int c)) = NULL;
329 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
330 STATIC int w_iconv PROTO((int c2,int c1,int c0));
331 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
332 STATIC int unicode_to_jis_common PROTO((int c2,int c1,int c0,int *p2,int *p1));
333 STATIC int w_iconv_common PROTO((int c1,int c0,const unsigned short *const *pp,int psize,int *p2,int *p1));
334 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
335 STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
337 #ifdef UTF8_OUTPUT_ENABLE
338 STATIC int e2w_conv PROTO((int c2,int c1));
339 STATIC void w_oconv PROTO((int c2,int c1));
340 STATIC void w_oconv16 PROTO((int c2,int c1));
342 STATIC void e_oconv PROTO((int c2,int c1));
343 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
344 STATIC void s_oconv PROTO((int c2,int c1));
345 STATIC void j_oconv PROTO((int c2,int c1));
346 STATIC void fold_conv PROTO((int c2,int c1));
347 STATIC void cr_conv PROTO((int c2,int c1));
348 STATIC void z_conv PROTO((int c2,int c1));
349 STATIC void rot_conv PROTO((int c2,int c1));
350 STATIC void hira_conv PROTO((int c2,int c1));
351 STATIC void base64_conv PROTO((int c2,int c1));
352 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
353 STATIC void no_connection PROTO((int c2,int c1));
354 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
356 STATIC void code_score PROTO((struct input_code *ptr));
357 STATIC void code_status PROTO((int c));
359 STATIC void std_putc PROTO((int c));
360 STATIC int std_getc PROTO((FILE *f));
361 STATIC int std_ungetc PROTO((int c,FILE *f));
363 STATIC int broken_getc PROTO((FILE *f));
364 STATIC int broken_ungetc PROTO((int c,FILE *f));
366 STATIC int mime_begin PROTO((FILE *f));
367 STATIC int mime_getc PROTO((FILE *f));
368 STATIC int mime_ungetc PROTO((int c,FILE *f));
370 STATIC int mime_begin_strict PROTO((FILE *f));
371 STATIC int mime_getc_buf PROTO((FILE *f));
372 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
373 STATIC int mime_integrity PROTO((FILE *f,const unsigned char *p));
375 STATIC int base64decode PROTO((int c));
376 STATIC void mime_prechar PROTO((int c2, int c1));
377 STATIC void mime_putc PROTO((int c));
378 STATIC void open_mime PROTO((int c));
379 STATIC void close_mime PROTO(());
381 STATIC void usage PROTO(());
382 STATIC void version PROTO(());
384 STATIC void options PROTO((unsigned char *c));
385 #if defined(PERL_XS) || defined(WIN32DLL)
386 STATIC void reinit PROTO(());
391 #if !defined(PERL_XS) && !defined(WIN32DLL)
392 STATIC unsigned char stdibuf[IOBUF_SIZE];
393 STATIC unsigned char stdobuf[IOBUF_SIZE];
395 STATIC unsigned char hold_buf[HOLD_SIZE*2];
396 STATIC int hold_count;
398 /* MIME preprocessor fifo: a fixed-size ring buffer addressed through Fifo() */
400 #define MIME_BUF_SIZE (1024) /* ring buffer size; must stay a power of two (2^n) for the mask below */
401 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) /* n & MIME_BUF_MASK wraps n into 0..MIME_BUF_SIZE-1 */
402 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] /* slot of mime_buf selected by index n, wrapped by the mask */
403 STATIC unsigned char mime_buf[MIME_BUF_SIZE]; /* backing storage accessed via Fifo() */
404 STATIC unsigned int mime_top = 0;
405 STATIC unsigned int mime_last = 0; /* decoded */
406 STATIC unsigned int mime_input = 0; /* undecoded */
407 STATIC int (*mime_iconv_back)PROTO((int c2,int c1,int c0)) = NULL;
410 STATIC int unbuf_f = FALSE;
411 STATIC int estab_f = FALSE;
412 STATIC int nop_f = FALSE;
413 STATIC int binmode_f = TRUE; /* binary mode */
414 STATIC int rot_f = FALSE; /* ROT13/47 mode */
415 STATIC int hira_f = FALSE; /* hiragana/katakana conversion (bit flags: katakana->hiragana, hiragana->katakana) */
416 STATIC int input_f = FALSE; /* assumed input code (e.g. JIS_INPUT, SJIS_INPUT, UTF8_INPUT); FALSE = not fixed */
417 STATIC int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII (bit flags OR-ed in by the 'Z' option) */
418 STATIC int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
419 STATIC int mime_decode_f = FALSE; /* mime decode is explicitly on */
420 STATIC int mimebuf_f = FALSE; /* MIME buffered input */
421 STATIC int broken_f = FALSE; /* convert ESC-less broken JIS */
422 STATIC int iso8859_f = FALSE; /* ISO8859 through */
423 STATIC int mimeout_f = FALSE; /* base64 mode */
424 #if defined(MSDOS) || defined(__OS2__)
425 STATIC int x0201_f = TRUE; /* Assume JISX0201 kana */
427 STATIC int x0201_f = NO_X0201; /* Assume NO JISX0201 */
429 STATIC int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
430 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
431 STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
433 #ifdef UTF8_OUTPUT_ENABLE
434 STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
435 STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
436 STATIC int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
437 STATIC int unicode_subchar = '?'; /* the regular substitution character */
440 #ifdef UNICODE_NORMALIZATION
441 STATIC int nfc_f = FALSE;
442 STATIC int (*i_nfc_getc)PROTO((FILE *)) = std_getc; /* input of ugetc */
443 STATIC int (*i_nfc_ungetc)PROTO((int c ,FILE *f)) = std_ungetc;
444 STATIC int nfc_getc PROTO((FILE *f));
445 STATIC int nfc_ungetc PROTO((int c,FILE *f));
449 STATIC int cap_f = FALSE;
450 STATIC int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
451 STATIC int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
452 STATIC int cap_getc PROTO((FILE *f));
453 STATIC int cap_ungetc PROTO((int c,FILE *f));
455 STATIC int url_f = FALSE;
456 STATIC int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
457 STATIC int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
458 STATIC int url_getc PROTO((FILE *f));
459 STATIC int url_ungetc PROTO((int c,FILE *f));
462 #ifdef NUMCHAR_OPTION
463 #define CLASS_MASK 0x0f000000
464 #define CLASS_UTF16 0x01000000
465 STATIC int numchar_f = FALSE;
466 STATIC int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
467 STATIC int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
468 STATIC int numchar_getc PROTO((FILE *f));
469 STATIC int numchar_ungetc PROTO((int c,FILE *f));
473 STATIC int noout_f = FALSE;
474 STATIC void no_putc PROTO((int c));
475 STATIC int debug_f = FALSE;
476 STATIC void debug PROTO((const char *str));
477 STATIC int (*iconv_for_check)() = 0;
480 STATIC int guess_f = FALSE;
482 STATIC void print_guessed_code PROTO((char *filename));
484 STATIC void set_input_codename PROTO((char *codename));
485 STATIC int is_inputcode_mixed = FALSE;
486 STATIC int is_inputcode_set = FALSE;
489 STATIC int exec_f = 0;
492 #ifdef SHIFTJIS_CP932
493 /* invert IBM extended characters to others
494 and controls some UCS mapping for Microsoft Code Page */
495 STATIC int cp51932_f = TRUE;
496 #define CP932_TABLE_BEGIN (0xfa)
497 #define CP932_TABLE_END (0xfc)
499 /* invert NEC-selected IBM extended characters to IBM extended characters */
500 STATIC int cp932inv_f = TRUE;
501 #define CP932INV_TABLE_BEGIN (0xed)
502 #define CP932INV_TABLE_END (0xee)
504 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
505 #endif /* SHIFTJIS_CP932 */
508 STATIC int x0212_f = FALSE;
509 STATIC int x0212_shift PROTO((int c));
510 STATIC int x0212_unshift PROTO((int c));
513 STATIC unsigned char prefix_table[256];
515 STATIC void e_status PROTO((struct input_code *, int));
516 STATIC void s_status PROTO((struct input_code *, int));
518 #ifdef UTF8_INPUT_ENABLE
519 STATIC void w_status PROTO((struct input_code *, int));
520 STATIC void w16_status PROTO((struct input_code *, int));
521 STATIC int utf16_mode = UTF16BE_INPUT;
524 struct input_code input_code_list[] = {
525 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
526 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
527 #ifdef UTF8_INPUT_ENABLE
528 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
529 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
534 STATIC int mimeout_mode = 0;
535 STATIC int base64_count = 0;
537 /* X0208 -> ASCII converter */
540 STATIC int f_line = 0; /* chars in line */
541 STATIC int f_prev = 0;
542 STATIC int fold_preserve_f = FALSE; /* preserve new lines */
543 STATIC int fold_f = FALSE;
544 STATIC int fold_len = 0;
547 STATIC unsigned char kanji_intro = DEFAULT_J;
548 STATIC unsigned char ascii_intro = DEFAULT_R;
552 #define FOLD_MARGIN 10
553 #define DEFAULT_FOLD 60
555 STATIC int fold_margin = FOLD_MARGIN;
559 #ifdef DEFAULT_CODE_JIS
560 # define DEFAULT_CONV j_oconv
562 #ifdef DEFAULT_CODE_SJIS
563 # define DEFAULT_CONV s_oconv
565 #ifdef DEFAULT_CODE_EUC
566 # define DEFAULT_CONV e_oconv
568 #ifdef DEFAULT_CODE_UTF8
569 # define DEFAULT_CONV w_oconv
572 /* process default */
573 STATIC void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
575 STATIC void (*oconv)PROTO((int c2,int c1)) = no_connection;
576 /* s_iconv or oconv */
577 STATIC int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
579 STATIC void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
580 STATIC void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
581 STATIC void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
582 STATIC void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
583 STATIC void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
584 STATIC void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
585 STATIC void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
587 /* STATIC redirections */
589 STATIC void (*o_putc)PROTO((int c)) = std_putc;
591 STATIC int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
592 STATIC int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
594 STATIC int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
595 STATIC int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
597 STATIC void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
599 STATIC int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
600 STATIC int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
602 /* for strict mime */
603 STATIC int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
604 STATIC int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
607 STATIC int output_mode = ASCII, /* output kanji mode */
608 input_mode = ASCII, /* input kanji mode */
609 shift_mode = FALSE; /* TRUE shift out, or X0201 */
610 STATIC int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
612 /* X0201 / X0208 conversion tables */
614 /* X0201 kana conversion table */
617 unsigned char cv[]= {
618 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
619 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
620 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
621 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
622 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
623 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
624 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
625 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
626 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
627 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
628 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
629 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
630 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
631 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
632 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
633 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
637 /* X0201 kana conversion table for dakuten (voiced sound mark) */
640 unsigned char dv[]= {
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
646 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
647 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
648 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
649 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
650 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
651 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
652 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
653 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
655 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
656 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
659 /* X0201 kana conversion table for handakuten (semi-voiced sound mark) */
662 unsigned char ev[]= {
663 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
664 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
665 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
666 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
674 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
675 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
677 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
678 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
682 /* X0208 kigou conversion table */
683 /* 0x8140 - 0x819e */
685 unsigned char fv[] = {
687 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
688 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
689 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
690 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
691 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
692 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
693 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
694 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
695 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
696 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
697 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
698 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
704 STATIC int file_out = FALSE;
706 STATIC int overwrite = FALSE;
709 STATIC int crmode_f = 0; /* CR, NL, CRLF */
710 #ifdef EASYWIN /*Easy Win */
711 STATIC int end_check;
714 #define STD_GC_BUFSIZE (256)
715 int std_gc_buf[STD_GC_BUFSIZE];
719 #include "nkf32dll.c"
720 #elif defined(PERL_XS)
730 char *outfname = NULL;
733 #ifdef EASYWIN /*Easy Win */
734 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
737 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
738 cp = (unsigned char *)*argv;
743 if (pipe(fds) < 0 || (pid = fork()) < 0){
754 execvp(argv[1], &argv[1]);
768 if(x0201_f == WISH_TRUE)
769 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
771 if (binmode_f == TRUE)
773 if (freopen("","wb",stdout) == NULL)
780 setbuf(stdout, (char *) NULL);
782 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
785 if (binmode_f == TRUE)
787 if (freopen("","rb",stdin) == NULL) return (-1);
791 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
795 kanji_convert(stdin);
796 if (guess_f) print_guessed_code(NULL);
801 is_inputcode_mixed = FALSE;
802 is_inputcode_set = FALSE;
807 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
816 /* reopen file for stdout */
817 if (file_out == TRUE) {
820 outfname = malloc(strlen(origfname)
821 + strlen(".nkftmpXXXXXX")
827 strcpy(outfname, origfname);
831 for (i = strlen(outfname); i; --i){
832 if (outfname[i - 1] == '/'
833 || outfname[i - 1] == '\\'){
839 strcat(outfname, "ntXXXXXX");
841 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
844 strcat(outfname, ".nkftmpXXXXXX");
845 fd = mkstemp(outfname);
848 || (fd_backup = dup(fileno(stdout))) < 0
849 || dup2(fd, fileno(stdout)) < 0
860 outfname = "nkf.out";
863 if(freopen(outfname, "w", stdout) == NULL) {
867 if (binmode_f == TRUE) {
869 if (freopen("","wb",stdout) == NULL)
876 if (binmode_f == TRUE)
878 if (freopen("","rb",fin) == NULL)
883 setvbuffer(fin, stdibuf, IOBUF_SIZE);
887 char *filename = NULL;
889 if (nfiles > 1) filename = origfname;
890 if (guess_f) print_guessed_code(filename);
896 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
904 if (dup2(fd_backup, fileno(stdout)) < 0){
907 if (stat(origfname, &sb)) {
908 fprintf(stderr, "Can't stat %s\n", origfname);
910 /* restore permissions */
911 if (chmod(outfname, sb.st_mode)) {
912 fprintf(stderr, "Can't set permission %s\n", outfname);
915 /* restore timestamps */
916 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
917 tb[0] = tb[1] = sb.st_mtime;
918 if (utime(outfname, tb)) {
919 fprintf(stderr, "Can't set timestamp %s\n", outfname);
922 tb.actime = sb.st_atime;
923 tb.modtime = sb.st_mtime;
924 if (utime(outfname, &tb)) {
925 fprintf(stderr, "Can't set timestamp %s\n", outfname);
929 if (unlink(origfname)){
933 if (rename(outfname, origfname)) {
935 fprintf(stderr, "Can't rename %s to %s\n",
936 outfname, origfname);
944 #ifdef EASYWIN /*Easy Win */
945 if (file_out == FALSE)
946 scanf("%d",&end_check);
949 #else /* for Other OS */
950 if (file_out == TRUE)
955 #endif /* WIN32DLL */
982 {"katakana-hiragana","h3"},
989 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
990 {"internal-unicode", ""},
992 #ifdef UTF8_OUTPUT_ENABLE
1002 {"fb-subchar=", ""},
1004 #ifdef UTF8_INPUT_ENABLE
1005 {"utf8-input", "W"},
1006 {"utf16-input", "W16"},
1007 {"disable-cp932ext", ""},
1008 {"strict-mapping", ""},
1009 {"enable-round-trip",""},
1011 #ifdef UNICODE_NORMALIZATION
1012 {"utf8mac-input", ""},
1021 #ifdef NUMCHAR_OPTION
1022 {"numchar-input", ""},
1028 #ifdef SHIFTJIS_CP932
1038 STATIC int option_mode = 0;
1045 unsigned char *p = NULL;
1046 unsigned char *cp_back = NULL;
1047 unsigned char codeset[32];
1051 while(*cp && *cp++!='-');
1052 while (*cp || cp_back) {
1060 case '-': /* literal options */
1061 if (!*cp || *cp == SPACE) { /* ignore the rest of arguments */
1065 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1066 p = (unsigned char *)long_option[i].name;
1067 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1068 if (*p == cp[j] || cp[j] == ' '){
1075 while(*cp && *cp != SPACE && cp++);
1076 if (long_option[i].alias[0]){
1078 cp = (unsigned char *)long_option[i].alias;
1080 if (strcmp(long_option[i].name, "ic=") == 0){
1081 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1082 codeset[i] = nkf_toupper(p[i]);
1085 if(strcmp(codeset, "ISO-2022-JP") == 0){
1086 input_f = JIS_INPUT;
1087 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1088 input_f = SJIS_INPUT;
1089 if (x0201_f==NO_X0201) x0201_f=TRUE;
1090 }else if(strcmp(codeset, "CP932") == 0){
1091 input_f = SJIS_INPUT;
1093 #ifdef SHIFTJIS_CP932
1097 #ifdef UTF8_OUTPUT_ENABLE
1098 ms_ucs_map_f = TRUE;
1100 }else if(strcmp(codeset, "EUCJP") == 0 ||
1101 strcmp(codeset, "EUC-JP") == 0){
1102 input_f = JIS_INPUT;
1103 }else if(strcmp(codeset, "CP51932") == 0){
1104 input_f = JIS_INPUT;
1106 #ifdef SHIFTJIS_CP932
1110 #ifdef UTF8_OUTPUT_ENABLE
1111 ms_ucs_map_f = TRUE;
1113 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1114 strcmp(codeset, "EUCJP-MS") == 0){
1115 input_f = JIS_INPUT;
1117 #ifdef SHIFTJIS_CP932
1121 #ifdef UTF8_OUTPUT_ENABLE
1122 ms_ucs_map_f = TRUE;
1124 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1125 strcmp(codeset, "EUCJP-ASCII") == 0){
1126 input_f = JIS_INPUT;
1128 #ifdef SHIFTJIS_CP932
1132 #ifdef UTF8_OUTPUT_ENABLE
1133 ms_ucs_map_f = FALSE;
1135 #ifdef UTF8_INPUT_ENABLE
1136 }else if(strcmp(codeset, "UTF-8") == 0 ||
1137 strcmp(codeset, "UTF-8N") == 0 ||
1138 strcmp(codeset, "UTF-8-BOM") == 0){
1139 input_f = UTF8_INPUT;
1140 #ifdef UNICODE_NORMALIZATION
1141 }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1142 strcmp(codeset, "UTF-8-MAC") == 0){
1143 input_f = UTF8_INPUT;
1146 }else if(strcmp(codeset, "UTF-16") == 0){
1147 input_f = UTF16BE_INPUT;
1148 utf16_mode = UTF16BE_INPUT;
1149 }else if(strcmp(codeset, "UTF-16BE") == 0 ||
1150 strcmp(codeset, "UTF-16BE-BOM") == 0){
1151 input_f = UTF16BE_INPUT;
1152 utf16_mode = UTF16BE_INPUT;
1153 }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1154 strcmp(codeset, "UTF-16LE-BOM") == 0){
1155 input_f = UTF16LE_INPUT;
1156 utf16_mode = UTF16LE_INPUT;
1161 if (strcmp(long_option[i].name, "oc=") == 0){
1162 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1163 codeset[i] = nkf_toupper(p[i]);
1166 if(strcmp(codeset, "ISO-2022-JP") == 0){
1167 output_conv = j_oconv;
1168 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1169 output_conv = s_oconv;
1170 }else if(strcmp(codeset, "CP932") == 0){
1171 output_conv = s_oconv;
1173 #ifdef SHIFTJIS_CP932
1177 #ifdef UTF8_OUTPUT_ENABLE
1178 ms_ucs_map_f = TRUE;
1180 }else if(strcmp(codeset, "EUCJP") == 0 ||
1181 strcmp(codeset, "EUC-JP") == 0){
1182 output_conv = e_oconv;
1183 }else if(strcmp(codeset, "CP51932") == 0){
1184 output_conv = e_oconv;
1186 #ifdef SHIFTJIS_CP932
1190 #ifdef UTF8_OUTPUT_ENABLE
1191 ms_ucs_map_f = TRUE;
1193 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1194 strcmp(codeset, "EUCJP-MS") == 0){
1195 output_conv = e_oconv;
1198 #ifdef SHIFTJIS_CP932
1201 #ifdef UTF8_OUTPUT_ENABLE
1202 ms_ucs_map_f = TRUE;
1204 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1205 strcmp(codeset, "EUCJP-ASCII") == 0){
1206 output_conv = e_oconv;
1209 #ifdef SHIFTJIS_CP932
1212 #ifdef UTF8_OUTPUT_ENABLE
1213 ms_ucs_map_f = FALSE;
1215 #ifdef UTF8_OUTPUT_ENABLE
1216 }else if(strcmp(codeset, "UTF-8") == 0){
1217 output_conv = w_oconv;
1218 }else if(strcmp(codeset, "UTF-8N") == 0){
1219 output_conv = w_oconv;
1221 }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1222 output_conv = w_oconv;
1224 }else if(strcmp(codeset, "UTF-16BE") == 0){
1225 output_conv = w_oconv16;
1227 }else if(strcmp(codeset, "UTF-16") == 0 ||
1228 strcmp(codeset, "UTF-16BE-BOM") == 0){
1229 output_conv = w_oconv16;
1231 }else if(strcmp(codeset, "UTF-16LE") == 0){
1232 output_conv = w_oconv16;
1235 }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1236 output_conv = w_oconv16;
1244 if (strcmp(long_option[i].name, "overwrite") == 0){
1251 if (strcmp(long_option[i].name, "cap-input") == 0){
1255 if (strcmp(long_option[i].name, "url-input") == 0){
1260 #ifdef NUMCHAR_OPTION
1261 if (strcmp(long_option[i].name, "numchar-input") == 0){
1267 if (strcmp(long_option[i].name, "no-output") == 0){
1271 if (strcmp(long_option[i].name, "debug") == 0){
1276 if (strcmp(long_option[i].name, "cp932") == 0){
1277 #ifdef SHIFTJIS_CP932
1281 #ifdef UTF8_OUTPUT_ENABLE
1282 ms_ucs_map_f = TRUE;
1286 if (strcmp(long_option[i].name, "no-cp932") == 0){
1287 #ifdef SHIFTJIS_CP932
1291 #ifdef UTF8_OUTPUT_ENABLE
1292 ms_ucs_map_f = FALSE;
1296 #ifdef SHIFTJIS_CP932
1297 if (strcmp(long_option[i].name, "cp932inv") == 0){
1304 if (strcmp(long_option[i].name, "x0212") == 0){
1311 if (strcmp(long_option[i].name, "exec-in") == 0){
1315 if (strcmp(long_option[i].name, "exec-out") == 0){
1320 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1321 if (strcmp(long_option[i].name, "internal-unicode") == 0){
1322 internal_unicode_f = TRUE;
1325 if (strcmp(long_option[i].name, "disable-cp932ext") == 0){
1326 disable_cp932ext_f = TRUE;
1329 if (strcmp(long_option[i].name, "enable-round-trip") == 0){
1330 unicode_round_trip_f = TRUE;
1333 if (strcmp(long_option[i].name, "fb-skip") == 0){
1334 encode_fallback = NULL;
1337 if (strcmp(long_option[i].name, "fb-html") == 0){
1338 encode_fallback = encode_fallback_html;
1341 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1342 encode_fallback = encode_fallback_xml;
1345 if (strcmp(long_option[i].name, "fb-java") == 0){
1346 encode_fallback = encode_fallback_java;
1349 if (strcmp(long_option[i].name, "fb-perl") == 0){
1350 encode_fallback = encode_fallback_perl;
1353 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1354 encode_fallback = encode_fallback_subchar;
1357 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1358 encode_fallback = encode_fallback_subchar;
1359 unicode_subchar = 0;
1361 /* decimal number */
1362 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1363 unicode_subchar *= 10;
1364 unicode_subchar += hex2bin(p[i]);
1366 }else if(p[1] == 'x' || p[1] == 'X'){
1367 /* hexadecimal number */
1368 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1369 unicode_subchar <<= 4;
1370 unicode_subchar |= hex2bin(p[i]);
1374 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1375 unicode_subchar *= 8;
1376 unicode_subchar += hex2bin(p[i]);
1379 w16e_conv(unicode_subchar, &i, &j);
1380 unicode_subchar = i<<8 | j;
1384 #ifdef UTF8_OUTPUT_ENABLE
1385 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1386 ms_ucs_map_f = TRUE;
1390 #ifdef UNICODE_NORMALIZATION
1391 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1392 input_f = UTF8_INPUT;
1397 if (strcmp(long_option[i].name, "prefix=") == 0){
1398 if (' ' < p[0] && p[0] < 128){
1399 for (i = 1; ' ' < p[i] && p[i] < 128; i++){
1400 prefix_table[p[i]] = p[0];
1407 case 'b': /* buffered mode */
1410 case 'u': /* unbuffered mode */
1413 case 't': /* transparent mode */
1416 case 'j': /* JIS output */
1418 output_conv = j_oconv;
1420 case 'e': /* AT&T EUC output */
1421 output_conv = e_oconv;
1423 case 's': /* SJIS output */
1424 output_conv = s_oconv;
1426 case 'l': /* ISO8859 Latin-1 support, no conversion */
1427 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1428 input_f = LATIN1_INPUT;
1430 case 'i': /* Kanji IN ESC-$-@/B */
1431 if (*cp=='@'||*cp=='B')
1432 kanji_intro = *cp++;
1434 case 'o': /* ASCII IN ESC-(-J/B */
1435 if (*cp=='J'||*cp=='B'||*cp=='H')
1436 ascii_intro = *cp++;
1440 bit:1 katakana->hiragana
1441 bit:2 hiragana->katakana
1443 if ('9'>= *cp && *cp>='0')
1444 hira_f |= (*cp++ -'0');
1451 #if defined(MSDOS) || defined(__OS2__)
1466 #ifdef UTF8_OUTPUT_ENABLE
1467 case 'w': /* UTF-8 output */
1468 if ('1'== cp[0] && '6'==cp[1]) {
1469 output_conv = w_oconv16; cp+=2;
1471 unicode_bom_f=2; cp++;
1474 unicode_bom_f=1; cp++;
1476 } else if (cp[0] == 'B') {
1477 unicode_bom_f=2; cp++;
1479 unicode_bom_f=1; cp++;
1482 } else if (cp[0] == '8') {
1483 output_conv = w_oconv; cp++;
1486 unicode_bom_f=1; cp++;
1489 output_conv = w_oconv;
1492 #ifdef UTF8_INPUT_ENABLE
1493 case 'W': /* UTF-8 input */
1494 if ('1'== cp[0] && '6'==cp[1]) {
1495 input_f = UTF16BE_INPUT;
1496 utf16_mode = UTF16BE_INPUT;
1500 input_f = UTF16LE_INPUT;
1501 utf16_mode = UTF16LE_INPUT;
1502 } else if (cp[0] == 'B') {
1504 input_f = UTF16BE_INPUT;
1505 utf16_mode = UTF16BE_INPUT;
1507 } else if (cp[0] == '8') {
1509 input_f = UTF8_INPUT;
1511 input_f = UTF8_INPUT;
1514 /* Input code assumption */
1515 case 'J': /* JIS input */
1516 case 'E': /* AT&T EUC input */
1517 input_f = JIS_INPUT;
1519 case 'S': /* MS Kanji input */
1520 input_f = SJIS_INPUT;
1521 if (x0201_f==NO_X0201) x0201_f=TRUE;
1523 case 'Z': /* Convert X0208 alphabet to asii */
1524 /* bit:0 Convert X0208
1525 bit:1 Convert Kankaku to one space
1526 bit:2 Convert Kankaku to two spaces
1527 bit:3 Convert HTML Entity
1529 if ('9'>= *cp && *cp>='0')
1530 alpha_f |= 1<<(*cp++ -'0');
1534 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1535 x0201_f = FALSE; /* No X0201->X0208 conversion */
1537 ESC-(-I in JIS, EUC, MS Kanji
1538 SI/SO in JIS, EUC, MS Kanji
1539 SSO in EUC, JIS, not in MS Kanji
1540 MS Kanji (0xa0-0xdf)
1542 ESC-(-I in JIS (0x20-0x5f)
1543 SSO in EUC (0xa0-0xdf)
1544 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1547 case 'X': /* Assume X0201 kana */
1548 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1551 case 'F': /* prserve new lines */
1552 fold_preserve_f = TRUE;
1553 case 'f': /* folding -f60 or -f */
1556 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1558 fold_len += *cp++ - '0';
1560 if (!(0<fold_len && fold_len<BUFSIZ))
1561 fold_len = DEFAULT_FOLD;
1565 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1567 fold_margin += *cp++ - '0';
1571 case 'm': /* MIME support */
1572 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1573 if (*cp=='B'||*cp=='Q') {
1574 mime_decode_mode = *cp++;
1575 mimebuf_f = FIXED_MIME;
1576 } else if (*cp=='N') {
1577 mime_f = TRUE; cp++;
1578 } else if (*cp=='S') {
1579 mime_f = STRICT_MIME; cp++;
1580 } else if (*cp=='0') {
1581 mime_decode_f = FALSE;
1582 mime_f = FALSE; cp++;
1585 case 'M': /* MIME output */
1588 mimeout_f = FIXED_MIME; cp++;
1589 } else if (*cp=='Q') {
1591 mimeout_f = FIXED_MIME; cp++;
1596 case 'B': /* Broken JIS support */
1598 bit:1 allow any x on ESC-(-x or ESC-$-x
1599 bit:2 reset to ascii on NL
1601 if ('9'>= *cp && *cp>='0')
1602 broken_f |= 1<<(*cp++ -'0');
1607 case 'O':/* for Output file */
1611 case 'c':/* add cr code */
1614 case 'd':/* delete cr code */
1617 case 'I': /* ISO-2022-JP output */
1620 case 'L': /* line mode */
1621 if (*cp=='u') { /* unix */
1622 crmode_f = NL; cp++;
1623 } else if (*cp=='m') { /* mac */
1624 crmode_f = CR; cp++;
1625 } else if (*cp=='w') { /* windows */
1626 crmode_f = CRLF; cp++;
1627 } else if (*cp=='0') { /* no conversion */
1637 /* multiple options in a string are allowed for the Perl module */
1638 while(*cp && *cp++!='-');
1641 /* bogus option but ignored */
/*
 * find_inputcode_byfunc: look through input_code_list for the entry whose
 * iconv_func member equals the converter function pointer passed in.
 * Both an ANSI prototype header (under ANSI_C_PROTOTYPE) and a K&R-style
 * header are provided.
 * NOTE(review): the loop advance and the return statements are not visible
 * in this excerpt; presumably the matching entry is returned -- confirm.
 */
1647 #ifdef ANSI_C_PROTOTYPE
1648 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1650 struct input_code * find_inputcode_byfunc(iconv_func)
1651 int (*iconv_func)();
1655 struct input_code *p = input_code_list;
1657 if (iconv_func == p->iconv_func){
/*
 * set_iconv: takes a flag f and an input-converter function pointer.
 * Under INPUT_CODE_FIX the flag is additionally qualified by
 * (f == -TRUE || !input_f); -TRUE means "FORCE".
 * When a code is established (estab_f) and the active converter iconv
 * differs from iconv_for_check, the input_code entry for iconv is looked
 * up, its name is recorded with set_input_codename(), input_codename is
 * passed to debug(), and iconv_for_check is updated to iconv.
 * NOTE(review): the statements that assign estab_f and iconv are outside
 * this excerpt.
 */
1666 #ifdef ANSI_C_PROTOTYPE
1667 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1669 void set_iconv(f, iconv_func)
1671 int (*iconv_func)();
1674 #ifdef INPUT_CODE_FIX
1682 #ifdef INPUT_CODE_FIX
1683 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1689 if (estab_f && iconv_for_check != iconv){
1690 struct input_code *p = find_inputcode_byfunc(iconv);
1692 set_input_codename(p->name);
1693 debug(input_codename);
1695 iconv_for_check = iconv;
/*
 * Score bits OR-ed into input_code.score by set_code_score().  Each flag is
 * the previous one shifted left by one bit, so later flags carry more weight
 * when scores are compared numerically.
 */
1700 #define SCORE_L2 (1) /* JIS level-2 kanji */
1701 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1702 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1703 #ifdef SHIFTJIS_CP932
1704 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped by CP932 */
1705 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1707 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1709 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1710 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1712 #define SCORE_INIT (SCORE_iMIME)
/*
 * Score lookup table used by code_score(): indexed by (c2 & 0x0f) when
 * (c2 & 0x70) == 0x20.  Only part of the initializer is visible here.
 */
1714 const int score_table_A0[] = {
1717 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1718 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
/*
 * Score lookup table used by code_score(): indexed by (c2 & 0x0f) when
 * (c2 & 0x70) == 0x70.
 */
1721 const int score_table_F0[] = {
1722 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1723 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1724 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1725 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/* set_code_score: OR the given score bit(s) into ptr->score. */
1728 void set_code_score(ptr, score)
1729 struct input_code *ptr;
1733 ptr->score |= score;
/* clr_code_score: clear the given score bit(s) from ptr->score. */
1737 void clr_code_score(ptr, score)
1738 struct input_code *ptr;
1742 ptr->score &= ~score;
/*
 * code_score: add score bits to *ptr according to the first buffered
 * byte(s) in ptr->buf (c2 = buf[0], c1 = buf[1]):
 *   - c2 == SSO                     -> SCORE_KANA
 *   - e2w_conv(c2, c1) returns 0    -> SCORE_NO_EXIST (UTF8_OUTPUT_ENABLE only)
 *   - (c2 & 0x70) == 0x20           -> score_table_A0[c2 & 0x0f]
 *   - (c2 & 0x70) == 0x70           -> score_table_F0[c2 & 0x0f]
 *   - (c2 & 0x70) >= 0x50           -> SCORE_L2
 * NOTE(review): the condition guarding the SCORE_ERROR branch is not
 * visible in this excerpt.
 */
1746 void code_score(ptr)
1747 struct input_code *ptr;
1749 int c2 = ptr->buf[0];
1750 #ifdef UTF8_OUTPUT_ENABLE
1751 int c1 = ptr->buf[1];
1754 set_code_score(ptr, SCORE_ERROR);
1755 }else if (c2 == SSO){
1756 set_code_score(ptr, SCORE_KANA);
1757 #ifdef UTF8_OUTPUT_ENABLE
1758 }else if (!e2w_conv(c2, c1)){
1759 set_code_score(ptr, SCORE_NO_EXIST);
1761 }else if ((c2 & 0x70) == 0x20){
1762 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1763 }else if ((c2 & 0x70) == 0x70){
1764 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1765 }else if ((c2 & 0x70) >= 0x50){
1766 set_code_score(ptr, SCORE_L2);
/*
 * status_disable: called by the *_status() routines when a byte does not
 * fit their encoding.  If this candidate's converter is the currently
 * selected iconv, the selection is reset with set_iconv(FALSE, 0).
 * NOTE(review): other statements of this function are not visible here.
 */
1770 void status_disable(ptr)
1771 struct input_code *ptr;
1776 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/* status_push_ch: store c at ptr->buf[ptr->index] and advance ptr->index. */
1779 void status_push_ch(ptr, c)
1780 struct input_code *ptr;
1783 ptr->buf[ptr->index++] = c;
1786 void status_clear(ptr)
1787 struct input_code *ptr;
/*
 * status_reset: set ptr->score back to SCORE_INIT.
 * NOTE(review): any further statements are not visible in this excerpt.
 */
1793 void status_reset(ptr)
1794 struct input_code *ptr;
1797 ptr->score = SCORE_INIT;
/*
 * status_reinit: clear ptr->_file_stat to 0.
 * NOTE(review): any further statements are not visible in this excerpt.
 */
1800 void status_reinit(ptr)
1801 struct input_code *ptr;
1804 ptr->_file_stat = 0;
1807 void status_check(ptr, c)
1808 struct input_code *ptr;
1811 if (c <= DEL && estab_f){
/*
 * s_status: per-byte status update for one input_code candidate.  The byte
 * ranges tested and the s2e_conv() calls suggest this is the Shift_JIS
 * candidate -- NOTE(review): confirm against input_code_list.
 *
 * Visible first-byte handling:
 *   0xa1-0xdf               buffer SSO followed by the byte
 *   0x81-0x9f, 0xe0-0xef    buffer the byte
 *   CP932_TABLE_BEGIN..END  buffer the byte (SHIFTJIS_CP932 only)
 *   0xf0-0xfc with x0212_f  buffer the byte
 *   otherwise               status_disable()
 * Visible second-byte handling: 0x40-0x7e or 0x80-0xfc is buffered and the
 * pair is converted in place with s2e_conv(); otherwise status_disable().
 * NOTE(review): the switch/case structure on ptr->stat is not visible here.
 */
1816 void s_status(ptr, c)
1817 struct input_code *ptr;
1822 status_check(ptr, c);
1827 #ifdef NUMCHAR_OPTION
1828 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1831 }else if (0xa1 <= c && c <= 0xdf){
1832 status_push_ch(ptr, SSO);
1833 status_push_ch(ptr, c);
1836 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1838 status_push_ch(ptr, c);
1839 #ifdef SHIFTJIS_CP932
1841 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1843 status_push_ch(ptr, c);
1844 #endif /* SHIFTJIS_CP932 */
1846 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1848 status_push_ch(ptr, c);
1849 #endif /* X0212_ENABLE */
1851 status_disable(ptr);
1855 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1856 status_push_ch(ptr, c);
1857 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1861 status_disable(ptr);
1865 #ifdef SHIFTJIS_CP932
1866 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1867 status_push_ch(ptr, c);
/* a zero return from s2e_conv() marks the candidate with SCORE_CP932 */
1868 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1869 set_code_score(ptr, SCORE_CP932);
1874 #endif /* SHIFTJIS_CP932 */
1875 #ifndef X0212_ENABLE
1876 status_disable(ptr);
/*
 * e_status: per-byte status update for one input_code candidate.  The
 * accepted bytes (SSO, 0x8f, 0xa1-0xfe) suggest this is the EUC-JP
 * candidate -- NOTE(review): confirm against input_code_list.
 *
 * Visible first-byte handling:
 *   SSO or 0xa1-0xfe   buffer the byte
 *   0x8f               buffer the byte (inside the X0212_ENABLE section)
 *   otherwise          status_disable()
 * Visible following-byte handling: 0xa1-0xfe is buffered, anything else
 * disables the candidate.
 * NOTE(review): the switch/case structure on ptr->stat is not visible here.
 */
1882 void e_status(ptr, c)
1883 struct input_code *ptr;
1888 status_check(ptr, c);
1893 #ifdef NUMCHAR_OPTION
1894 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1897 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1899 status_push_ch(ptr, c);
1901 }else if (0x8f == c){
1903 status_push_ch(ptr, c);
1904 #endif /* X0212_ENABLE */
1906 status_disable(ptr);
1910 if (0xa1 <= c && c <= 0xfe){
1911 status_push_ch(ptr, c);
1915 status_disable(ptr);
1920 if (0xa1 <= c && c <= 0xfe){
1922 status_push_ch(ptr, c);
1924 status_disable(ptr);
1926 #endif /* X0212_ENABLE */
/*
 * w16_status: per-byte status update for one input_code candidate, compiled
 * only under UTF8_INPUT_ENABLE.  The name and the 0xfe/0xff tests suggest
 * the UTF-16 candidate -- NOTE(review): confirm against input_code_list.
 *
 * ptr->_file_stat as used in the visible lines:
 *    0   nothing accepted yet; a first byte of 0xfe or 0xff is buffered and
 *        _file_stat becomes 1, otherwise the candidate is disabled and
 *        _file_stat becomes -1
 *   >0   bytes are buffered
 *   <0   the candidate stays disabled
 * NOTE(review): the switch/case structure on ptr->stat is not visible here.
 */
1930 #ifdef UTF8_INPUT_ENABLE
1931 void w16_status(ptr, c)
1932 struct input_code *ptr;
1939 if (ptr->_file_stat == 0){
1940 if (c == 0xfe || c == 0xff){
1942 status_push_ch(ptr, c);
1943 ptr->_file_stat = 1;
1945 status_disable(ptr);
1946 ptr->_file_stat = -1;
1948 }else if (ptr->_file_stat > 0){
1950 status_push_ch(ptr, c);
1951 }else if (ptr->_file_stat < 0){
1952 status_disable(ptr);
1958 status_disable(ptr);
1959 ptr->_file_stat = -1;
1961 status_push_ch(ptr, c);
/* accept 0xfe/0xff only when it differs from the value held in ptr->stat */
1968 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1969 status_push_ch(ptr, c);
1972 status_disable(ptr);
1973 ptr->_file_stat = -1;
/*
 * w_status: per-byte status update for one input_code candidate.  The lead
 * byte ranges (0xc0-0xdf, 0xe0-0xef), the 0x80-0xbf continuation range and
 * the w2e_conv() call suggest the UTF-8 candidate -- NOTE(review): confirm
 * against input_code_list.
 *
 * Visible handling:
 *   first byte 0xc0-0xdf or 0xe0-0xef   buffer the byte
 *   other first byte                    status_disable()
 *   following byte 0x80-0xbf            buffer it; once ptr->index exceeds
 *                                       ptr->stat the buffered bytes are
 *                                       converted in place with w2e_conv()
 *   other following byte                status_disable()
 * NOTE(review): the switch/case structure on ptr->stat is not visible here.
 */
1979 void w_status(ptr, c)
1980 struct input_code *ptr;
1985 status_check(ptr, c);
1990 #ifdef NUMCHAR_OPTION
1991 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1994 }else if (0xc0 <= c && c <= 0xdf){
1996 status_push_ch(ptr, c);
1997 }else if (0xe0 <= c && c <= 0xef){
1999 status_push_ch(ptr, c);
2001 status_disable(ptr);
2006 if (0x80 <= c && c <= 0xbf){
2007 status_push_ch(ptr, c);
2008 if (ptr->index > ptr->stat){
/* bom is true when the buffered bytes are EF BB BF (U+FEFF in UTF-8) */
2009 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2010 && ptr->buf[2] == 0xbf);
2011 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2012 &ptr->buf[0], &ptr->buf[1]);
2019 status_disable(ptr);
2030 int action_flag = 1;
2031 struct input_code *result = 0;
2032 struct input_code *p = input_code_list;
2034 (p->status_func)(p, c);
2037 }else if(p->stat == 0){
2048 if (result && !estab_f){
2049 set_iconv(TRUE, result->iconv_func);
2050 }else if (c <= DEL){
2051 struct input_code *ptr = input_code_list;
2066 return std_gc_buf[--std_gc_ndx];
2077 if (std_gc_ndx == STD_GC_BUFSIZE){
2080 std_gc_buf[std_gc_ndx++] = c;
2094 #if !defined(PERL_XS) && !defined(WIN32DLL)
2101 while ((c = (*i_getc)(f)) != EOF)
2110 oconv = output_conv;
2113 /* replace continuation module, from output side */
2115 /* output redirection */
2117 if (noout_f || guess_f){
2124 if (mimeout_f == TRUE) {
2125 o_base64conv = oconv; oconv = base64_conv;
2127 /* base64_count = 0; */
2131 o_crconv = oconv; oconv = cr_conv;
2134 o_rot_conv = oconv; oconv = rot_conv;
2137 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2140 o_hira_conv = oconv; oconv = hira_conv;
2143 o_fconv = oconv; oconv = fold_conv;
2146 if (alpha_f || x0201_f) {
2147 o_zconv = oconv; oconv = z_conv;
2151 i_ungetc = std_ungetc;
2152 /* input redirection */
2155 i_cgetc = i_getc; i_getc = cap_getc;
2156 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2159 i_ugetc = i_getc; i_getc = url_getc;
2160 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2163 #ifdef NUMCHAR_OPTION
2165 i_ngetc = i_getc; i_getc = numchar_getc;
2166 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2169 #ifdef UNICODE_NORMALIZATION
2170 if (nfc_f && input_f == UTF8_INPUT){
2171 i_nfc_getc = i_getc; i_getc = nfc_getc;
2172 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2175 if (mime_f && mimebuf_f==FIXED_MIME) {
2176 i_mgetc = i_getc; i_getc = mime_getc;
2177 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2180 i_bgetc = i_getc; i_getc = broken_getc;
2181 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2183 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
2184 set_iconv(-TRUE, e_iconv);
2185 } else if (input_f == SJIS_INPUT) {
2186 set_iconv(-TRUE, s_iconv);
2187 #ifdef UTF8_INPUT_ENABLE
2188 } else if (input_f == UTF8_INPUT) {
2189 set_iconv(-TRUE, w_iconv);
2190 } else if (input_f == UTF16BE_INPUT) {
2191 set_iconv(-TRUE, w_iconv16);
2192 } else if (input_f == UTF16LE_INPUT) {
2193 set_iconv(-TRUE, w_iconv16);
2196 set_iconv(FALSE, e_iconv);
2200 struct input_code *p = input_code_list;
2208 Conversion main loop. Code detection only.
2217 int is_8bit = FALSE;
2219 module_connection();
2222 if(input_f == SJIS_INPUT
2223 #ifdef UTF8_INPUT_ENABLE
2224 || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT
2232 output_mode = ASCII;
2235 #define NEXT continue /* no output, get next */
2236 #define SEND ; /* output c1 and c2, get next */
2237 #define LAST break /* end of loop, go closing */
2239 while ((c1 = (*i_getc)(f)) != EOF) {
2244 /* in case of 8th bit is on */
2245 if (!estab_f&&!mime_decode_mode) {
2246 /* in case of not established yet */
2247 /* It is still ambiguous */
2248 if (h_conv(f, c2, c1)==EOF)
2254 /* in case of already established */
2256 /* ignore bogus code */
2262 /* second byte, 7 bit code */
2263 /* it might be kanji shifted */
2264 if ((c1 == DEL) || (c1 <= SPACE)) {
2265 /* ignore bogus first code */
2273 #ifdef UTF8_INPUT_ENABLE
2282 #ifdef NUMCHAR_OPTION
2283 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
2286 } else if (c1 > DEL) {
2288 if (!estab_f && !iso8859_f) {
2289 /* not established yet */
2290 if (!is_8bit) is_8bit = TRUE;
2293 } else { /* estab_f==TRUE */
2298 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2299 /* SJIS X0201 Case... */
2300 if(iso2022jp_f && x0201_f==NO_X0201) {
2301 (*oconv)(GETA1, GETA2);
2308 } else if (c1==SSO && iconv != s_iconv) {
2309 /* EUC X0201 Case */
2310 c1 = (*i_getc)(f); /* skip SSO */
2312 if (SSP<=c1 && c1<0xe0) {
2313 if(iso2022jp_f && x0201_f==NO_X0201) {
2314 (*oconv)(GETA1, GETA2);
2321 } else { /* bogus code, skip SSO and one byte */
2325 /* already established */
2330 } else if ((c1 > SPACE) && (c1 != DEL)) {
2331 /* in case of Roman characters */
2333 /* output 1 shifted byte */
2337 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2338 /* output 1 shifted byte */
2339 if(iso2022jp_f && x0201_f==NO_X0201) {
2340 (*oconv)(GETA1, GETA2);
2347 /* look like bogus code */
2350 } else if (input_mode == X0208) {
2351 /* in case of Kanji shifted */
2354 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2355 /* Check MIME code */
2356 if ((c1 = (*i_getc)(f)) == EOF) {
2359 } else if (c1 == '?') {
2360 /* =? is mime conversion start sequence */
2361 if(mime_f == STRICT_MIME) {
2362 /* check in real detail */
2363 if (mime_begin_strict(f) == EOF)
2367 } else if (mime_begin(f) == EOF)
2377 /* normal ASCII code */
2380 } else if (!is_8bit && c1 == SI) {
2383 } else if (!is_8bit && c1 == SO) {
2386 } else if (!is_8bit && c1 == ESC ) {
2387 if ((c1 = (*i_getc)(f)) == EOF) {
2388 /* (*oconv)(0, ESC); don't send bogus code */
2390 } else if (c1 == '$') {
2391 if ((c1 = (*i_getc)(f)) == EOF) {
2393 (*oconv)(0, ESC); don't send bogus code
2394 (*oconv)(0, '$'); */
2396 } else if (c1 == '@'|| c1 == 'B') {
2397 /* This is kanji introduction */
2400 set_input_codename("ISO-2022-JP");
2402 debug(input_codename);
2405 } else if (c1 == '(') {
2406 if ((c1 = (*i_getc)(f)) == EOF) {
2407 /* don't send bogus code
2413 } else if (c1 == '@'|| c1 == 'B') {
2414 /* This is kanji introduction */
2419 } else if (c1 == 'D'){
2423 #endif /* X0212_ENABLE */
2425 /* could be some special code */
2432 } else if (broken_f&0x2) {
2433 /* accept any ESC-(-x as broken code ... */
2443 } else if (c1 == '(') {
2444 if ((c1 = (*i_getc)(f)) == EOF) {
2445 /* don't send bogus code
2447 (*oconv)(0, '('); */
2451 /* This is X0201 kana introduction */
2452 input_mode = X0201; shift_mode = X0201;
2454 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2455 /* This is X0208 kanji introduction */
2456 input_mode = ASCII; shift_mode = FALSE;
2458 } else if (broken_f&0x2) {
2459 input_mode = ASCII; shift_mode = FALSE;
2464 /* maintain various input_mode here */
2468 } else if ( c1 == 'N' || c1 == 'n' ){
2470 c3 = (*i_getc)(f); /* skip SS2 */
2471 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2486 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2487 input_mode = ASCII; set_iconv(FALSE, 0);
2489 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2490 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2498 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2499 if ((c1=(*i_getc)(f))!=EOF) {
2503 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2519 if (input_mode == X0208)
2520 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2522 else if (input_mode == X0212)
2523 (*oconv)((0x8f << 8) | c2, c1);
2524 #endif /* X0212_ENABLE */
2525 else if (input_mode)
2526 (*oconv)(input_mode, c1); /* other special case */
2527 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2528 int c0 = (*i_getc)(f);
2531 (*iconv)(c2, c1, c0);
2537 /* goto next_word */
2541 (*iconv)(EOF, 0, 0);
2542 if (!is_inputcode_set)
2545 struct input_code *p = input_code_list;
2546 struct input_code *result = p;
2548 if (p->score < result->score) result = p;
2551 set_input_codename(result->name);
2566 /** it must NOT be in the kanji shift sequence */
2567 /** it must NOT be written in JIS7 */
2568 /** and it must be after 2 byte 8bit code */
2575 while ((c1 = (*i_getc)(f)) != EOF) {
2581 if (push_hold_buf(c1) == EOF || estab_f){
2587 struct input_code *p = input_code_list;
2588 struct input_code *result = p;
2593 if (p->score < result->score){
2598 set_iconv(FALSE, result->iconv_func);
2603 ** 1) EOF is detected, or
2604 ** 2) Code is established, or
2605 ** 3) Buffer is FULL (but last word is pushed)
2607 ** in 1) and 3) cases, we continue to use
2608 ** Kanji codes by oconv and leave estab_f unchanged.
2613 while (wc < hold_count){
2614 c2 = hold_buf[wc++];
2616 #ifdef NUMCHAR_OPTION
2617 || (c2 & CLASS_MASK) == CLASS_UTF16
2622 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2623 (*iconv)(X0201, c2, 0);
2626 if (wc < hold_count){
2627 c1 = hold_buf[wc++];
2636 if ((*iconv)(c2, c1, 0) < 0){
2638 if (wc < hold_count){
2639 c0 = hold_buf[wc++];
2648 (*iconv)(c2, c1, c0);
2661 if (hold_count >= HOLD_SIZE*2)
2663 hold_buf[hold_count++] = c2;
2664 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2667 int s2e_conv(c2, c1, p2, p1)
2671 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
2674 #ifdef SHIFTJIS_CP932
2675 if (cp51932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2676 extern const unsigned short shiftjis_cp932[3][189];
2677 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2683 #endif /* SHIFTJIS_CP932 */
2685 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2686 extern const unsigned short shiftjis_x0212[3][189];
2687 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2690 c2 = (0x8f << 8) | (val >> 8);
2703 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2705 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2714 c2 = x0212_unshift(c2);
2729 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2732 int ret = s2e_conv(c2, c1, &c2, &c1);
2733 if (ret) return ret;
2747 }else if (c2 == 0x8f){
2751 c2 = (c2 << 8) | (c1 & 0x7f);
2753 #ifdef SHIFTJIS_CP932
2756 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2757 s2e_conv(s2, s1, &c2, &c1);
2758 if ((c2 & 0xff00) == 0){
2764 #endif /* SHIFTJIS_CP932 */
2765 #endif /* X0212_ENABLE */
2766 } else if (c2 == SSO){
2769 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2779 #ifdef UTF8_INPUT_ENABLE
2781 w2e_conv(c2, c1, c0, p2, p1)
2790 }else if (0xc0 <= c2 && c2 <= 0xef) {
2791 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2792 #ifdef NUMCHAR_OPTION
2795 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2810 /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
2811 if(ignore_zwnbsp_f){
2812 ignore_zwnbsp_f = FALSE;
2813 if(c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
2817 if (c2 == 0) /* 0x00-0x7f */
2818 c1 &= 0x7F; /* 1byte */
2820 if ((c2 & 0xe0) == 0xc0){ /* 0xc0-0xdf */
2822 if((c2 & 0xFE) == 0xC0 || c1 < 0x80 || 0xBF < c1) return 0;
2823 }else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
2824 return -1; /* 3bytes */
2826 else if (0xf0 <= c2)
2827 return 0; /* 4,5,6bytes */
2828 else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
2829 return 0; /* trail byte */
2833 /* must be 3bytes */
2835 if(c1 < 0xA0 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
2837 }else if(c2 == 0xED){
2838 if(c1 < 0x80 || 0x9F < c1 || c0 < 0x80 || 0xBF < c0)
2840 }else if((c2 & 0xf0) == 0xe0){
2841 if(c1 < 0x80 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
2845 if (c2 == 0 || c2 == EOF){
2846 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
2847 } else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
2848 unsigned short val = 0;
2853 val = ww16_conv(c2, c1, c0);
2854 c2 = (val >> 8) & 0xff;
2858 ret = w2e_conv(c2, c1, c0, &c2, &c1);
/*
 * w16w_conv: encode the 16-bit value val as UTF-8 bytes written through
 * p2, p1 and p0 (lead byte first).
 *   val < 0x800   two bytes:   110xxxxx 10xxxxxx
 *   otherwise     three bytes: 1110xxxx 10xxxxxx 10xxxxxx
 * NOTE(review): the branch preceding "val < 0x800" (presumably the
 * single-byte case) and the handling of unused outputs are not visible.
 */
2867 w16w_conv(val, p2, p1, p0)
2875 }else if (val < 0x800){
2876 *p2 = 0xc0 | (val >> 6);
2877 *p1 = 0x80 | (val & 0x3f);
2880 *p2 = 0xe0 | (val >> 12);
2881 *p1 = 0x80 | ((val >> 6) & 0x3f);
2882 *p0 = 0x80 | (val & 0x3f);
/*
 * ww16_conv: combine UTF-8 bytes (c2 = lead byte, then c1, c0) into a
 * single value.
 *   c2 >= 0xe0   low 4 bits of c2 go to bits 12-15, low 6 bits of c1 to
 *                bits 6-11
 *   c2 >= 0xc0   low 5 bits of c2 go to bits 6-10
 * NOTE(review): the first branch, the statements that merge the final
 * byte, and the return are not visible in this excerpt.
 */
2887 ww16_conv(c2, c1, c0)
2893 }else if (c2 >= 0xe0){
2894 val = (c2 & 0x0f) << 12;
2895 val |= (c1 & 0x3f) << 6;
2897 }else if (c2 >= 0xc0){
2898 val = (c2 & 0x1f) << 6;
2907 w16e_conv(val, p2, p1)
2933 w16w_conv(val, &c2, &c1, &c0);
2934 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2935 #ifdef NUMCHAR_OPTION
2938 *p1 = CLASS_UTF16 | val;
2947 w_iconv16(c2, c1, c0)
2952 /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
2953 if(ignore_zwnbsp_f){
2954 ignore_zwnbsp_f = FALSE;
2955 if (c2==0376 && c1==0377){
2956 utf16_mode = UTF16BE_INPUT;
2958 }else if(c2==0377 && c1==0376){
2959 utf16_mode = UTF16LE_INPUT;
2963 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
2965 tmp=c1; c1=c2; c2=tmp;
2967 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2970 }else if((c2>>3)==27){ /* surrogate pair */
2972 #ifdef UTF8_OUTPUT_ENABLE
2973 }else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
2975 }else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2976 if (ret) return ret;
2982 unicode_to_jis_common(c2, c1, c0, p2, p1)
2986 extern const unsigned short *const utf8_to_euc_2bytes[];
2987 extern const unsigned short *const *const utf8_to_euc_3bytes[];
2991 if (ms_ucs_map_f && cp51932_f){
2992 /* CP932/CP51932: U+00A6 (BROKEN BAR) -> not 0x8fa2c3, but 0x7c */
3005 }else if(strict_mapping_f){
3009 case 0xAB: case 0xAD: case 0xB2: case 0xB3:
3010 case 0xB5: case 0xB7: case 0xB9: case 0xBB:
3022 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
3023 if(!ret && !ms_ucs_map_f && !x0212_f){
3024 if(*p2 == 0 && *p1 < 0x80){
3026 }else if(*p2 > 0xFF){
3028 if (e2s_conv(*p2, *p1, &s2, &s1) == 0){
3029 s2e_conv(s2, s1, p2, p1);
3030 if(*p2 == 0 && *p1 < 0x80)
3036 if(unicode_round_trip_f){
3041 if(c0 == 0x95) return 1;
3044 if(c0 == 0xA5) return 1;
3051 if(c0 == 0xBF) return 1;
3054 if(c0 == 0x8D) return 1;
3057 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3065 if(c2 == 0xE2 && c1 == 0x80 && c0 == 0xBE){
3069 }else if(c2 == 0xEF && c1 == 0xBD && c0 == 0x9E){
3070 if (p2) *p2 = 0x8F22;
3075 if(!strict_mapping_f);
3076 else if(ms_ucs_map_f && cp51932_f){
3077 /* Microsoft Code Page */
3083 case 0x94: case 0x96: case 0xBE:
3104 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
3107 ret = w_iconv_common(c1, c0, utf8_to_euc_3bytes[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
/*
 * w_iconv_common: table lookup keyed by c1 and c0.
 *   pp     table of row pointers (psize entries)
 *   p2,p1  output pointers
 * Returns 1 (lookup failed) when pp is null, c1 is outside [0, psize),
 * the selected row p is null, c0 is outside [0, sizeof_utf8_to_euc_E5B8),
 * or the looked-up value is 0.
 * NOTE(review): the assignments of p and val, the action taken when the
 * disable_cp932ext_f test matches, and the stores through p2/p1 are not
 * visible in this excerpt.
 */
3113 w_iconv_common(c1, c0, pp, psize, p2, p1)
3115 const unsigned short *const *pp;
3120 const unsigned short *p;
3123 if (pp == 0) return 1;
3126 if (c1 < 0 || psize <= c1) return 1;
3128 if (p == 0) return 1;
3131 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
3133 if (val == 0) return 1;
3134 if (disable_cp932ext_f && (
3135 (val>>8) == 0x2D || /* disable NEC special characters */
3136 val > 0xF300 /* disable NEC special characters */
/* a high byte equal to SO is replaced by the X0201 marker */
3144 if (c2 == SO) c2 = X0201;
/*
 * nkf_each_char_to_hex: emit c as upper-case hexadecimal digits by calling
 * the callback f(0, digit) with one nibble of c at a time.
 * NOTE(review): the loop that steps `shift` (and any leading-zero
 * suppression) is not visible in this excerpt.
 */
3153 #ifdef UTF8_OUTPUT_ENABLE
3155 nkf_each_char_to_hex(f, c)
3156 void (*f)PROTO((int c2,int c1));
3159 const char *hex = "0123456789ABCDEF";
3165 (*f)(0, hex[(c>>shift)&0xF]);
/*
 * encode_fallback_html: write the value c through oconv as ASCII decimal
 * digits (0x30 + digit), from the millions place down to the ones place.
 * NOTE(review): the surrounding prefix/suffix output and the conditions
 * that skip leading digits are not visible in this excerpt.
 */
3176 encode_fallback_html(c)
3183 (*oconv)(0, 0x30+(c/1000000)%10);
3185 (*oconv)(0, 0x30+(c/100000 )%10);
3187 (*oconv)(0, 0x30+(c/10000 )%10);
3189 (*oconv)(0, 0x30+(c/1000 )%10);
3191 (*oconv)(0, 0x30+(c/100 )%10);
3193 (*oconv)(0, 0x30+(c/10 )%10);
3195 (*oconv)(0, 0x30+ c %10);
3201 encode_fallback_xml(c)
3207 nkf_each_char_to_hex(oconv, c);
/*
 * encode_fallback_java: write the value c through oconv as upper-case
 * hexadecimal digits.  When the low 24 bits of c exceed 0xFFFF the nibbles
 * at bits 20 and 16 are emitted as well; the nibbles at bits 12, 8, 4 and 0
 * are emitted in that order.
 * NOTE(review): the escape prefix characters and the else-branch layout
 * are not visible in this excerpt.
 */
3213 encode_fallback_java(c)
3216 const char *hex = "0123456789ABCDEF";
3218 if((c&0x00FFFFFF) > 0xFFFF){
3222 (*oconv)(0, hex[(c>>20)&0xF]);
3223 (*oconv)(0, hex[(c>>16)&0xF]);
3227 (*oconv)(0, hex[(c>>12)&0xF]);
3228 (*oconv)(0, hex[(c>> 8)&0xF]);
3229 (*oconv)(0, hex[(c>> 4)&0xF]);
3230 (*oconv)(0, hex[ c &0xF]);
3235 encode_fallback_perl(c)
3241 nkf_each_char_to_hex(oconv, c);
/*
 * encode_fallback_subchar: replace the incoming value with the configured
 * unicode_subchar and send it through oconv, either as a high-byte/low-byte
 * pair or one byte at a time.
 * NOTE(review): the condition choosing between the two oconv forms and the
 * loop over `shift` are not visible in this excerpt.
 */
3247 encode_fallback_subchar(c)
3250 c = unicode_subchar;
3251 (*oconv)((c>>8)&0xFF, c&0xFF);
3257 (*oconv)(0, (c>>shift)&0xFF);
3271 extern const unsigned short euc_to_utf8_1byte[];
3272 extern const unsigned short *const euc_to_utf8_2bytes[];
3273 extern const unsigned short *const euc_to_utf8_2bytes_ms[];
3274 const unsigned short *p;
3277 p = euc_to_utf8_1byte;
3279 } else if (c2 >> 8 == 0x8f){
3280 if(!ms_ucs_map_f && c2 == 0x8F22 && c1 == 0x43){
3283 extern const unsigned short *const x0212_to_utf8_2bytes[];
3284 c2 = (c2&0x7f) - 0x21;
3285 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3286 p = x0212_to_utf8_2bytes[c2];
3292 c2 = (c2&0x7f) - 0x21;
3293 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3294 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
3299 c1 = (c1 & 0x7f) - 0x21;
3300 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3317 if (unicode_bom_f==2) {
3324 #ifdef NUMCHAR_OPTION
3325 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3326 w16w_conv(c1, &c2, &c1, &c0);
3330 if (c0) (*o_putc)(c0);
3337 output_mode = ASCII;
3339 } else if (c2 == ISO8859_1) {
3340 output_mode = ISO8859_1;
3341 (*o_putc)(c1 | 0x080);
3344 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
3345 val = ((c2<<8)&0xff00) + c1;
3346 else val = e2w_conv(c2, c1);
3348 w16w_conv(val, &c2, &c1, &c0);
3352 if (c0) (*o_putc)(c0);
3368 if (unicode_bom_f==2) {
3370 (*o_putc)((unsigned char)'\377');
3374 (*o_putc)((unsigned char)'\377');
3379 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
3380 } else if (c2 == ISO8859_1) {
3383 #ifdef NUMCHAR_OPTION
3384 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
3385 c2 = (c1 >> 8) & 0xff;
3389 unsigned short val = e2w_conv(c2, c1);
3390 c2 = (val >> 8) & 0xff;
3409 #ifdef NUMCHAR_OPTION
3410 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3411 w16e_conv(c1, &c2, &c1);
3412 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3413 if(encode_fallback)(*encode_fallback)(c1);
3421 } else if (c2 == 0) {
3422 output_mode = ASCII;
3424 } else if (c2 == X0201) {
3425 output_mode = JAPANESE_EUC;
3426 (*o_putc)(SSO); (*o_putc)(c1|0x80);
3427 } else if (c2 == ISO8859_1) {
3428 output_mode = ISO8859_1;
3429 (*o_putc)(c1 | 0x080);
3431 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3432 output_mode = JAPANESE_EUC;
3433 #ifdef SHIFTJIS_CP932
3436 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3437 s2e_conv(s2, s1, &c2, &c1);
3442 output_mode = ASCII;
3444 }else if ((c2 & 0xff00) >> 8 == 0x8f){
3447 (*o_putc)((c2 & 0x7f) | 0x080);
3448 (*o_putc)(c1 | 0x080);
3451 (*o_putc)((c2 & 0x7f) | 0x080);
3452 (*o_putc)(c1 | 0x080);
3456 if ((c1<0x21 || 0x7e<c1) ||
3457 (c2<0x21 || 0x7e<c2)) {
3458 set_iconv(FALSE, 0);
3459 return; /* too late to rescue this char */
3461 output_mode = JAPANESE_EUC;
3462 (*o_putc)(c2 | 0x080);
3463 (*o_putc)(c1 | 0x080);
3473 if ((ret & 0xff00) == 0x8f00){
3474 if (0x75 <= c && c <= 0x7f){
3475 ret = c + (0x109 - 0x75);
3478 if (0x75 <= c && c <= 0x7f){
3479 ret = c + (0x113 - 0x75);
/*
 * x0212_unshift: remap a row value c.
 *   0x7f-0x88   -> 0x75-0x7e
 *   0x89-0x92   -> 0x75-0x7e with 0x80 set and 0x8f in the next-higher byte
 * NOTE(review): the initial value of ret (used for other inputs) and the
 * return statement are not visible in this excerpt.
 */
3486 int x0212_unshift(c)
3490 if (0x7f <= c && c <= 0x88){
3491 ret = c + (0x75 - 0x7f);
3492 }else if (0x89 <= c && c <= 0x92){
3493 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
3497 #endif /* X0212_ENABLE */
/*
 * e2s_conv: convert the byte pair (c2, c1) and store the result through
 * p2/p1 (each written only if non-null).  The output formula matches the
 * usual JIS-to-Shift_JIS row/cell arithmetic -- NOTE(review): confirm.
 *
 * A c2 whose high byte is 0x8f is first looked up in x0212_shiftjis[],
 * indexed by row (ndx - 0x21) and cell ((c1 & 0x7f) - 0x21); c2 may also be
 * remapped with x0212_shift().
 * NOTE(review): the derivation of ndx, the use of val, the action taken
 * when c2 still has 0x8f in its high byte, and the return values are not
 * visible in this excerpt.
 */
3500 e2s_conv(c2, c1, p2, p1)
3501 int c2, c1, *p2, *p1;
3505 const unsigned short *ptr;
3507 extern const unsigned short *const x0212_shiftjis[];
3508 if ((c2 & 0xff00) == 0x8f00){
3510 if (0x21 <= ndx && ndx <= 0x7e){
3511 ptr = x0212_shiftjis[ndx - 0x21];
3513 val = ptr[(c1 & 0x7f) - 0x21];
3523 c2 = x0212_shift(c2);
3525 #endif /* X0212_ENABLE */
3526 if ((c2 & 0xff00) == 0x8f00){
/* odd/even row selects the lead byte and the offset of the trail byte */
3529 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
3530 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3539 #ifdef NUMCHAR_OPTION
3540 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3541 w16e_conv(c1, &c2, &c1);
3542 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3543 if(encode_fallback)(*encode_fallback)(c1);
3551 } else if (c2 == 0) {
3552 output_mode = ASCII;
3554 } else if (c2 == X0201) {
3555 output_mode = SHIFT_JIS;
3557 } else if (c2 == ISO8859_1) {
3558 output_mode = ISO8859_1;
3559 (*o_putc)(c1 | 0x080);
3561 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3562 output_mode = SHIFT_JIS;
3563 if (e2s_conv(c2, c1, &c2, &c1) == 0){
3569 if ((c1<0x20 || 0x7e<c1) ||
3570 (c2<0x20 || 0x7e<c2)) {
3571 set_iconv(FALSE, 0);
3572 return; /* too late to rescue this char */
3574 output_mode = SHIFT_JIS;
3575 e2s_conv(c2, c1, &c2, &c1);
3577 #ifdef SHIFTJIS_CP932
3579 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3580 extern const unsigned short cp932inv[2][189];
3581 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3587 #endif /* SHIFTJIS_CP932 */
3590 if (prefix_table[(unsigned char)c1]){
3591 (*o_putc)(prefix_table[(unsigned char)c1]);
3602 #ifdef NUMCHAR_OPTION
3603 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3604 w16e_conv(c1, &c2, &c1);
3605 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3606 if(encode_fallback)(*encode_fallback)(c1);
3612 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3615 (*o_putc)(ascii_intro);
3616 output_mode = ASCII;
3620 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3621 if (output_mode!=X0212) {
3622 output_mode = X0212;
3628 (*o_putc)(c2 & 0x7f);
3631 } else if (c2==X0201) {
3632 if (output_mode!=X0201) {
3633 output_mode = X0201;
3639 } else if (c2==ISO8859_1) {
3640 /* iso8859 introduction, or 8th bit on */
3641 /* Can we convert in 7bit form using ESC-'-'-A ?
3643 output_mode = ISO8859_1;
3645 } else if (c2 == 0) {
3646 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3649 (*o_putc)(ascii_intro);
3650 output_mode = ASCII;
3654 if (output_mode != X0208) {
3655 output_mode = X0208;
3658 (*o_putc)(kanji_intro);
3660 if (c1<0x20 || 0x7e<c1)
3662 if (c2<0x20 || 0x7e<c2)
3674 mime_prechar(c2, c1);
3675 (*o_base64conv)(c2,c1);
3679 STATIC int broken_buf[3];
3680 STATIC int broken_counter = 0;
3681 STATIC int broken_last = 0;
3688 if (broken_counter>0) {
3689 return broken_buf[--broken_counter];
3692 if (c=='$' && broken_last != ESC
3693 && (input_mode==ASCII || input_mode==X0201)) {
3696 if (c1=='@'|| c1=='B') {
3697 broken_buf[0]=c1; broken_buf[1]=c;
3704 } else if (c=='(' && broken_last != ESC
3705 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3708 if (c1=='J'|| c1=='B') {
3709 broken_buf[0]=c1; broken_buf[1]=c;
3727 if (broken_counter<2)
3728 broken_buf[broken_counter++]=c;
3732 STATIC int prev_cr = 0;
3740 if (! (c2==0&&c1==NL) ) {
3746 } else if (c1=='\r') {
3748 } else if (c1=='\n') {
3749 if (crmode_f==CRLF) {
3750 (*o_crconv)(0,'\r');
3751 } else if (crmode_f==CR) {
3752 (*o_crconv)(0,'\r');
3756 } else if (c1!='\032' || crmode_f!=NL){
3762 Return value of fold_conv()
3764 \n add newline and output char
3765 \r add newline and output nothing
3768 1 (or else) normal output
3770 fold state in prev (previous character)
3772 >0x80 Japanese (X0208/X0201)
3777 This fold algorithm does not preserve leading spaces in a line.
3778 This is the main difference from fmt.
3781 #define char_size(c2,c1) (c2?2:1)
3790 if (c1== '\r' && !fold_preserve_f) {
3791 fold_state=0; /* ignore cr */
3792 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3794 fold_state=0; /* ignore cr */
3795 } else if (c1== BS) {
3796 if (f_line>0) f_line--;
3798 } else if (c2==EOF && f_line != 0) { /* close open last line */
3800 } else if ((c1=='\n' && !fold_preserve_f)
3801 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3802 && fold_preserve_f)) {
3804 if (fold_preserve_f) {
3808 } else if ((f_prev == c1 && !fold_preserve_f)
3809 || (f_prev == '\n' && fold_preserve_f)
3810 ) { /* duplicate newline */
3813 fold_state = '\n'; /* output two newline */
3819 if (f_prev&0x80) { /* Japanese? */
3821 fold_state = 0; /* ignore given single newline */
3822 } else if (f_prev==' ') {
3826 if (++f_line<=fold_len)
3830 fold_state = '\r'; /* fold and output nothing */
3834 } else if (c1=='\f') {
3839 fold_state = '\n'; /* output newline and clear */
3840 } else if ( (c2==0 && c1==' ')||
3841 (c2==0 && c1=='\t')||
3842 (c2=='!'&& c1=='!')) {
3843 /* X0208 kankaku or ascii space */
3844 if (f_prev == ' ') {
3845 fold_state = 0; /* remove duplicate spaces */
3848 if (++f_line<=fold_len)
3849 fold_state = ' '; /* output ASCII space only */
3851 f_prev = ' '; f_line = 0;
3852 fold_state = '\r'; /* fold and output nothing */
3856 prev0 = f_prev; /* we still need this one... , but almost done */
3858 if (c2 || c2==X0201)
3859 f_prev |= 0x80; /* this is Japanese */
3860 f_line += char_size(c2,c1);
3861 if (f_line<=fold_len) { /* normal case */
3864 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3865 f_line = char_size(c2,c1);
3866 fold_state = '\n'; /* We can't wait, do fold now */
3867 } else if (c2==X0201) {
3868 /* simple kinsoku rules return 1 means no folding */
3869 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3870 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3871 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3872 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3873 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3874 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3875 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3877 fold_state = '\n';/* add one new f_line before this character */
3880 fold_state = '\n';/* add one new f_line before this character */
3883 /* kinsoku point in ASCII */
3884 if ( c1==')'|| /* { [ ( */
3895 /* just after special */
3896 } else if (!is_alnum(prev0)) {
3897 f_line = char_size(c2,c1);
3899 } else if ((prev0==' ') || /* ignored new f_line */
3900 (prev0=='\n')|| /* ignored new f_line */
3901 (prev0&0x80)) { /* X0208 - ASCII */
3902 f_line = char_size(c2,c1);
3903 fold_state = '\n';/* add one new f_line before this character */
3905 fold_state = 1; /* default no fold in ASCII */
3909 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3910 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3911 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3912 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3913 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3914 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3915 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3916 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3917 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3918 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3919 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3920 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3921 /* default no fold in kinsoku */
3924 f_line = char_size(c2,c1);
3925 /* add one new f_line before this character */
3928 f_line = char_size(c2,c1);
3930 /* add one new f_line before this character */
3935 /* terminator process */
3936 switch(fold_state) {
3955 int z_prev2=0,z_prev1=0;
3962 /* if (c2) c1 &= 0x7f; assertion */
3964 if (x0201_f && z_prev2==X0201) { /* X0201 */
3965 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3967 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3969 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3971 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3975 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3984 if (x0201_f && c2==X0201) {
3985 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3986 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3987 z_prev1 = c1; z_prev2 = c2;
3990 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3995 /* JISX0208 Alphabet */
3996 if (alpha_f && c2 == 0x23 ) {
3998 } else if (alpha_f && c2 == 0x21 ) {
3999 /* JISX0208 Kigou */
4004 } else if (alpha_f&0x4) {
4009 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4015 case '>': entity = "&gt;"; break; /* HTML-escape: the entity text replaces the raw character */
4016 case '<': entity = "&lt;"; break;
4017 case '\"': entity = "&quot;"; break;
4018 case '&': entity = "&amp;"; break;
4021 while (*entity) (*o_zconv)(0, *entity++);
4031 #define rot13(c) ( \
4033 (c <= 'M') ? (c + 13): \
4034 (c <= 'Z') ? (c - 13): \
4036 (c <= 'm') ? (c + 13): \
4037 (c <= 'z') ? (c - 13): \
4041 #define rot47(c) ( \
4043 ( c <= 'O' ) ? (c + 47) : \
4044 ( c <= '~' ) ? (c - 47) : \
4052 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
4058 (*o_rot_conv)(c2,c1);
4065 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
4067 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
4070 (*o_hira_conv)(c2,c1);
4075 iso2022jp_check_conv(c2,c1)
4078 STATIC const int range[RANGE_NUM_MAX][2] = {
4101 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4105 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4110 for (i = 0; i < RANGE_NUM_MAX; i++) {
4111 start = range[i][0];
4114 if (c >= start && c <= end) {
4119 (*o_iso2022jp_check_conv)(c2,c1);
4123 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
4125 const unsigned char *mime_pattern[] = {
4126 (const unsigned char *)"\075?EUC-JP?B?",
4127 (const unsigned char *)"\075?SHIFT_JIS?B?",
4128 (const unsigned char *)"\075?ISO-8859-1?Q?",
4129 (const unsigned char *)"\075?ISO-8859-1?B?",
4130 (const unsigned char *)"\075?ISO-2022-JP?B?",
4131 (const unsigned char *)"\075?ISO-2022-JP?Q?",
4132 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
4133 (const unsigned char *)"\075?UTF-8?B?",
4134 (const unsigned char *)"\075?UTF-8?Q?",
4136 (const unsigned char *)"\075?US-ASCII?Q?",
4141 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
4142 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
4143 e_iconv, s_iconv, 0, 0, 0, 0,
4144 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
4150 const int mime_encode[] = {
4151 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
4152 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
4159 const int mime_encode_method[] = {
4160 'B', 'B','Q', 'B', 'B', 'Q',
4161 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
4169 #define MAXRECOVER 20
4174 if (i_getc!=mime_getc) {
4175 i_mgetc = i_getc; i_getc = mime_getc;
4176 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4177 if(mime_f==STRICT_MIME) {
4178 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4179 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4185 unswitch_mime_getc()
4187 if(mime_f==STRICT_MIME) {
4188 i_mgetc = i_mgetc_buf;
4189 i_mungetc = i_mungetc_buf;
4192 i_ungetc = i_mungetc;
4193 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
4194 mime_iconv_back = NULL;
/*
 * mime_begin_strict: match the input that follows an already-seen "=?"
 * against the entries of mime_pattern[].
 *
 * Characters are read through (*i_getc)(f), remembered in r[] and compared
 * (after nkf_toupper) with the current pattern p.  When a character does not
 * match, later patterns that agree on the part matched so far are tried.
 * On success the decode mode, the input converter and the buffering flag are
 * set up and the result of mime_integrity() is returned.
 */
4198 mime_begin_strict(f)
4203 const unsigned char *p,*q;
4204 int r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4206 mime_decode_mode = FALSE;
4207 /* =? has been checked */
4209 p = mime_pattern[j];
4212 for(i=2;p[i]>' ';i++) { /* start after the "=?" prefix; stops at the pattern's end */
4213 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
4214 /* pattern fails, try next one */
4216 while ((p = mime_pattern[++j])) { /* NOTE(review): relies on a null entry ending mime_pattern[] - confirm */
4217 for(k=2;k<i;k++) /* assume length(p) > i */
4218 if (p[k]!=q[k]) break; /* q presumably holds the pattern that just failed - set on a line not shown */
4219 if (k==i && nkf_toupper(c1)==p[k]) break; /* same prefix and the current char fits: switch to this pattern */
4221 if (p) continue; /* found next one, continue */
4222 /* all fails, output from recovery buffer */
4230 mime_decode_mode = p[i-2]; /* character before the pattern's final '?' ('B' or 'Q' in the patterns listed in mime_pattern) */
4232 mime_iconv_back = iconv; /* remember the current converter */
4233 set_iconv(FALSE, mime_priority_func[j]);
4234 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4236 if (mime_decode_mode=='B') {
4237 mimebuf_f = unbuf_f;
4239 /* do MIME integrity check */
4240 return mime_integrity(f,mime_pattern[j]);
4252 /* we don't keep eof of Fifo, because it contains ?= as
4253 a terminator. It was checked in mime_integrity. */
4254 return ((mimebuf_f)?
4255 (*i_mgetc_buf)(f):Fifo(mime_input++));
4259 mime_ungetc_buf(c,f)
4264 (*i_mungetc_buf)(c,f);
4266 Fifo(--mime_input)=c;
4277 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4278 /* re-read and convert again from mime_buffer. */
4280 /* =? has been checked */
4282 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
4283 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4284 /* We accept any character type even if it is broken by new lines */
4285 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
4286 if (c1=='\n'||c1==' '||c1=='\r'||
4287 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
4289 /* Failed. But this could be another MIME preamble */
4297 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
4298 if (!(++i<MAXRECOVER) || c1==EOF) break;
4299 if (c1=='b'||c1=='B') {
4300 mime_decode_mode = 'B';
4301 } else if (c1=='q'||c1=='Q') {
4302 mime_decode_mode = 'Q';
4306 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
4307 if (!(++i<MAXRECOVER) || c1==EOF) break;
4309 mime_decode_mode = FALSE;
4315 if (!mime_decode_mode) {
4316 /* false MIME preamble, restart from mime_buffer */
4317 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4318 /* Since we are in MIME mode until buffer becomes empty, */
4319 /* we never go into mime_begin again for a while. */
4322 /* discard mime preamble, and goto MIME mode */
4324 /* do no MIME integrity check */
4325 return c1; /* used only for checking EOF */
4340 fprintf(stderr, "%s\n", str);
4346 set_input_codename (codename)
4351 strcmp(codename, "") != 0 &&
4352 strcmp(codename, input_codename) != 0)
4354 is_inputcode_mixed = TRUE;
4356 input_codename = codename;
4357 is_inputcode_set = TRUE;
4360 #if !defined(PERL_XS) && !defined(WIN32DLL)
4362 print_guessed_code (filename)
4365 char *codename = "BINARY";
4366 if (!is_inputcode_mixed) {
4367 if (strcmp(input_codename, "") == 0) {
4370 codename = input_codename;
4373 if (filename != NULL) printf("%s:", filename);
4374 printf("%s\n", codename);
4380 #ifdef ANSI_C_PROTOTYPE
4381 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
4384 hex_getc(ch, f, g, u)
4397 if (!nkf_isxdigit(c2)){
4402 if (!nkf_isxdigit(c3)){
4407 return (hex2bin(c2) << 4) | hex2bin(c3);
4414 return hex_getc(':', f, i_cgetc, i_cungetc);
4422 return (*i_cungetc)(c, f);
4429 return hex_getc('%', f, i_ugetc, i_uungetc);
4437 return (*i_uungetc)(c, f);
4441 #ifdef NUMCHAR_OPTION
4446 int (*g)() = i_ngetc;
4447 int (*u)() = i_nungetc;
4458 if (buf[i] == 'x' || buf[i] == 'X'){
4459 for (j = 0; j < 5; j++){
4461 if (!nkf_isxdigit(buf[i])){
4468 c |= hex2bin(buf[i]);
4471 for (j = 0; j < 6; j++){
4475 if (!nkf_isdigit(buf[i])){
4482 c += hex2bin(buf[i]);
4488 return CLASS_UTF16 | c;
4498 numchar_ungetc(c, f)
4502 return (*i_nungetc)(c, f);
4506 #ifdef UNICODE_NORMALIZATION
4508 /* Normalization Form C */
4513 int (*g)() = i_nfc_getc;
4514 int (*u)() = i_nfc_ungetc;
4515 int i=0, j, k=1, lower, upper;
4517 const int *array = NULL;
4518 extern const struct normalization_pair normalization_table[];
4521 while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
4522 lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4523 while (upper >= lower) {
4524 j = (lower+upper) / 2;
4525 array = normalization_table[j].nfd;
4526 for (k=0; k < NORMALIZATION_TABLE_NFD_LENGTH && array[k]; k++){
4527 if (array[k] != buf[k]){
4528 array[k] < buf[k] ? (lower = j + 1) : (upper = j - 1);
4535 array = normalization_table[j].nfc;
4536 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4553 return (*i_nfc_ungetc)(c, f);
4555 #endif /* UNICODE_NORMALIZATION */
4562 int c1, c2, c3, c4, cc;
4563 int t1, t2, t3, t4, mode, exit_mode;
4567 int lwsp_size = 128;
4569 if (mime_top != mime_last) { /* Something is in FIFO */
4570 return Fifo(mime_top++);
4572 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4573 mime_decode_mode=FALSE;
4574 unswitch_mime_getc();
4575 return (*i_getc)(f);
4578 if (mimebuf_f == FIXED_MIME)
4579 exit_mode = mime_decode_mode;
4582 if (mime_decode_mode == 'Q') {
4583 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4585 if (c1=='_') return ' ';
4586 if (c1<=' ' || DEL<=c1) {
4587 mime_decode_mode = exit_mode; /* prepare for quit */
4590 if (c1!='=' && c1!='?') {
4594 mime_decode_mode = exit_mode; /* prepare for quit */
4595 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4596 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4597 /* end Q encoding */
4598 input_mode = exit_mode;
4600 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4601 if (lwsp_buf==NULL) {
4602 perror("can't malloc");
4605 while ((c1=(*i_getc)(f))!=EOF) {
4610 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4618 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
4619 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4634 lwsp_buf[lwsp_count] = c1;
4635 if (lwsp_count++>lwsp_size){
4637 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4638 if (lwsp_buf_new==NULL) {
4641 perror("can't realloc");
4644 lwsp_buf = lwsp_buf_new;
4650 if (lwsp_count > 0) {
4651 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4655 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4656 i_ungetc(lwsp_buf[lwsp_count],f);
4664 if (c1=='='&&c2<' ') { /* this is soft wrap */
4665 while((c1 = (*i_mgetc)(f)) <=' ') { /* skip the rest of the line break; EOF is negative, so it lands here too */
4666 if (c1 == EOF) return (EOF); /* test the character just read instead of consuming (and losing) another one */
4668 mime_decode_mode = 'Q'; /* still in MIME */
4669 goto restart_mime_q;
4672 mime_decode_mode = 'Q'; /* still in MIME */
4676 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4677 if (c2<=' ') return c2;
4678 mime_decode_mode = 'Q'; /* still in MIME */
4679 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
4680 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
4681 return ((hex(c2)<<4) + hex(c3));
4684 if (mime_decode_mode != 'B') {
4685 mime_decode_mode = FALSE;
4686 return (*i_mgetc)(f);
4690 /* Base64 encoding */
4692 MIME allows line breaks in the middle of
4693 Base64, but we are very pessimistic in decoding
4694 in unbuf mode because MIME encoded code may be broken by
4695 less or an editor's control sequence (such as ESC-[-K) in unbuffered
4696 mode. Ignore incomplete MIME.
4698 mode = mime_decode_mode;
4699 mime_decode_mode = exit_mode; /* prepare for quit */
4701 while ((c1 = (*i_mgetc)(f))<=' ') {
4706 if ((c2 = (*i_mgetc)(f))<=' ') {
4709 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4710 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4713 if ((c1 == '?') && (c2 == '=')) {
4716 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4717 if (lwsp_buf==NULL) {
4718 perror("can't malloc");
4721 while ((c1=(*i_getc)(f))!=EOF) {
4726 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4734 if ((c1=(*i_getc)(f))!=EOF) {
4738 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4753 lwsp_buf[lwsp_count] = c1;
4754 if (lwsp_count++>lwsp_size){
4756 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4757 if (lwsp_buf_new==NULL) {
4760 perror("can't realloc");
4763 lwsp_buf = lwsp_buf_new;
4769 if (lwsp_count > 0) {
4770 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4774 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4775 i_ungetc(lwsp_buf[lwsp_count],f);
4784 if ((c3 = (*i_mgetc)(f))<=' ') {
4787 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4788 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4792 if ((c4 = (*i_mgetc)(f))<=' ') {
4795 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4796 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4800 mime_decode_mode = mode; /* still in MIME sigh... */
4802 /* BASE 64 decoding */
4804 t1 = 0x3f & base64decode(c1);
4805 t2 = 0x3f & base64decode(c2);
4806 t3 = 0x3f & base64decode(c3);
4807 t4 = 0x3f & base64decode(c4);
4808 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4810 Fifo(mime_last++) = cc;
4811 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4813 Fifo(mime_last++) = cc;
4814 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4816 Fifo(mime_last++) = cc;
4821 return Fifo(mime_top++);
4829 Fifo(--mime_top) = c;
4836 const unsigned char *p;
4840 /* In buffered mode, read until =? or NL or buffer full
4842 mime_input = mime_top;
4843 mime_last = mime_top;
4845 while(*p) Fifo(mime_input++) = *p++;
4848 while((c=(*i_getc)(f))!=EOF) {
4849 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4850 break; /* buffer full */
4852 if (c=='=' && d=='?') {
4853 /* checked. skip header, start decode */
4854 Fifo(mime_input++) = c;
4855 /* mime_last_input = mime_input; */
4860 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4862 /* Should we check length mod 4? */
4863 Fifo(mime_input++) = c;
4866 /* In case of Incomplete MIME, no MIME decode */
4867 Fifo(mime_input++) = c;
4868 mime_last = mime_input; /* point undecoded buffer */
4869 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4870 switch_mime_getc(); /* anyway we need buffered getc */
4881 i = c - 'A'; /* A..Z 0-25 */
4883 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4885 } else if (c > '/') {
4886 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4887 } else if (c == '+') {
4888 i = '>' /* 62 */ ; /* + 62 */
4890 i = '?' /* 63 */ ; /* / 63 */
4895 STATIC const char basis_64[] =
4896 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4899 #define MIMEOUT_BUF_LENGTH (60)
4900 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
4901 int mimeout_buf_count = 0;
4902 int mimeout_preserve_space = 0;
4903 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
4909 const unsigned char *p;
4912 p = mime_pattern[0];
4913 for(i=0;mime_encode[i];i++) {
4914 if (mode == mime_encode[i]) {
4915 p = mime_pattern[i];
4919 mimeout_mode = mime_encode_method[i];
4922 if (base64_count>45) {
4923 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
4924 (*o_mputc)(mimeout_buf[i]);
4930 if (!mimeout_preserve_space && mimeout_buf_count>0
4931 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4932 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4936 if (!mimeout_preserve_space) {
4937 for (;i<mimeout_buf_count;i++) {
4938 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4939 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4940 (*o_mputc)(mimeout_buf[i]);
4947 mimeout_preserve_space = FALSE;
4953 j = mimeout_buf_count;
4954 mimeout_buf_count = 0;
4956 mime_putc(mimeout_buf[i]);
4972 switch(mimeout_mode) {
4977 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4983 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4989 if (mimeout_f!=FIXED_MIME) {
4991 } else if (mimeout_mode != 'Q')
5000 switch(mimeout_mode) {
5005 } else if (c==CR||c==NL) {
5008 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
5010 (*o_mputc)(itoh4(((c>>4)&0xf)));
5011 (*o_mputc)(itoh4((c&0xf)));
5020 (*o_mputc)(basis_64[c>>2]);
5025 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5031 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
5032 (*o_mputc)(basis_64[c & 0x3F]);
5043 int mime_lastchar2, mime_lastchar1;
5045 void mime_prechar(c2, c1)
5050 if (base64_count + mimeout_buf_count/3*4> 66){
5051 (*o_base64conv)(EOF,0);
5052 (*o_base64conv)(0,NL);
5053 (*o_base64conv)(0,SPACE);
5055 }/*else if (mime_lastchar2){
5056 if (c1 <=DEL && !nkf_isspace(c1)){
5057 (*o_base64conv)(0,SPACE);
5061 if (c2 && mime_lastchar2 == 0
5062 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
5063 (*o_base64conv)(0,SPACE);
5066 mime_lastchar2 = c2;
5067 mime_lastchar1 = c1;
5078 if (mimeout_f == FIXED_MIME){
5079 if (mimeout_mode == 'Q'){
5080 if (base64_count > 71){
5081 if (c!=CR && c!=NL) {
5088 if (base64_count > 71){
5093 if (c == EOF) { /* c==EOF */
5097 if (c != EOF) { /* c==EOF */
5103 /* mimeout_f != FIXED_MIME */
5105 if (c == EOF) { /* c==EOF */
5106 j = mimeout_buf_count;
5107 mimeout_buf_count = 0;
5110 /*if (nkf_isspace(mimeout_buf[i])){
5113 mimeout_addchar(mimeout_buf[i]);
5117 (*o_mputc)(mimeout_buf[i]);
5123 if (mimeout_mode=='Q') {
5124 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5136 if (mimeout_buf_count > 0){
5137 lastchar = mimeout_buf[mimeout_buf_count - 1];
5142 if (!mimeout_mode) {
5143 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
5144 if (nkf_isspace(c)) {
5145 if (c==CR || c==NL) {
5148 for (i=0;i<mimeout_buf_count;i++) {
5149 (*o_mputc)(mimeout_buf[i]);
5150 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
5157 mimeout_buf_count = 1;
5159 if (base64_count > 1
5160 && base64_count + mimeout_buf_count > 76){
5163 if (!nkf_isspace(mimeout_buf[0])){
5168 mimeout_buf[mimeout_buf_count++] = c;
5169 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5170 open_mime(output_mode);
5175 if (lastchar==CR || lastchar == NL){
5176 for (i=0;i<mimeout_buf_count;i++) {
5177 (*o_mputc)(mimeout_buf[i]);
5180 mimeout_buf_count = 0;
5182 if (lastchar==SPACE) {
5183 for (i=0;i<mimeout_buf_count-1;i++) {
5184 (*o_mputc)(mimeout_buf[i]);
5187 mimeout_buf[0] = SPACE;
5188 mimeout_buf_count = 1;
5190 open_mime(output_mode);
5193 /* mimeout_mode == 'B', 1, 2 */
5194 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5195 if (lastchar == CR || lastchar == NL){
5196 if (nkf_isblank(c)) {
5197 for (i=0;i<mimeout_buf_count;i++) {
5198 mimeout_addchar(mimeout_buf[i]);
5200 mimeout_buf_count = 0;
5201 } else if (SPACE<c && c<DEL) {
5203 for (i=0;i<mimeout_buf_count;i++) {
5204 (*o_mputc)(mimeout_buf[i]);
5207 mimeout_buf_count = 0;
5210 if (c==SPACE || c==TAB || c==CR || c==NL) {
5211 for (i=0;i<mimeout_buf_count;i++) {
5212 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
5214 for (i=0;i<mimeout_buf_count;i++) {
5215 (*o_mputc)(mimeout_buf[i]);
5218 mimeout_buf_count = 0;
5221 mimeout_buf[mimeout_buf_count++] = c;
5222 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5224 for (i=0;i<mimeout_buf_count;i++) {
5225 (*o_mputc)(mimeout_buf[i]);
5228 mimeout_buf_count = 0;
5232 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
5233 mimeout_buf[mimeout_buf_count++] = c;
5234 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5235 j = mimeout_buf_count;
5236 mimeout_buf_count = 0;
5238 mimeout_addchar(mimeout_buf[i]);
5245 if (mimeout_buf_count>0) {
5246 j = mimeout_buf_count;
5247 mimeout_buf_count = 0;
5249 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
5251 mimeout_addchar(mimeout_buf[i]);
5257 (*o_mputc)(mimeout_buf[i]);
5259 open_mime(output_mode);
5266 #if defined(PERL_XS) || defined(WIN32DLL)
5271 struct input_code *p = input_code_list;
5284 mime_f = STRICT_MIME;
5285 mime_decode_f = FALSE;
5290 #if defined(MSDOS) || defined(__OS2__)
5295 iso2022jp_f = FALSE;
5296 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
5297 internal_unicode_f = FALSE;
5299 #ifdef UTF8_OUTPUT_ENABLE
5302 ms_ucs_map_f = FALSE;
5303 strict_mapping_f = TRUE;
5304 disable_cp932ext_f = FALSE;
5305 ignore_zwnbsp_f = TRUE;
5306 unicode_round_trip_f = FALSE;
5307 encode_fallback = NULL;
5308 unicode_subchar = '?';
5310 #ifdef UNICODE_NORMALIZATION
5323 is_inputcode_mixed = FALSE;
5324 is_inputcode_set = FALSE;
5328 #ifdef SHIFTJIS_CP932
5337 for (i = 0; i < 256; i++){
5338 prefix_table[i] = 0;
5341 #ifdef UTF8_INPUT_ENABLE
5342 utf16_mode = UTF16BE_INPUT;
5344 mimeout_buf_count = 0;
5349 fold_preserve_f = FALSE;
5352 kanji_intro = DEFAULT_J;
5353 ascii_intro = DEFAULT_R;
5354 fold_margin = FOLD_MARGIN;
5355 output_conv = DEFAULT_CONV;
5356 oconv = DEFAULT_CONV;
5357 o_zconv = no_connection;
5358 o_fconv = no_connection;
5359 o_crconv = no_connection;
5360 o_rot_conv = no_connection;
5361 o_hira_conv = no_connection;
5362 o_base64conv = no_connection;
5363 o_iso2022jp_check_conv = no_connection;
5366 i_ungetc = std_ungetc;
5368 i_bungetc = std_ungetc;
5371 i_mungetc = std_ungetc;
5372 i_mgetc_buf = std_getc;
5373 i_mungetc_buf = std_ungetc;
5374 output_mode = ASCII;
5377 mime_decode_mode = FALSE;
5383 z_prev2=0,z_prev1=0;
5385 iconv_for_check = 0;
5387 input_codename = "";
5395 no_connection(c2,c1)
5398 no_connection2(c2,c1,0);
5402 no_connection2(c2,c1,c0)
5405 fprintf(stderr,"nkf internal module connection failure.\n");
5407 return 0; /* LINT */
5412 #define fprintf dllprintf
5417 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
5418 fprintf(stderr,"Flags:\n");
5419 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
5420 #ifdef DEFAULT_CODE_SJIS
5421 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8N\n"); /* one variant per DEFAULT_CODE_* macro; only the "(DEFAULT)" marker moves */
5423 #ifdef DEFAULT_CODE_JIS
5424 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8N\n");
5426 #ifdef DEFAULT_CODE_EUC
5427 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8N\n");
5429 #ifdef DEFAULT_CODE_UTF8
5430 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8N (DEFAULT)\n");
5432 #ifdef UTF8_OUTPUT_ENABLE
5433 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
5435 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
5436 #ifdef UTF8_INPUT_ENABLE
5437 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
5439 fprintf(stderr,"t no conversion\n");
5440 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
5441 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
5442 fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
5443 fprintf(stderr,"v Show this usage. V: show version\n");
5444 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
5445 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
5446 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
5447 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
5448 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
5449 fprintf(stderr," 3: Convert HTML Entity\n");
5450 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
5451 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
5453 fprintf(stderr,"T Text mode output\n");
5455 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
5456 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
5457 fprintf(stderr,"I Convert non ISO-2022-JP character to GETA\n"); /* help text for the I flag */
5458 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
5459 fprintf(stderr,"long name options\n");
5460 fprintf(stderr," --ic=<input codeset> --oc=<output codeset> set the input or output codeset\n");
5461 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
5462 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
5463 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
5464 fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
5466 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
5468 #ifdef NUMCHAR_OPTION
5469 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
5471 #ifdef UTF8_INPUT_ENABLE
5472 fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
5473 fprintf(stderr," set the way nkf handles unassigned characters\n");
5476 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
5478 fprintf(stderr," -g, --guess Guess the input code\n");
5479 fprintf(stderr," --help,--version\n");
5486 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
5487 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
5490 #if defined(MSDOS) && defined(__WIN16__)
5493 #if defined(MSDOS) && defined(__WIN32__)
5499 ,NKF_VERSION,NKF_RELEASE_DATE);
5500 fprintf(stderr,"\n%s\n",CopyRight);
5505 **
\e$B%Q%C%A@):n<T
\e(B
5506 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
5507 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
5508 ** ohta@src.ricoh.co.jp (Junn Ohta)
5509 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
5510 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
5511 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
5512 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
5513 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
5514 ** GHG00637@nifty-serve.or.jp (COW)