1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** 連絡先： （株）富士通研究所　ソフト３研　市川　至
5 ** （E-Mail Address: ichikawa@flab.fujitsu.co.jp）
6 ** Copyright (C) 1996,1998
8 ** 連絡先： 琉球大学情報工学科 河野 真治 mime/X0208 support
9 ** （E-Mail Address: kono@ie.u-ryukyu.ac.jp）
10 ** 連絡先： COW for DOS & Win16 & Win32 & OS/2
11 ** （E-Mail Address: GHG00637@niftyserve.or.jp）
13 ** このソースのいかなる複写，改変，修正も許諾します。ただし、
14 ** その際には、誰が貢献したを示すこの部分を残すこと。
15 ** 再配布や雑誌の付録などの問い合わせも必要ありません。
16 ** 営利利用も上記に反しない範囲で許可します。
17 ** バイナリの配布の際には version message を保存することを条件とします。
18 ** このプログラムについては特に何の保証もしない、悪しからず。
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be dropped in as a replacement for the previous nkf and used as is.
32 ** When started as, e.g., "nkf -e", input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as is.
35 ** It is still quite likely to contain bugs
36 ** (especially in auto-detection, mixed encodings, and error handling).
38 ** If you find any problem, please contact
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.88 2006/01/11 12:12:30 naruse Exp $ */
43 #define NKF_VERSION "2.0.5"
44 #define NKF_RELEASE_DATE "2006-01-11"
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2006 Kono, Furukawa, Naruse"
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
116 #if defined(MSDOS) || defined(__OS2__)
123 #define setbinmode(fp) fsetbin(fp)
124 #else /* Microsoft C, Turbo C */
125 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
127 #else /* UNIX,OS/2 */
128 #define setbinmode(fp)
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
134 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
147 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
194 #define X0213_1 0x284F /* JIS X 0213 plane 1; must differ from X0213_2. NOTE(review): low byte assumed to be the ISO-2022-JP-3 final byte 'O' (ESC $ ( O) - confirm against the escape-sequence parser */
195 #define X0213_2 0x2850 /* JIS X 0213 plane 2; NOTE(review): low byte assumed to be the final byte 'P' (ESC $ ( P) */
197 /* Input Assumption */
201 #define LATIN1_INPUT 6
203 #define STRICT_MIME 8
208 #define JAPANESE_EUC 10
212 #define UTF8_INPUT 13
213 #define UTF16BE_INPUT 14
214 #define UTF16LE_INPUT 15
/* Hand-rolled character-class helpers (plain range comparisons, no <ctype.h>).
   These are function-like macros: the argument is substituted without
   parentheses and may be evaluated more than once, so pass only simple
   expressions that have no side effects. */
234 #define is_alnum(c) \
235 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
237 /* I don't trust portability of toupper */
238 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
239 #define nkf_isoctal(c) ('0'<=c && c<='7')
240 #define nkf_isdigit(c) ('0'<=c && c<='9')
241 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
242 #define nkf_isblank(c) (c == SPACE || c == TAB)
243 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == NL)
244 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
245 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
/* hex2bin performs no validation: x is assumed to already be a hex digit
   (a decimal digit maps to 0-9, anything else is upper-cased and offset from 'A'). */
246 #define hex2bin(x) ( nkf_isdigit(x) ? x - '0' : nkf_toupper(x) - 'A' + 10)
248 #define HOLD_SIZE 1024
249 #define IOBUF_SIZE 16384
251 #define DEFAULT_J 'B'
252 #define DEFAULT_R 'B'
254 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
255 #define SJ6394 0x0161 /* 63 - 94 ku offset */
257 #define RANGE_NUM_MAX 18
262 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
263 #define sizeof_euc_utf8 94
264 #define sizeof_euc_to_utf8_1byte 94
265 #define sizeof_euc_to_utf8_2bytes 94
266 #define sizeof_utf8_to_euc_C2 64
267 #define sizeof_utf8_to_euc_E5B8 64
268 #define sizeof_utf8_to_euc_2bytes 112
269 #define sizeof_utf8_to_euc_3bytes 16
272 /* MIME preprocessor */
274 #ifdef EASYWIN /*Easy Win */
275 extern POINT _BufferSize;
278 /* function prototype */
280 #ifdef ANSI_C_PROTOTYPE
282 #define STATIC static
296 void (*status_func)PROTO((struct input_code *, int));
297 int (*iconv_func)PROTO((int c2, int c1, int c0));
301 STATIC char *input_codename = "";
304 STATIC const char *CopyRight = COPY_RIGHT;
306 #if !defined(PERL_XS) && !defined(WIN32DLL)
307 STATIC int noconvert PROTO((FILE *f));
309 STATIC int kanji_convert PROTO((FILE *f));
310 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
311 STATIC int push_hold_buf PROTO((int c2));
312 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
313 STATIC int s_iconv PROTO((int c2,int c1,int c0));
314 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
315 STATIC int e_iconv PROTO((int c2,int c1,int c0));
316 #ifdef UTF8_INPUT_ENABLE
317 /* don't convert characters when the mapping is not defined in the standard */
318 STATIC int strict_mapping_f = TRUE;
319 /* disable NEC special, NEC-selected IBM extended and IBM extended characters */
320 STATIC int disable_cp932ext_f = FALSE;
321 /* ignore ZERO WIDTH NO-BREAK SPACE */
322 STATIC int ignore_zwnbsp_f = TRUE;
323 /* don't convert characters that can't secure round trip conversion */
324 STATIC int unicode_round_trip_f = FALSE;
325 STATIC void encode_fallback_html PROTO((int c));
326 STATIC void encode_fallback_xml PROTO((int c));
327 STATIC void encode_fallback_java PROTO((int c));
328 STATIC void encode_fallback_perl PROTO((int c));
329 STATIC void encode_fallback_subchar PROTO((int c));
330 STATIC void (*encode_fallback)PROTO((int c)) = NULL;
331 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
332 STATIC int w_iconv PROTO((int c2,int c1,int c0));
333 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
334 STATIC int unicode_to_jis_common PROTO((int c2,int c1,int c0,int *p2,int *p1));
335 STATIC int w_iconv_common PROTO((int c1,int c0,const unsigned short *const *pp,int psize,int *p2,int *p1));
336 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
337 STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
339 #ifdef UTF8_OUTPUT_ENABLE
340 STATIC int e2w_conv PROTO((int c2,int c1));
341 STATIC void w_oconv PROTO((int c2,int c1));
342 STATIC void w_oconv16 PROTO((int c2,int c1));
344 STATIC void e_oconv PROTO((int c2,int c1));
345 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
346 STATIC void s_oconv PROTO((int c2,int c1));
347 STATIC void j_oconv PROTO((int c2,int c1));
348 STATIC void fold_conv PROTO((int c2,int c1));
349 STATIC void cr_conv PROTO((int c2,int c1));
350 STATIC void z_conv PROTO((int c2,int c1));
351 STATIC void rot_conv PROTO((int c2,int c1));
352 STATIC void hira_conv PROTO((int c2,int c1));
353 STATIC void base64_conv PROTO((int c2,int c1));
354 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
355 STATIC void no_connection PROTO((int c2,int c1));
356 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
358 STATIC void code_score PROTO((struct input_code *ptr));
359 STATIC void code_status PROTO((int c));
361 STATIC void std_putc PROTO((int c));
362 STATIC int std_getc PROTO((FILE *f));
363 STATIC int std_ungetc PROTO((int c,FILE *f));
365 STATIC int broken_getc PROTO((FILE *f));
366 STATIC int broken_ungetc PROTO((int c,FILE *f));
368 STATIC int mime_begin PROTO((FILE *f));
369 STATIC int mime_getc PROTO((FILE *f));
370 STATIC int mime_ungetc PROTO((int c,FILE *f));
372 STATIC int mime_begin_strict PROTO((FILE *f));
373 STATIC int mime_getc_buf PROTO((FILE *f));
374 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
375 STATIC int mime_integrity PROTO((FILE *f,const unsigned char *p));
377 STATIC int base64decode PROTO((int c));
378 STATIC void mime_prechar PROTO((int c2, int c1));
379 STATIC void mime_putc PROTO((int c));
380 STATIC void open_mime PROTO((int c));
381 STATIC void close_mime PROTO(());
383 STATIC void usage PROTO(());
384 STATIC void version PROTO(());
386 STATIC void options PROTO((unsigned char *c));
387 #if defined(PERL_XS) || defined(WIN32DLL)
388 STATIC void reinit PROTO(());
393 #if !defined(PERL_XS) && !defined(WIN32DLL)
394 STATIC unsigned char stdibuf[IOBUF_SIZE];
395 STATIC unsigned char stdobuf[IOBUF_SIZE];
397 STATIC unsigned char hold_buf[HOLD_SIZE*2];
398 STATIC int hold_count;
400 /* MIME preprocessor fifo */
402 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
403 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
404 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
405 STATIC unsigned char mime_buf[MIME_BUF_SIZE];
406 STATIC unsigned int mime_top = 0;
407 STATIC unsigned int mime_last = 0; /* decoded */
408 STATIC unsigned int mime_input = 0; /* undecoded */
409 STATIC int (*mime_iconv_back)PROTO((int c2,int c1,int c0)) = NULL;
/* Conversion option flags and their default values. */
412 STATIC int unbuf_f = FALSE;
413 STATIC int estab_f = FALSE;
414 STATIC int nop_f = FALSE;
415 STATIC int binmode_f = TRUE; /* binary mode */
416 STATIC int rot_f = FALSE; /* rot13/47 mode */
417 STATIC int hira_f = FALSE; /* hiragana/katakana conversion */
418 STATIC int input_f = FALSE; /* non fixed input code */
419 STATIC int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
420 STATIC int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
421 STATIC int mime_decode_f = FALSE; /* mime decode is explicitly on */
422 STATIC int mimebuf_f = FALSE; /* MIME buffered input */
423 STATIC int broken_f = FALSE; /* convert ESC-less broken JIS */
424 STATIC int iso8859_f = FALSE; /* ISO8859 through */
425 STATIC int mimeout_f = FALSE; /* base64 mode */
426 #if defined(MSDOS) || defined(__OS2__)
427 STATIC int x0201_f = TRUE; /* Assume JISX0201 kana */
429 STATIC int x0201_f = NO_X0201; /* Assume NO JISX0201 */
431 STATIC int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
432 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
433 STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
435 #ifdef UTF8_OUTPUT_ENABLE
436 STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
437 STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
438 STATIC int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
439 STATIC int unicode_subchar = '?'; /* the regular substitution character */
442 #ifdef UNICODE_NORMALIZATION
443 STATIC int nfc_f = FALSE;
444 STATIC int (*i_nfc_getc)PROTO((FILE *)) = std_getc; /* input of ugetc */
445 STATIC int (*i_nfc_ungetc)PROTO((int c ,FILE *f)) = std_ungetc;
446 STATIC int nfc_getc PROTO((FILE *f));
447 STATIC int nfc_ungetc PROTO((int c,FILE *f));
451 STATIC int cap_f = FALSE;
452 STATIC int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
453 STATIC int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
454 STATIC int cap_getc PROTO((FILE *f));
455 STATIC int cap_ungetc PROTO((int c,FILE *f));
457 STATIC int url_f = FALSE;
458 STATIC int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
459 STATIC int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
460 STATIC int url_getc PROTO((FILE *f));
461 STATIC int url_ungetc PROTO((int c,FILE *f));
464 #ifdef NUMCHAR_OPTION
465 #define CLASS_MASK 0x0f000000
466 #define CLASS_UTF16 0x01000000
467 STATIC int numchar_f = FALSE;
468 STATIC int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
469 STATIC int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
470 STATIC int numchar_getc PROTO((FILE *f));
471 STATIC int numchar_ungetc PROTO((int c,FILE *f));
475 STATIC int noout_f = FALSE;
476 STATIC void no_putc PROTO((int c));
477 STATIC int debug_f = FALSE;
478 STATIC void debug PROTO((const char *str));
479 STATIC int (*iconv_for_check)() = 0;
482 STATIC int guess_f = FALSE;
484 STATIC void print_guessed_code PROTO((char *filename));
486 STATIC void set_input_codename PROTO((char *codename));
487 STATIC int is_inputcode_mixed = FALSE;
488 STATIC int is_inputcode_set = FALSE;
491 STATIC int exec_f = 0;
494 #ifdef SHIFTJIS_CP932
495 /* invert IBM extended characters to others
496 and controls some UCS mapping for Microsoft Code Page */
497 STATIC int cp51932_f = TRUE;
498 #define CP932_TABLE_BEGIN (0xfa)
499 #define CP932_TABLE_END (0xfc)
501 /* invert NEC-selected IBM extended characters to IBM extended characters */
502 STATIC int cp932inv_f = TRUE;
503 #define CP932INV_TABLE_BEGIN (0xed)
504 #define CP932INV_TABLE_END (0xee)
506 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
507 #endif /* SHIFTJIS_CP932 */
510 STATIC int x0212_f = FALSE;
511 STATIC int x0212_shift PROTO((int c));
512 STATIC int x0212_unshift PROTO((int c));
513 STATIC int x0213_f = FALSE;
516 STATIC unsigned char prefix_table[256];
518 STATIC void e_status PROTO((struct input_code *, int));
519 STATIC void s_status PROTO((struct input_code *, int));
521 #ifdef UTF8_INPUT_ENABLE
522 STATIC void w_status PROTO((struct input_code *, int));
523 STATIC void w16_status PROTO((struct input_code *, int));
524 STATIC int utf16_mode = UTF16BE_INPUT;
527 struct input_code input_code_list[] = {
528 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
529 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
530 #ifdef UTF8_INPUT_ENABLE
531 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
532 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
537 STATIC int mimeout_mode = 0;
538 STATIC int base64_count = 0;
540 /* X0208 -> ASCII converter */
543 STATIC int f_line = 0; /* chars in line */
544 STATIC int f_prev = 0;
545 STATIC int fold_preserve_f = FALSE; /* preserve new lines */
546 STATIC int fold_f = FALSE;
547 STATIC int fold_len = 0;
550 STATIC unsigned char kanji_intro = DEFAULT_J;
551 STATIC unsigned char ascii_intro = DEFAULT_R;
555 #define FOLD_MARGIN 10
556 #define DEFAULT_FOLD 60
558 STATIC int fold_margin = FOLD_MARGIN;
562 #ifdef DEFAULT_CODE_JIS
563 # define DEFAULT_CONV j_oconv
565 #ifdef DEFAULT_CODE_SJIS
566 # define DEFAULT_CONV s_oconv
568 #ifdef DEFAULT_CODE_EUC
569 # define DEFAULT_CONV e_oconv
571 #ifdef DEFAULT_CODE_UTF8
572 # define DEFAULT_CONV w_oconv
575 /* process default */
576 STATIC void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
578 STATIC void (*oconv)PROTO((int c2,int c1)) = no_connection;
579 /* s_iconv or oconv */
580 STATIC int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
582 STATIC void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
583 STATIC void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
584 STATIC void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
585 STATIC void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
586 STATIC void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
587 STATIC void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
588 STATIC void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
590 /* STATIC redirections */
592 STATIC void (*o_putc)PROTO((int c)) = std_putc;
594 STATIC int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
595 STATIC int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
597 STATIC int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
598 STATIC int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
600 STATIC void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
602 STATIC int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
603 STATIC int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
605 /* for strict mime */
606 STATIC int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
607 STATIC int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
610 STATIC int output_mode = ASCII, /* output kanji mode */
611 input_mode = ASCII, /* input kanji mode */
612 shift_mode = FALSE; /* TRUE shift out, or X0201 */
613 STATIC int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
615 /* X0201 / X0208 conversion tables */
617 /* X0201 kana conversion table */
620 unsigned char cv[]= {
621 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
622 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
623 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
624 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
625 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
626 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
627 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
628 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
629 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
630 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
631 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
632 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
633 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
634 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
635 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
636 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
640 /* X0201 kana conversion table for dakuten */
643 unsigned char dv[]= {
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
646 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
647 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
648 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
649 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
650 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
651 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
652 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
653 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
654 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
655 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
656 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
657 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
658 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
659 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662 /* X0201 kana conversion table for han-dakuten */
665 unsigned char ev[]= {
666 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
667 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
668 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
669 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
670 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
671 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
672 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
674 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
675 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
676 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
677 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
678 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
679 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
680 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
681 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
685 /* X0208 kigou conversion table */
686 /* 0x8140 - 0x819e */
688 unsigned char fv[] = {
690 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
691 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
692 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
693 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
694 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
695 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
696 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
697 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
698 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
699 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
700 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
701 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
707 STATIC int file_out = FALSE;
709 STATIC int overwrite = FALSE;
712 STATIC int crmode_f = 0; /* CR, NL, CRLF */
713 #ifdef EASYWIN /*Easy Win */
714 STATIC int end_check;
717 #define STD_GC_BUFSIZE (256)
718 int std_gc_buf[STD_GC_BUFSIZE];
722 #include "nkf32dll.c"
723 #elif defined(PERL_XS)
733 char *outfname = NULL;
736 #ifdef EASYWIN /*Easy Win */
737 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
740 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
741 cp = (unsigned char *)*argv;
746 if (pipe(fds) < 0 || (pid = fork()) < 0){
757 execvp(argv[1], &argv[1]);
771 if(x0201_f == WISH_TRUE)
772 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
774 if (binmode_f == TRUE)
776 if (freopen("","wb",stdout) == NULL)
783 setbuf(stdout, (char *) NULL);
785 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
788 if (binmode_f == TRUE)
790 if (freopen("","rb",stdin) == NULL) return (-1);
794 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
798 kanji_convert(stdin);
799 if (guess_f) print_guessed_code(NULL);
804 is_inputcode_mixed = FALSE;
805 is_inputcode_set = FALSE;
810 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
819 /* reopen file for stdout */
820 if (file_out == TRUE) {
823 outfname = malloc(strlen(origfname)
824 + strlen(".nkftmpXXXXXX")
830 strcpy(outfname, origfname);
834 for (i = strlen(outfname); i; --i){
835 if (outfname[i - 1] == '/'
836 || outfname[i - 1] == '\\'){
842 strcat(outfname, "ntXXXXXX");
844 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
847 strcat(outfname, ".nkftmpXXXXXX");
848 fd = mkstemp(outfname);
851 || (fd_backup = dup(fileno(stdout))) < 0
852 || dup2(fd, fileno(stdout)) < 0
863 outfname = "nkf.out";
866 if(freopen(outfname, "w", stdout) == NULL) {
870 if (binmode_f == TRUE) {
872 if (freopen("","wb",stdout) == NULL)
879 if (binmode_f == TRUE)
881 if (freopen("","rb",fin) == NULL)
886 setvbuffer(fin, stdibuf, IOBUF_SIZE);
890 char *filename = NULL;
892 if (nfiles > 1) filename = origfname;
893 if (guess_f) print_guessed_code(filename);
899 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
907 if (dup2(fd_backup, fileno(stdout)) < 0){
910 if (stat(origfname, &sb)) {
911 fprintf(stderr, "Can't stat %s\n", origfname);
913 /* restore the original file's permissions */
914 if (chmod(outfname, sb.st_mode)) {
915 fprintf(stderr, "Can't set permission %s\n", outfname);
918 /* restore the original file's timestamp */
919 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
920 tb[0] = tb[1] = sb.st_mtime;
921 if (utime(outfname, tb)) {
922 fprintf(stderr, "Can't set timestamp %s\n", outfname);
925 tb.actime = sb.st_atime;
926 tb.modtime = sb.st_mtime;
927 if (utime(outfname, &tb)) {
928 fprintf(stderr, "Can't set timestamp %s\n", outfname);
932 if (unlink(origfname)){
936 if (rename(outfname, origfname)) {
938 fprintf(stderr, "Can't rename %s to %s\n",
939 outfname, origfname);
947 #ifdef EASYWIN /*Easy Win */
948 if (file_out == FALSE)
949 scanf("%d",&end_check);
952 #else /* for Other OS */
953 if (file_out == TRUE)
958 #endif /* WIN32DLL */
985 {"katakana-hiragana","h3"},
992 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
993 {"internal-unicode", ""},
995 #ifdef UTF8_OUTPUT_ENABLE
1005 {"fb-subchar=", ""},
1007 #ifdef UTF8_INPUT_ENABLE
1008 {"utf8-input", "W"},
1009 {"utf16-input", "W16"},
1010 {"disable-cp932ext", ""},
1011 {"strict-mapping", ""},
1012 {"enable-round-trip",""},
1014 #ifdef UNICODE_NORMALIZATION
1015 {"utf8mac-input", ""},
1024 #ifdef NUMCHAR_OPTION
1025 {"numchar-input", ""},
1031 #ifdef SHIFTJIS_CP932
1041 STATIC int option_mode = 0;
1048 unsigned char *p = NULL;
1049 unsigned char *cp_back = NULL;
1050 unsigned char codeset[32];
1054 while(*cp && *cp++!='-');
1055 while (*cp || cp_back) {
1063 case '-': /* literal options */
1064 if (!*cp || *cp == SPACE) { /* ignore the rest of arguments */
1068 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1069 p = (unsigned char *)long_option[i].name;
1070 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1071 if (*p == cp[j] || cp[j] == ' '){
1078 while(*cp && *cp != SPACE && cp++);
1079 if (long_option[i].alias[0]){
1081 cp = (unsigned char *)long_option[i].alias;
1083 if (strcmp(long_option[i].name, "ic=") == 0){
1084 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1085 codeset[i] = nkf_toupper(p[i]);
1088 if(strcmp(codeset, "ISO-2022-JP") == 0){
1089 input_f = JIS_INPUT;
1090 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1091 input_f = SJIS_INPUT;
1092 if (x0201_f==NO_X0201) x0201_f=TRUE;
1093 }else if(strcmp(codeset, "CP932") == 0){
1094 input_f = SJIS_INPUT;
1096 #ifdef SHIFTJIS_CP932
1100 #ifdef UTF8_OUTPUT_ENABLE
1101 ms_ucs_map_f = TRUE;
1103 }else if(strcmp(codeset, "EUCJP") == 0 ||
1104 strcmp(codeset, "EUC-JP") == 0){
1105 input_f = JIS_INPUT;
1106 }else if(strcmp(codeset, "CP51932") == 0){
1107 input_f = JIS_INPUT;
1109 #ifdef SHIFTJIS_CP932
1113 #ifdef UTF8_OUTPUT_ENABLE
1114 ms_ucs_map_f = TRUE;
1116 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1117 strcmp(codeset, "EUCJP-MS") == 0){
1118 input_f = JIS_INPUT;
1120 #ifdef SHIFTJIS_CP932
1124 #ifdef UTF8_OUTPUT_ENABLE
1125 ms_ucs_map_f = TRUE;
1127 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1128 strcmp(codeset, "EUCJP-ASCII") == 0){
1129 input_f = JIS_INPUT;
1131 #ifdef SHIFTJIS_CP932
1135 #ifdef UTF8_OUTPUT_ENABLE
1136 ms_ucs_map_f = FALSE;
1138 }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){
1139 input_f = SJIS_INPUT;
1141 #ifdef SHIFTJIS_CP932
1145 if (x0201_f==NO_X0201) x0201_f=TRUE;
1146 }else if(strcmp(codeset, "EUC-JISX0213") == 0){
1147 input_f = JIS_INPUT;
1150 #ifdef SHIFTJIS_CP932
1154 #ifdef UTF8_INPUT_ENABLE
1155 }else if(strcmp(codeset, "UTF-8") == 0 ||
1156 strcmp(codeset, "UTF-8N") == 0 ||
1157 strcmp(codeset, "UTF-8-BOM") == 0){
1158 input_f = UTF8_INPUT;
1159 #ifdef UNICODE_NORMALIZATION
1160 }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1161 strcmp(codeset, "UTF-8-MAC") == 0){
1162 input_f = UTF8_INPUT;
1165 }else if(strcmp(codeset, "UTF-16") == 0){
1166 input_f = UTF16BE_INPUT;
1167 utf16_mode = UTF16BE_INPUT;
1168 }else if(strcmp(codeset, "UTF-16BE") == 0 ||
1169 strcmp(codeset, "UTF-16BE-BOM") == 0){
1170 input_f = UTF16BE_INPUT;
1171 utf16_mode = UTF16BE_INPUT;
1172 }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1173 strcmp(codeset, "UTF-16LE-BOM") == 0){
1174 input_f = UTF16LE_INPUT;
1175 utf16_mode = UTF16LE_INPUT;
1180 if (strcmp(long_option[i].name, "oc=") == 0){
1181 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1182 codeset[i] = nkf_toupper(p[i]);
1185 if(strcmp(codeset, "ISO-2022-JP") == 0){
1186 output_conv = j_oconv;
1187 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1188 output_conv = s_oconv;
1189 }else if(strcmp(codeset, "CP932") == 0){
1190 output_conv = s_oconv;
1192 #ifdef SHIFTJIS_CP932
1196 #ifdef UTF8_OUTPUT_ENABLE
1197 ms_ucs_map_f = TRUE;
1199 }else if(strcmp(codeset, "EUCJP") == 0 ||
1200 strcmp(codeset, "EUC-JP") == 0){
1201 output_conv = e_oconv;
1202 }else if(strcmp(codeset, "CP51932") == 0){
1203 output_conv = e_oconv;
1205 #ifdef SHIFTJIS_CP932
1209 #ifdef UTF8_OUTPUT_ENABLE
1210 ms_ucs_map_f = TRUE;
1212 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1213 strcmp(codeset, "EUCJP-MS") == 0){
1214 output_conv = e_oconv;
1217 #ifdef SHIFTJIS_CP932
1220 #ifdef UTF8_OUTPUT_ENABLE
1221 ms_ucs_map_f = TRUE;
1223 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1224 strcmp(codeset, "EUCJP-ASCII") == 0){
1225 output_conv = e_oconv;
1228 #ifdef SHIFTJIS_CP932
1231 #ifdef UTF8_OUTPUT_ENABLE
1232 ms_ucs_map_f = FALSE;
1234 }else if(strcmp(codeset, "SHIFT_JISX0213") == 0){
1235 output_conv = s_oconv;
1237 }else if(strcmp(codeset, "EUC-JISX0213") == 0){
1238 output_conv = e_oconv;
1241 #ifdef UTF8_OUTPUT_ENABLE
1242 }else if(strcmp(codeset, "UTF-8") == 0){
1243 output_conv = w_oconv;
1244 }else if(strcmp(codeset, "UTF-8N") == 0){
1245 output_conv = w_oconv;
1247 }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1248 output_conv = w_oconv;
1250 }else if(strcmp(codeset, "UTF-16BE") == 0){
1251 output_conv = w_oconv16;
1253 }else if(strcmp(codeset, "UTF-16") == 0 ||
1254 strcmp(codeset, "UTF-16BE-BOM") == 0){
1255 output_conv = w_oconv16;
1257 }else if(strcmp(codeset, "UTF-16LE") == 0){
1258 output_conv = w_oconv16;
1261 }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1262 output_conv = w_oconv16;
1270 if (strcmp(long_option[i].name, "overwrite") == 0){
1277 if (strcmp(long_option[i].name, "cap-input") == 0){
1281 if (strcmp(long_option[i].name, "url-input") == 0){
1286 #ifdef NUMCHAR_OPTION
1287 if (strcmp(long_option[i].name, "numchar-input") == 0){
1293 if (strcmp(long_option[i].name, "no-output") == 0){
1297 if (strcmp(long_option[i].name, "debug") == 0){
1302 if (strcmp(long_option[i].name, "cp932") == 0){
1303 #ifdef SHIFTJIS_CP932
1307 #ifdef UTF8_OUTPUT_ENABLE
1308 ms_ucs_map_f = TRUE;
1312 if (strcmp(long_option[i].name, "no-cp932") == 0){
1313 #ifdef SHIFTJIS_CP932
1317 #ifdef UTF8_OUTPUT_ENABLE
1318 ms_ucs_map_f = FALSE;
1322 #ifdef SHIFTJIS_CP932
1323 if (strcmp(long_option[i].name, "cp932inv") == 0){
1330 if (strcmp(long_option[i].name, "x0212") == 0){
1337 if (strcmp(long_option[i].name, "exec-in") == 0){
1341 if (strcmp(long_option[i].name, "exec-out") == 0){
1346 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1347 if (strcmp(long_option[i].name, "internal-unicode") == 0){
1348 internal_unicode_f = TRUE;
1351 if (strcmp(long_option[i].name, "disable-cp932ext") == 0){
1352 disable_cp932ext_f = TRUE;
1355 if (strcmp(long_option[i].name, "enable-round-trip") == 0){
1356 unicode_round_trip_f = TRUE;
1359 if (strcmp(long_option[i].name, "fb-skip") == 0){
1360 encode_fallback = NULL;
1363 if (strcmp(long_option[i].name, "fb-html") == 0){
1364 encode_fallback = encode_fallback_html;
1367 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1368 encode_fallback = encode_fallback_xml;
1371 if (strcmp(long_option[i].name, "fb-java") == 0){
1372 encode_fallback = encode_fallback_java;
1375 if (strcmp(long_option[i].name, "fb-perl") == 0){
1376 encode_fallback = encode_fallback_perl;
1379 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1380 encode_fallback = encode_fallback_subchar;
1383 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1384 encode_fallback = encode_fallback_subchar;
1385 unicode_subchar = 0;
1387 /* decimal number */
1388 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1389 unicode_subchar *= 10;
1390 unicode_subchar += hex2bin(p[i]);
1392 }else if(p[1] == 'x' || p[1] == 'X'){
1393 /* hexadecimal number */
1394 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1395 unicode_subchar <<= 4;
1396 unicode_subchar |= hex2bin(p[i]);
1400 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1401 unicode_subchar *= 8;
1402 unicode_subchar += hex2bin(p[i]);
1405 w16e_conv(unicode_subchar, &i, &j);
1406 unicode_subchar = i<<8 | j;
1410 #ifdef UTF8_OUTPUT_ENABLE
1411 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1412 ms_ucs_map_f = TRUE;
1416 #ifdef UNICODE_NORMALIZATION
1417 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1418 input_f = UTF8_INPUT;
1423 if (strcmp(long_option[i].name, "prefix=") == 0){
1424 if (' ' < p[0] && p[0] < 128){
1425 for (i = 1; ' ' < p[i] && p[i] < 128; i++){
1426 prefix_table[p[i]] = p[0];
1433 case 'b': /* buffered mode */
1436 case 'u': /* non buffered mode */
1439 case 't': /* transparent mode */
1442 case 'j': /* JIS output */
1444 output_conv = j_oconv;
1446 case 'e': /* AT&T EUC output */
1447 output_conv = e_oconv;
1449 case 's': /* SJIS output */
1450 output_conv = s_oconv;
1452 case 'l': /* ISO8859 Latin-1 support, no conversion */
1453 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1454 input_f = LATIN1_INPUT;
1456 case 'i': /* Kanji IN ESC-$-@/B */
1457 if (*cp=='@'||*cp=='B')
1458 kanji_intro = *cp++;
1460 case 'o': /* ASCII IN ESC-(-J/B */
1461 if (*cp=='J'||*cp=='B'||*cp=='H')
1462 ascii_intro = *cp++;
1466 bit:1 katakana->hiragana
1467 bit:2 hiragana->katakana
1469 if ('9'>= *cp && *cp>='0')
1470 hira_f |= (*cp++ -'0');
1477 #if defined(MSDOS) || defined(__OS2__)
1492 #ifdef UTF8_OUTPUT_ENABLE
1493 case 'w': /* UTF-8 output */
1494 if ('1'== cp[0] && '6'==cp[1]) {
1495 output_conv = w_oconv16; cp+=2;
1497 unicode_bom_f=2; cp++;
1500 unicode_bom_f=1; cp++;
1502 } else if (cp[0] == 'B') {
1503 unicode_bom_f=2; cp++;
1505 unicode_bom_f=1; cp++;
1508 } else if (cp[0] == '8') {
1509 output_conv = w_oconv; cp++;
1512 unicode_bom_f=1; cp++;
1515 output_conv = w_oconv;
1518 #ifdef UTF8_INPUT_ENABLE
1519 case 'W': /* UTF-8 input */
1520 if ('1'== cp[0] && '6'==cp[1]) {
1521 input_f = UTF16BE_INPUT;
1522 utf16_mode = UTF16BE_INPUT;
1526 input_f = UTF16LE_INPUT;
1527 utf16_mode = UTF16LE_INPUT;
1528 } else if (cp[0] == 'B') {
1530 input_f = UTF16BE_INPUT;
1531 utf16_mode = UTF16BE_INPUT;
1533 } else if (cp[0] == '8') {
1535 input_f = UTF8_INPUT;
1537 input_f = UTF8_INPUT;
1540 /* Input code assumption */
1541 case 'J': /* JIS input */
1542 case 'E': /* AT&T EUC input */
1543 input_f = JIS_INPUT;
1545 case 'S': /* MS Kanji input */
1546 input_f = SJIS_INPUT;
1547 if (x0201_f==NO_X0201) x0201_f=TRUE;
1549 case 'Z': /* Convert X0208 alphabet to ASCII */
1550 /* bit:0 Convert X0208
1551 bit:1 Convert Kankaku to one space
1552 bit:2 Convert Kankaku to two spaces
1553 bit:3 Convert HTML Entity
1555 if ('9'>= *cp && *cp>='0')
1556 alpha_f |= 1<<(*cp++ -'0');
1560 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1561 x0201_f = FALSE; /* No X0201->X0208 conversion */
1563 ESC-(-I in JIS, EUC, MS Kanji
1564 SI/SO in JIS, EUC, MS Kanji
1565 SSO in EUC, JIS, not in MS Kanji
1566 MS Kanji (0xa0-0xdf)
1568 ESC-(-I in JIS (0x20-0x5f)
1569 SSO in EUC (0xa0-0xdf)
1570 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1573 case 'X': /* Assume X0201 kana */
1574 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1577 case 'F': /* preserve new lines */
1578 fold_preserve_f = TRUE;
1579 case 'f': /* folding -f60 or -f */
1582 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1584 fold_len += *cp++ - '0';
1586 if (!(0<fold_len && fold_len<BUFSIZ))
1587 fold_len = DEFAULT_FOLD;
1591 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1593 fold_margin += *cp++ - '0';
1597 case 'm': /* MIME support */
1598 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1599 if (*cp=='B'||*cp=='Q') {
1600 mime_decode_mode = *cp++;
1601 mimebuf_f = FIXED_MIME;
1602 } else if (*cp=='N') {
1603 mime_f = TRUE; cp++;
1604 } else if (*cp=='S') {
1605 mime_f = STRICT_MIME; cp++;
1606 } else if (*cp=='0') {
1607 mime_decode_f = FALSE;
1608 mime_f = FALSE; cp++;
1611 case 'M': /* MIME output */
1614 mimeout_f = FIXED_MIME; cp++;
1615 } else if (*cp=='Q') {
1617 mimeout_f = FIXED_MIME; cp++;
1622 case 'B': /* Broken JIS support */
1624 bit:1 allow any x on ESC-(-x or ESC-$-x
1625 bit:2 reset to ascii on NL
1627 if ('9'>= *cp && *cp>='0')
1628 broken_f |= 1<<(*cp++ -'0');
1633 case 'O':/* for Output file */
1637 case 'c':/* add cr code */
1640 case 'd':/* delete cr code */
1643 case 'I': /* ISO-2022-JP output */
1646 case 'L': /* line mode */
1647 if (*cp=='u') { /* unix */
1648 crmode_f = NL; cp++;
1649 } else if (*cp=='m') { /* mac */
1650 crmode_f = CR; cp++;
1651 } else if (*cp=='w') { /* windows */
1652 crmode_f = CRLF; cp++;
1653 } else if (*cp=='0') { /* no conversion */
1663 /* multiple options in one string are allowed for the Perl module */
1664 while(*cp && *cp++!='-');
1667 /* bogus option but ignored */
1673 #ifdef ANSI_C_PROTOTYPE
1674 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1676 struct input_code * find_inputcode_byfunc(iconv_func)
1677 int (*iconv_func)();
1681 struct input_code *p = input_code_list;
1683 if (iconv_func == p->iconv_func){
1692 #ifdef ANSI_C_PROTOTYPE
1693 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1695 void set_iconv(f, iconv_func)
1697 int (*iconv_func)();
1700 #ifdef INPUT_CODE_FIX
1708 #ifdef INPUT_CODE_FIX
1709 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1715 if (estab_f && iconv_for_check != iconv){
1716 struct input_code *p = find_inputcode_byfunc(iconv);
1718 set_input_codename(p->name);
1719 debug(input_codename);
1721 iconv_for_check = iconv;
/* Flag bits OR-ed into an input_code's score; when candidates are compared, the lower score is preferred. */
1726 #define SCORE_L2 (1) /* JIS level-2 kanji */
1727 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1728 #define SCORE_DEPEND (SCORE_KANA << 1) /* machine-dependent characters */
1729 #ifdef SHIFTJIS_CP932
1730 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1731 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1733 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1735 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1736 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1738 #define SCORE_INIT (SCORE_iMIME) /* score assigned by status_reset() */
1740 const int score_table_A0[] = {
1743 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1744 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1747 const int score_table_F0[] = {
1748 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1749 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1750 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1751 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/* Turn on the given SCORE_* flag bits in ptr->score. */
1754 void set_code_score(ptr, score)
1755 struct input_code *ptr;
1759 ptr->score |= score;
/* Turn off the given SCORE_* flag bits in ptr->score. */
1763 void clr_code_score(ptr, score)
1764 struct input_code *ptr;
1768 ptr->score &= ~score;
/*
 * Add SCORE_* flags to ptr according to the byte pair currently held in
 * ptr->buf[0] (c2) and ptr->buf[1] (c1).
 */
1772 void code_score(ptr)
1773 struct input_code *ptr;
1775 int c2 = ptr->buf[0];
1776 #ifdef UTF8_OUTPUT_ENABLE
1777 int c1 = ptr->buf[1];
1780 set_code_score(ptr, SCORE_ERROR);
1781 }else if (c2 == SSO){
1782 set_code_score(ptr, SCORE_KANA);
1783 #ifdef UTF8_OUTPUT_ENABLE
1784 }else if (!e2w_conv(c2, c1)){ /* e2w_conv() returned 0 for this pair */
1785 set_code_score(ptr, SCORE_NO_EXIST);
1787 }else if ((c2 & 0x70) == 0x20){ /* look up flag by the low nibble of c2 */
1788 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1789 }else if ((c2 & 0x70) == 0x70){ /* look up flag by the low nibble of c2 */
1790 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1791 }else if ((c2 & 0x70) >= 0x50){
1792 set_code_score(ptr, SCORE_L2);
1796 void status_disable(ptr)
1797 struct input_code *ptr;
1802 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/* Append c to ptr->buf at ptr->index and advance the index. */
1805 void status_push_ch(ptr, c)
1806 struct input_code *ptr;
1809 ptr->buf[ptr->index++] = c;
1812 void status_clear(ptr)
1813 struct input_code *ptr;
1819 void status_reset(ptr)
1820 struct input_code *ptr;
1823 ptr->score = SCORE_INIT;
1826 void status_reinit(ptr)
1827 struct input_code *ptr;
1830 ptr->_file_stat = 0;
1833 void status_check(ptr, c)
1834 struct input_code *ptr;
1837 if (c <= DEL && estab_f){
1842 void s_status(ptr, c)
1843 struct input_code *ptr;
1848 status_check(ptr, c);
1853 #ifdef NUMCHAR_OPTION
1854 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1857 }else if (0xa1 <= c && c <= 0xdf){
1858 status_push_ch(ptr, SSO);
1859 status_push_ch(ptr, c);
1862 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1864 status_push_ch(ptr, c);
1865 #ifdef SHIFTJIS_CP932
1867 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1869 status_push_ch(ptr, c);
1870 #endif /* SHIFTJIS_CP932 */
1872 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1874 status_push_ch(ptr, c);
1875 #endif /* X0212_ENABLE */
1877 status_disable(ptr);
1881 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1882 status_push_ch(ptr, c);
1883 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1887 status_disable(ptr);
1891 #ifdef SHIFTJIS_CP932
1892 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1893 status_push_ch(ptr, c);
1894 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1895 set_code_score(ptr, SCORE_CP932);
1900 #endif /* SHIFTJIS_CP932 */
1901 #ifndef X0212_ENABLE
1902 status_disable(ptr);
1908 void e_status(ptr, c)
1909 struct input_code *ptr;
1914 status_check(ptr, c);
1919 #ifdef NUMCHAR_OPTION
1920 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1923 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1925 status_push_ch(ptr, c);
1927 }else if (0x8f == c){
1929 status_push_ch(ptr, c);
1930 #endif /* X0212_ENABLE */
1932 status_disable(ptr);
1936 if (0xa1 <= c && c <= 0xfe){
1937 status_push_ch(ptr, c);
1941 status_disable(ptr);
1946 if (0xa1 <= c && c <= 0xfe){
1948 status_push_ch(ptr, c);
1950 status_disable(ptr);
1952 #endif /* X0212_ENABLE */
1956 #ifdef UTF8_INPUT_ENABLE
1957 void w16_status(ptr, c)
1958 struct input_code *ptr;
1965 if (ptr->_file_stat == 0){
1966 if (c == 0xfe || c == 0xff){
1968 status_push_ch(ptr, c);
1969 ptr->_file_stat = 1;
1971 status_disable(ptr);
1972 ptr->_file_stat = -1;
1974 }else if (ptr->_file_stat > 0){
1976 status_push_ch(ptr, c);
1977 }else if (ptr->_file_stat < 0){
1978 status_disable(ptr);
1984 status_disable(ptr);
1985 ptr->_file_stat = -1;
1987 status_push_ch(ptr, c);
1994 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1995 status_push_ch(ptr, c);
1998 status_disable(ptr);
1999 ptr->_file_stat = -1;
2005 void w_status(ptr, c)
2006 struct input_code *ptr;
2011 status_check(ptr, c);
2016 #ifdef NUMCHAR_OPTION
2017 }else if ((c & CLASS_MASK) == CLASS_UTF16){
2020 }else if (0xc0 <= c && c <= 0xdf){
2022 status_push_ch(ptr, c);
2023 }else if (0xe0 <= c && c <= 0xef){
2025 status_push_ch(ptr, c);
2027 status_disable(ptr);
2032 if (0x80 <= c && c <= 0xbf){
2033 status_push_ch(ptr, c);
2034 if (ptr->index > ptr->stat){
2035 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2036 && ptr->buf[2] == 0xbf);
2037 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2038 &ptr->buf[0], &ptr->buf[1]);
2045 status_disable(ptr);
2056 int action_flag = 1;
2057 struct input_code *result = 0;
2058 struct input_code *p = input_code_list;
2060 (p->status_func)(p, c);
2063 }else if(p->stat == 0){
2074 if (result && !estab_f){
2075 set_iconv(TRUE, result->iconv_func);
2076 }else if (c <= DEL){
2077 struct input_code *ptr = input_code_list;
2092 return std_gc_buf[--std_gc_ndx];
2103 if (std_gc_ndx == STD_GC_BUFSIZE){
2106 std_gc_buf[std_gc_ndx++] = c;
2120 #if !defined(PERL_XS) && !defined(WIN32DLL)
2127 while ((c = (*i_getc)(f)) != EOF)
2136 oconv = output_conv;
2139 /* replace continuation module, from output side */
2141 /* output redirection */
2143 if (noout_f || guess_f){
2150 if (mimeout_f == TRUE) {
2151 o_base64conv = oconv; oconv = base64_conv;
2153 /* base64_count = 0; */
2157 o_crconv = oconv; oconv = cr_conv;
2160 o_rot_conv = oconv; oconv = rot_conv;
2163 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2166 o_hira_conv = oconv; oconv = hira_conv;
2169 o_fconv = oconv; oconv = fold_conv;
2172 if (alpha_f || x0201_f) {
2173 o_zconv = oconv; oconv = z_conv;
2177 i_ungetc = std_ungetc;
2178 /* input redirection */
2181 i_cgetc = i_getc; i_getc = cap_getc;
2182 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2185 i_ugetc = i_getc; i_getc = url_getc;
2186 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2189 #ifdef NUMCHAR_OPTION
2191 i_ngetc = i_getc; i_getc = numchar_getc;
2192 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2195 #ifdef UNICODE_NORMALIZATION
2196 if (nfc_f && input_f == UTF8_INPUT){
2197 i_nfc_getc = i_getc; i_getc = nfc_getc;
2198 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2201 if (mime_f && mimebuf_f==FIXED_MIME) {
2202 i_mgetc = i_getc; i_getc = mime_getc;
2203 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2206 i_bgetc = i_getc; i_getc = broken_getc;
2207 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2209 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
2210 set_iconv(-TRUE, e_iconv);
2211 } else if (input_f == SJIS_INPUT) {
2212 set_iconv(-TRUE, s_iconv);
2213 #ifdef UTF8_INPUT_ENABLE
2214 } else if (input_f == UTF8_INPUT) {
2215 set_iconv(-TRUE, w_iconv);
2216 } else if (input_f == UTF16BE_INPUT) {
2217 set_iconv(-TRUE, w_iconv16);
2218 } else if (input_f == UTF16LE_INPUT) {
2219 set_iconv(-TRUE, w_iconv16);
2222 set_iconv(FALSE, e_iconv);
2226 struct input_code *p = input_code_list;
2234 Conversion main loop. Code detection only.
2243 int is_8bit = FALSE;
2245 module_connection();
2248 if(input_f == SJIS_INPUT
2249 #ifdef UTF8_INPUT_ENABLE
2250 || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT
2258 output_mode = ASCII;
2261 #define NEXT continue /* no output, get next */
2262 #define SEND ; /* output c1 and c2, get next */
2263 #define LAST break /* end of loop, go closing */
2265 while ((c1 = (*i_getc)(f)) != EOF) {
2270 /* in case of 8th bit is on */
2271 if (!estab_f&&!mime_decode_mode) {
2272 /* in case of not established yet */
2273 /* It is still ambiguous */
2274 if (h_conv(f, c2, c1)==EOF)
2280 /* in case of already established */
2282 /* ignore bogus code */
2288 /* second byte, 7 bit code */
2289 /* it might be kanji shifted */
2290 if ((c1 == DEL) || (c1 <= SPACE)) {
2291 /* ignore bogus first code */
2299 #ifdef UTF8_INPUT_ENABLE
2308 #ifdef NUMCHAR_OPTION
2309 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
2312 } else if (c1 > DEL) {
2314 if (!estab_f && !iso8859_f) {
2315 /* not established yet */
2316 if (!is_8bit) is_8bit = TRUE;
2319 } else { /* estab_f==TRUE */
2324 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2325 /* SJIS X0201 Case... */
2326 if(iso2022jp_f && x0201_f==NO_X0201) {
2327 (*oconv)(GETA1, GETA2);
2334 } else if (c1==SSO && iconv != s_iconv) {
2335 /* EUC X0201 Case */
2336 c1 = (*i_getc)(f); /* skip SSO */
2338 if (SSP<=c1 && c1<0xe0) {
2339 if(iso2022jp_f && x0201_f==NO_X0201) {
2340 (*oconv)(GETA1, GETA2);
2347 } else { /* bogus code, skip SSO and one byte */
2351 /* already established */
2356 } else if ((c1 > SPACE) && (c1 != DEL)) {
2357 /* in case of Roman characters */
2359 /* output 1 shifted byte */
2363 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2364 /* output 1 shifted byte */
2365 if(iso2022jp_f && x0201_f==NO_X0201) {
2366 (*oconv)(GETA1, GETA2);
2373 /* look like bogus code */
2376 } else if (input_mode == X0208) {
2377 /* in case of Kanji shifted */
2380 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2381 /* Check MIME code */
2382 if ((c1 = (*i_getc)(f)) == EOF) {
2385 } else if (c1 == '?') {
2386 /* =? is mime conversion start sequence */
2387 if(mime_f == STRICT_MIME) {
2388 /* check in real detail */
2389 if (mime_begin_strict(f) == EOF)
2393 } else if (mime_begin(f) == EOF)
2403 /* normal ASCII code */
2406 } else if (!is_8bit && c1 == SI) {
2409 } else if (!is_8bit && c1 == SO) {
2412 } else if (!is_8bit && c1 == ESC ) {
2413 if ((c1 = (*i_getc)(f)) == EOF) {
2414 /* (*oconv)(0, ESC); don't send bogus code */
2416 } else if (c1 == '$') {
2417 if ((c1 = (*i_getc)(f)) == EOF) {
2419 (*oconv)(0, ESC); don't send bogus code
2420 (*oconv)(0, '$'); */
2422 } else if (c1 == '@'|| c1 == 'B') {
2423 /* This is kanji introduction */
2426 set_input_codename("ISO-2022-JP");
2428 debug(input_codename);
2431 } else if (c1 == '(') {
2432 if ((c1 = (*i_getc)(f)) == EOF) {
2433 /* don't send bogus code
2439 } else if (c1 == '@'|| c1 == 'B') {
2440 /* This is kanji introduction */
2445 } else if (c1 == 'D'){
2449 #endif /* X0212_ENABLE */
2451 /* could be some special code */
2458 } else if (broken_f&0x2) {
2459 /* accept any ESC-(-x as broken code ... */
2469 } else if (c1 == '(') {
2470 if ((c1 = (*i_getc)(f)) == EOF) {
2471 /* don't send bogus code
2473 (*oconv)(0, '('); */
2477 /* This is X0201 kana introduction */
2478 input_mode = X0201; shift_mode = X0201;
2480 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2481 /* This is ASCII (or JIS-Roman) introduction, not kanji */
2482 input_mode = ASCII; shift_mode = FALSE;
2484 } else if (broken_f&0x2) {
2485 input_mode = ASCII; shift_mode = FALSE;
2490 /* maintain various input_mode here */
2494 } else if ( c1 == 'N' || c1 == 'n' ){
2496 c3 = (*i_getc)(f); /* skip SS2 */
2497 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2512 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2513 input_mode = ASCII; set_iconv(FALSE, 0);
2515 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2516 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2524 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2525 if ((c1=(*i_getc)(f))!=EOF) {
2529 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2545 if (input_mode == X0208)
2546 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2548 else if (input_mode == X0212)
2549 (*oconv)((0x8f << 8) | c2, c1);
2550 #endif /* X0212_ENABLE */
2551 else if (input_mode)
2552 (*oconv)(input_mode, c1); /* other special case */
2553 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2554 int c0 = (*i_getc)(f);
2557 (*iconv)(c2, c1, c0);
2563 /* goto next_word */
2567 (*iconv)(EOF, 0, 0);
2568 if (!is_inputcode_set)
2571 struct input_code *p = input_code_list;
2572 struct input_code *result = p;
2574 if (p->score < result->score) result = p;
2577 set_input_codename(result->name);
2592 /** it must NOT be in the kanji shifted sequence */
2593 /** it must NOT be written in JIS7 */
2594 /** and it must be after 2 byte 8bit code */
2601 while ((c1 = (*i_getc)(f)) != EOF) {
2607 if (push_hold_buf(c1) == EOF || estab_f){
2613 struct input_code *p = input_code_list;
2614 struct input_code *result = p;
2619 if (p->score < result->score){
2624 set_iconv(FALSE, result->iconv_func);
2629 ** 1) EOF is detected, or
2630 ** 2) Code is established, or
2631 ** 3) Buffer is FULL (but last word is pushed)
2633 ** in 1) and 3) cases, we continue to use
2634 ** Kanji codes by oconv and leave estab_f unchanged.
2639 while (wc < hold_count){
2640 c2 = hold_buf[wc++];
2642 #ifdef NUMCHAR_OPTION
2643 || (c2 & CLASS_MASK) == CLASS_UTF16
2648 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2649 (*iconv)(X0201, c2, 0);
2652 if (wc < hold_count){
2653 c1 = hold_buf[wc++];
2662 if ((*iconv)(c2, c1, 0) < 0){
2664 if (wc < hold_count){
2665 c0 = hold_buf[wc++];
2674 (*iconv)(c2, c1, c0);
2687 if (hold_count >= HOLD_SIZE*2)
2689 hold_buf[hold_count++] = c2;
2690 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2693 const int shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
2695 int s2e_conv(c2, c1, p2, p1)
2699 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
2702 #ifdef SHIFTJIS_CP932
2703 if (cp51932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2704 extern const unsigned short shiftjis_cp932[3][189];
2705 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2711 #endif /* SHIFTJIS_CP932 */
2713 if (!x0213_f && x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2714 extern const unsigned short shiftjis_x0212[3][189];
2715 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2718 c2 = (0x8f << 8) | (val >> 8);
2731 if(x0213_f && c2 >= 0xF0){
2732 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
2733 c2 = 0x8F20 + shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
2734 }else{ /* 78<=k<=94 */
2735 c2 = 0x8F00 | (c2 * 2 - 0x17B);
2736 if (0x9E < c1) c2++;
2739 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
2740 if (0x9E < c1) c2++;
2743 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
2751 c2 = x0212_unshift(c2);
2766 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2769 int ret = s2e_conv(c2, c1, &c2, &c1);
2770 if (ret) return ret;
2784 }else if (c2 == 0x8f){
2788 c2 = (c2 << 8) | (c1 & 0x7f);
2790 #ifdef SHIFTJIS_CP932
2793 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2794 s2e_conv(s2, s1, &c2, &c1);
2795 if ((c2 & 0xff00) == 0){
2801 #endif /* SHIFTJIS_CP932 */
2802 #endif /* X0212_ENABLE */
2803 } else if (c2 == SSO){
2806 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2816 #ifdef UTF8_INPUT_ENABLE
2818 w2e_conv(c2, c1, c0, p2, p1)
2827 }else if (0xc0 <= c2 && c2 <= 0xef) {
2828 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2829 #ifdef NUMCHAR_OPTION
2832 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2847 /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
2848 if(ignore_zwnbsp_f){
2849 ignore_zwnbsp_f = FALSE;
2850 if(c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
2854 if (c2 == 0) /* 0x00-0x7f */
2855 c1 &= 0x7F; /* 1byte */
2857 if ((c2 & 0xe0) == 0xc0){ /* 0xc0-0xdf */
2859 if((c2 & 0xFE) == 0xC0 || c1 < 0x80 || 0xBF < c1) return 0;
2860 }else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
2861 return -1; /* 3bytes */
2863 else if (0xf0 <= c2)
2864 return 0; /* 4,5,6bytes */
2865 else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
2866 return 0; /* trail byte */
2870 /* must be 3bytes */
2872 if(c1 < 0xA0 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
2874 }else if(c2 == 0xED){
2875 if(c1 < 0x80 || 0x9F < c1 || c0 < 0x80 || 0xBF < c0)
2877 }else if((c2 & 0xf0) == 0xe0){
2878 if(c1 < 0x80 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
2882 if (c2 == 0 || c2 == EOF){
2883 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
2884 } else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
2885 unsigned short val = 0;
2890 val = ww16_conv(c2, c1, c0);
2891 c2 = (val >> 8) & 0xff;
2895 ret = w2e_conv(c2, c1, c0, &c2, &c1);
2904 w16w_conv(val, p2, p1, p0)
2912 }else if (val < 0x800){
2913 *p2 = 0xc0 | (val >> 6);
2914 *p1 = 0x80 | (val & 0x3f);
2917 *p2 = 0xe0 | (val >> 12);
2918 *p1 = 0x80 | ((val >> 6) & 0x3f);
2919 *p0 = 0x80 | (val & 0x3f);
2924 ww16_conv(c2, c1, c0)
2930 }else if (c2 >= 0xe0){
2931 val = (c2 & 0x0f) << 12;
2932 val |= (c1 & 0x3f) << 6;
2934 }else if (c2 >= 0xc0){
2935 val = (c2 & 0x1f) << 6;
2944 w16e_conv(val, p2, p1)
2970 w16w_conv(val, &c2, &c1, &c0);
2971 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
2972 #ifdef NUMCHAR_OPTION
2975 *p1 = CLASS_UTF16 | val;
2984 w_iconv16(c2, c1, c0)
2989 /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
2990 if(ignore_zwnbsp_f){
2991 ignore_zwnbsp_f = FALSE;
2992 if (c2==0376 && c1==0377){
2993 utf16_mode = UTF16BE_INPUT;
2995 }else if(c2==0377 && c1==0376){
2996 utf16_mode = UTF16LE_INPUT;
3000 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
3002 tmp=c1; c1=c2; c2=tmp;
3004 if ((c2==0 && c1 < 0x80) || c2==EOF) {
3007 }else if((c2>>3)==27){ /* surrogate pair */
3009 #ifdef UTF8_OUTPUT_ENABLE
3010 }else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
3012 }else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
3013 if (ret) return ret;
3019 unicode_to_jis_common(c2, c1, c0, p2, p1)
3023 extern const unsigned short *const utf8_to_euc_2bytes[];
3024 extern const unsigned short *const *const utf8_to_euc_3bytes[];
3028 if (ms_ucs_map_f && cp51932_f){
3029 /* CP932/CP51932: U+00A6 (BROKEN BAR) -> not 0x8fa2c3, but 0x7c */
3042 }else if(strict_mapping_f){
3046 case 0xAB: case 0xAD: case 0xB2: case 0xB3:
3047 case 0xB5: case 0xB7: case 0xB9: case 0xBB:
3059 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
3060 if(!ret && !ms_ucs_map_f && !x0212_f){
3061 if(*p2 == 0 && *p1 < 0x80){
3063 }else if(*p2 > 0xFF){
3065 if (e2s_conv(*p2, *p1, &s2, &s1) == 0){
3066 s2e_conv(s2, s1, p2, p1);
3067 if(*p2 == 0 && *p1 < 0x80)
3073 if(unicode_round_trip_f){
3078 if(c0 == 0x95) return 1;
3081 if(c0 == 0xA5) return 1;
3088 if(c0 == 0xBF) return 1;
3091 if(c0 == 0x8D) return 1;
3094 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3102 if(c2 == 0xE2 && c1 == 0x80 && c0 == 0xBE){
3106 }else if(c2 == 0xEF && c1 == 0xBD && c0 == 0x9E){
3107 if (p2) *p2 = 0x8F22;
3112 if(!strict_mapping_f);
3113 else if(ms_ucs_map_f && cp51932_f){
3114 /* Microsoft Code Page */
3120 case 0x94: case 0x96: case 0xBE:
3141 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94)
3144 ret = w_iconv_common(c1, c0, utf8_to_euc_3bytes[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3150 w_iconv_common(c1, c0, pp, psize, p2, p1)
3152 const unsigned short *const *pp;
3157 const unsigned short *p;
3160 if (pp == 0) return 1;
3163 if (c1 < 0 || psize <= c1) return 1;
3165 if (p == 0) return 1;
3168 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
3170 if (val == 0) return 1;
3171 if (disable_cp932ext_f && (
3172 (val>>8) == 0x2D || /* disable NEC special characters */
3173 val > 0xF300 /* disable NEC special characters */
3181 if (c2 == SO) c2 = X0201;
3190 #ifdef UTF8_OUTPUT_ENABLE
3192 nkf_each_char_to_hex(f, c)
3193 void (*f)PROTO((int c2,int c1));
3196 const char *hex = "0123456789ABCDEF";
3202 (*f)(0, hex[(c>>shift)&0xF]);
3213 encode_fallback_html(c)
3220 (*oconv)(0, 0x30+(c/1000000)%10);
3222 (*oconv)(0, 0x30+(c/100000 )%10);
3224 (*oconv)(0, 0x30+(c/10000 )%10);
3226 (*oconv)(0, 0x30+(c/1000 )%10);
3228 (*oconv)(0, 0x30+(c/100 )%10);
3230 (*oconv)(0, 0x30+(c/10 )%10);
3232 (*oconv)(0, 0x30+ c %10);
3238 encode_fallback_xml(c)
3244 nkf_each_char_to_hex(oconv, c);
3250 encode_fallback_java(c)
3253 const char *hex = "0123456789ABCDEF";
3255 if((c&0x00FFFFFF) > 0xFFFF){
3259 (*oconv)(0, hex[(c>>20)&0xF]);
3260 (*oconv)(0, hex[(c>>16)&0xF]);
3264 (*oconv)(0, hex[(c>>12)&0xF]);
3265 (*oconv)(0, hex[(c>> 8)&0xF]);
3266 (*oconv)(0, hex[(c>> 4)&0xF]);
3267 (*oconv)(0, hex[ c &0xF]);
3272 encode_fallback_perl(c)
3278 nkf_each_char_to_hex(oconv, c);
3284 encode_fallback_subchar(c)
3287 c = unicode_subchar;
3288 (*oconv)((c>>8)&0xFF, c&0xFF);
3294 (*oconv)(0, (c>>shift)&0xFF);
3308 extern const unsigned short euc_to_utf8_1byte[];
3309 extern const unsigned short *const euc_to_utf8_2bytes[];
3310 extern const unsigned short *const euc_to_utf8_2bytes_ms[];
3311 const unsigned short *p;
3314 p = euc_to_utf8_1byte;
3316 } else if (c2 >> 8 == 0x8f){
3317 if(!ms_ucs_map_f && c2 == 0x8F22 && c1 == 0x43){
3320 extern const unsigned short *const x0212_to_utf8_2bytes[];
3321 c2 = (c2&0x7f) - 0x21;
3322 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3323 p = x0212_to_utf8_2bytes[c2];
3329 c2 = (c2&0x7f) - 0x21;
3330 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3331 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
3336 c1 = (c1 & 0x7f) - 0x21;
3337 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3354 if (unicode_bom_f==2) {
3361 #ifdef NUMCHAR_OPTION
3362 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3363 w16w_conv(c1, &c2, &c1, &c0);
3367 if (c0) (*o_putc)(c0);
3374 output_mode = ASCII;
3376 } else if (c2 == ISO8859_1) {
3377 output_mode = ISO8859_1;
3378 (*o_putc)(c1 | 0x080);
3381 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
3382 val = ((c2<<8)&0xff00) + c1;
3383 else val = e2w_conv(c2, c1);
3385 w16w_conv(val, &c2, &c1, &c0);
3389 if (c0) (*o_putc)(c0);
3405 if (unicode_bom_f==2) {
3407 (*o_putc)((unsigned char)'\377');
3411 (*o_putc)((unsigned char)'\377');
3416 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
3417 } else if (c2 == ISO8859_1) {
3420 #ifdef NUMCHAR_OPTION
3421 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
3422 c2 = (c1 >> 8) & 0xff;
3426 unsigned short val = e2w_conv(c2, c1);
3427 c2 = (val >> 8) & 0xff;
3446 #ifdef NUMCHAR_OPTION
3447 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3448 w16e_conv(c1, &c2, &c1);
3449 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3450 if(encode_fallback)(*encode_fallback)(c1);
3458 } else if (c2 == 0) {
3459 output_mode = ASCII;
3461 } else if (c2 == X0201) {
3462 output_mode = JAPANESE_EUC;
3463 (*o_putc)(SSO); (*o_putc)(c1|0x80);
3464 } else if (c2 == ISO8859_1) {
3465 output_mode = ISO8859_1;
3466 (*o_putc)(c1 | 0x080);
3468 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3469 output_mode = JAPANESE_EUC;
3470 #ifdef SHIFTJIS_CP932
3473 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3474 s2e_conv(s2, s1, &c2, &c1);
3479 output_mode = ASCII;
3481 }else if ((c2 & 0xff00) >> 8 == 0x8f){
3484 (*o_putc)((c2 & 0x7f) | 0x080);
3485 (*o_putc)(c1 | 0x080);
3488 (*o_putc)((c2 & 0x7f) | 0x080);
3489 (*o_putc)(c1 | 0x080);
3493 if ((c1<0x21 || 0x7e<c1) ||
3494 (c2<0x21 || 0x7e<c2)) {
3495 set_iconv(FALSE, 0);
3496 return; /* too late to rescue this char */
3498 output_mode = JAPANESE_EUC;
3499 (*o_putc)(c2 | 0x080);
3500 (*o_putc)(c1 | 0x080);
3510 if ((ret & 0xff00) == 0x8f00){
3511 if (0x75 <= c && c <= 0x7f){
3512 ret = c + (0x109 - 0x75);
3515 if (0x75 <= c && c <= 0x7f){
3516 ret = c + (0x113 - 0x75);
3523 int x0212_unshift(c)
3527 if (0x7f <= c && c <= 0x88){
3528 ret = c + (0x75 - 0x7f);
3529 }else if (0x89 <= c && c <= 0x92){
3530 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
3534 #endif /* X0212_ENABLE */
3537 e2s_conv(c2, c1, p2, p1)
3538 int c2, c1, *p2, *p1;
3541 if ((c2 & 0xff00) == 0x8f00){
3544 if((0x21 <= ndx && ndx <= 0x2F)){
3545 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
3546 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3548 }else if(0x6E <= ndx && ndx <= 0x7E){
3549 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
3550 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3556 else if(0x21 <= ndx && ndx <= 0x7e){
3558 const unsigned short *ptr;
3559 extern const unsigned short *const x0212_shiftjis[];
3561 ptr = x0212_shiftjis[ndx - 0x21];
3563 val = ptr[(c1 & 0x7f) - 0x21];
3572 c2 = x0212_shift(c2);
3574 #endif /* X0212_ENABLE */
3576 if(0x7F < c2) return 1;
3577 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
3578 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3587 #ifdef NUMCHAR_OPTION
3588 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3589 w16e_conv(c1, &c2, &c1);
3590 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3591 if(encode_fallback)(*encode_fallback)(c1);
3599 } else if (c2 == 0) {
3600 output_mode = ASCII;
3602 } else if (c2 == X0201) {
3603 output_mode = SHIFT_JIS;
3605 } else if (c2 == ISO8859_1) {
3606 output_mode = ISO8859_1;
3607 (*o_putc)(c1 | 0x080);
3609 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3610 output_mode = SHIFT_JIS;
3611 if (e2s_conv(c2, c1, &c2, &c1) == 0){
3617 if ((c1<0x20 || 0x7e<c1) ||
3618 (c2<0x20 || 0x7e<c2)) {
3619 set_iconv(FALSE, 0);
3620 return; /* too late to rescue this char */
3622 output_mode = SHIFT_JIS;
3623 e2s_conv(c2, c1, &c2, &c1);
3625 #ifdef SHIFTJIS_CP932
3627 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3628 extern const unsigned short cp932inv[2][189];
3629 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3635 #endif /* SHIFTJIS_CP932 */
3638 if (prefix_table[(unsigned char)c1]){
3639 (*o_putc)(prefix_table[(unsigned char)c1]);
3650 #ifdef NUMCHAR_OPTION
3651 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3652 w16e_conv(c1, &c2, &c1);
3653 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3654 if(encode_fallback)(*encode_fallback)(c1);
3660 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3663 (*o_putc)(ascii_intro);
3664 output_mode = ASCII;
3668 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3670 if(output_mode!=X0213_2){
3671 output_mode = X0213_2;
3674 if(output_mode!=X0212){
3675 output_mode = X0212;
3681 (*o_putc)(output_mode & 0x7F);
3682 (*o_putc)(c2 & 0x7f);
3685 } else if (c2==X0201) {
3686 if (output_mode!=X0201) {
3687 output_mode = X0201;
3693 } else if (c2==ISO8859_1) {
3694 /* iso8859 introduction, or 8th bit on */
3695 /* Can we convert in 7bit form using ESC-'-'-A ?
3697 output_mode = ISO8859_1;
3699 } else if (c2 == 0) {
3700 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3703 (*o_putc)(ascii_intro);
3704 output_mode = ASCII;
3709 if (output_mode!=X0213_1) {
3710 output_mode = X0213_1;
3714 (*o_putc)(output_mode & 0x7F);
3716 }else if (output_mode != X0208) {
3717 output_mode = X0208;
3720 (*o_putc)(kanji_intro);
3722 if (c1<0x20 || 0x7e<c1)
3724 if (c2<0x20 || 0x7e<c2)
3736 mime_prechar(c2, c1);
3737 (*o_base64conv)(c2,c1);
/*
 * Push-back state for the broken-input reader that follows.
 * broken_buf is used as a small stack indexed by broken_counter: the reader
 * pops from it before fetching new input, and the unget side stores at most
 * two entries. broken_last is tested against ESC when a '$' or '(' arrives.
 */
3741 STATIC int broken_buf[3];
3742 STATIC int broken_counter = 0;
3743 STATIC int broken_last = 0;
3750 if (broken_counter>0) {
3751 return broken_buf[--broken_counter];
3754 if (c=='$' && broken_last != ESC
3755 && (input_mode==ASCII || input_mode==X0201)) {
3758 if (c1=='@'|| c1=='B') {
3759 broken_buf[0]=c1; broken_buf[1]=c;
3766 } else if (c=='(' && broken_last != ESC
3767 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3770 if (c1=='J'|| c1=='B') {
3771 broken_buf[0]=c1; broken_buf[1]=c;
3789 if (broken_counter<2)
3790 broken_buf[broken_counter++]=c;
3794 STATIC int prev_cr = 0;
3802 if (! (c2==0&&c1==NL) ) {
3808 } else if (c1=='\r') {
3810 } else if (c1=='\n') {
3811 if (crmode_f==CRLF) {
3812 (*o_crconv)(0,'\r');
3813 } else if (crmode_f==CR) {
3814 (*o_crconv)(0,'\r');
3818 } else if (c1!='\032' || crmode_f!=NL){
3824 Return value of fold_conv()
3826 \n add newline and output char
3827 \r add newline and output nothing
3830 1 (or else) normal output
3832 fold state in prev (previous character)
3834 >0x80 Japanese (X0208/X0201)
3839 This fold algorithm does not preserve leading spaces in a line.
3840 This is the main difference from fmt.
/* Amount one character adds to the running line length f_line:
   2 when c2 is non-zero, otherwise 1. c1 is accepted but not examined. */
3843 #define char_size(c2,c1) (c2?2:1)
3852 if (c1== '\r' && !fold_preserve_f) {
3853 fold_state=0; /* ignore cr */
3854 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3856 fold_state=0; /* ignore cr */
3857 } else if (c1== BS) {
3858 if (f_line>0) f_line--;
3860 } else if (c2==EOF && f_line != 0) { /* close open last line */
3862 } else if ((c1=='\n' && !fold_preserve_f)
3863 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3864 && fold_preserve_f)) {
3866 if (fold_preserve_f) {
3870 } else if ((f_prev == c1 && !fold_preserve_f)
3871 || (f_prev == '\n' && fold_preserve_f)
3872 ) { /* duplicate newline */
3875 fold_state = '\n'; /* output two newline */
3881 if (f_prev&0x80) { /* Japanese? */
3883 fold_state = 0; /* ignore given single newline */
3884 } else if (f_prev==' ') {
3888 if (++f_line<=fold_len)
3892 fold_state = '\r'; /* fold and output nothing */
3896 } else if (c1=='\f') {
3901 fold_state = '\n'; /* output newline and clear */
3902 } else if ( (c2==0 && c1==' ')||
3903 (c2==0 && c1=='\t')||
3904 (c2=='!'&& c1=='!')) {
3905 /* X0208 kankaku or ascii space */
3906 if (f_prev == ' ') {
3907 fold_state = 0; /* remove duplicate spaces */
3910 if (++f_line<=fold_len)
3911 fold_state = ' '; /* output ASCII space only */
3913 f_prev = ' '; f_line = 0;
3914 fold_state = '\r'; /* fold and output nothing */
3918 prev0 = f_prev; /* we still need this one... , but almost done */
3920 if (c2 || c2==X0201)
3921 f_prev |= 0x80; /* this is Japanese */
3922 f_line += char_size(c2,c1);
3923 if (f_line<=fold_len) { /* normal case */
3926 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3927 f_line = char_size(c2,c1);
3928 fold_state = '\n'; /* We can't wait, do fold now */
3929 } else if (c2==X0201) {
3930 /* simple kinsoku rules return 1 means no folding */
/* The annotations below are English renderings of the original Japanese
   comments. NOTE(review): in the JIS X 0201 table 0xa1 is the full stop,
   0xa3 the closing corner bracket and 0xa4 the comma, so the original
   annotations on those three lines look shuffled - confirm. */
3931 if (c1==(0xde&0x7f)) fold_state = 1; /* dakuten (voiced sound mark) */
3932 else if (c1==(0xdf&0x7f)) fold_state = 1; /* handakuten (semi-voiced sound mark) */
3933 else if (c1==(0xa4&0x7f)) fold_state = 1; /* ideographic full stop */
3934 else if (c1==(0xa3&0x7f)) fold_state = 1; /* comma */
3935 else if (c1==(0xa1&0x7f)) fold_state = 1; /* closing corner bracket */
3936 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3937 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3939 fold_state = '\n';/* add one new f_line before this character */
3942 fold_state = '\n';/* add one new f_line before this character */
3945 /* kinsoku point in ASCII */
3946 if ( c1==')'|| /* { [ ( */
3957 /* just after special */
3958 } else if (!is_alnum(prev0)) {
3959 f_line = char_size(c2,c1);
3961 } else if ((prev0==' ') || /* ignored new f_line */
3962 (prev0=='\n')|| /* ignored new f_line */
3963 (prev0&0x80)) { /* X0208 - ASCII */
3964 f_line = char_size(c2,c1);
3965 fold_state = '\n';/* add one new f_line before this character */
3967 fold_state = 1; /* default no fold in ASCII */
/* Kinsoku characters: fold_state = 1 means no fold is inserted before them.
   The annotations are English renderings of the original Japanese comments. */
3971 if (c1=='"') fold_state = 1; /* ideographic comma */
3972 else if (c1=='#') fold_state = 1; /* ideographic full stop */
3973 else if (c1=='W') fold_state = 1; /* closing corner bracket */
3974 else if (c1=='K') fold_state = 1; /* fullwidth closing parenthesis */
3975 else if (c1=='$') fold_state = 1; /* fullwidth comma */
3976 else if (c1=='%') fold_state = 1; /* fullwidth full stop */
3977 else if (c1=='\'') fold_state = 1; /* NOTE(review): the original annotation is garbled here - confirm the intended character */
3978 else if (c1=='(') fold_state = 1; /* fullwidth semicolon */
3979 else if (c1==')') fold_state = 1; /* fullwidth question mark */
3980 else if (c1=='*') fold_state = 1; /* fullwidth exclamation mark */
3981 else if (c1=='+') fold_state = 1; /* dakuten (voiced sound mark) */
3982 else if (c1==',') fold_state = 1; /* handakuten (semi-voiced sound mark) */
3983 /* default no fold in kinsoku */
3986 f_line = char_size(c2,c1);
3987 /* add one new f_line before this character */
3990 f_line = char_size(c2,c1);
3992 /* add one new f_line before this character */
3997 /* terminator process */
3998 switch(fold_state) {
/* Character held back from the previous call: z_prev2 keeps its c2 and
   z_prev1 its c1. It is stored when an X0201 character has a dv[] or ev[]
   entry, so the next character can be checked for a sound mark first. */
4017 int z_prev2=0,z_prev1=0;
4024 /* if (c2) c1 &= 0x7f; assertion */
/* A held-back X0201 character exists: choose which table to emit it from.
   dv[], ev[] and cv[] are indexed by (character - SPACE) * 2 and supply the
   two values passed to o_zconv; presumably the voiced, semi-voiced and plain
   forms respectively - confirm against the table definitions. */
4026 if (x0201_f && z_prev2==X0201) { /* X0201 */
4027 if (c1==(0xde&0x7f)) { /* dakuten (voiced sound mark) */
4029 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
4031 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /* handakuten (semi-voiced sound mark) */
4033 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
4037 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
/* Current character is X0201: hold it back if it can take a sound mark,
   otherwise emit its cv[] pair. */
4046 if (x0201_f && c2==X0201) {
4047 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
4048 /* wait for a dakuten or handakuten */
4049 z_prev1 = c1; z_prev2 = c2;
4052 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
4057 /* JISX0208 Alphabet */
4058 if (alpha_f && c2 == 0x23 ) {
4060 } else if (alpha_f && c2 == 0x21 ) {
4061 /* JISX0208 Kigou */
4066 } else if (alpha_f&0x4) {
4071 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4077 case '>': entity = ">"; break;
4078 case '<': entity = "<"; break;
4079 case '\"': entity = """; break;
4080 case '&': entity = "&"; break;
4083 while (*entity) (*o_zconv)(0, *entity++);
4093 #define rot13(c) ( \
4095 (c <= 'M') ? (c + 13): \
4096 (c <= 'Z') ? (c - 13): \
4098 (c <= 'm') ? (c + 13): \
4099 (c <= 'z') ? (c - 13): \
4103 #define rot47(c) ( \
4105 ( c <= 'O' ) ? (c + 47) : \
4106 ( c <= '~' ) ? (c - 47) : \
4114 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
4120 (*o_rot_conv)(c2,c1);
4127 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
4129 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
4132 (*o_hira_conv)(c2,c1);
4137 iso2022jp_check_conv(c2,c1)
4140 STATIC const int range[RANGE_NUM_MAX][2] = {
4163 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4167 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4172 for (i = 0; i < RANGE_NUM_MAX; i++) {
4173 start = range[i][0];
4176 if (c >= start && c <= end) {
4181 (*o_iso2022jp_check_conv)(c2,c1);
4185 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
4187 const unsigned char *mime_pattern[] = {
4188 (const unsigned char *)"\075?EUC-JP?B?",
4189 (const unsigned char *)"\075?SHIFT_JIS?B?",
4190 (const unsigned char *)"\075?ISO-8859-1?Q?",
4191 (const unsigned char *)"\075?ISO-8859-1?B?",
4192 (const unsigned char *)"\075?ISO-2022-JP?B?",
4193 (const unsigned char *)"\075?ISO-2022-JP?Q?",
4194 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
4195 (const unsigned char *)"\075?UTF-8?B?",
4196 (const unsigned char *)"\075?UTF-8?Q?",
4198 (const unsigned char *)"\075?US-ASCII?Q?",
4203 /* Markers used to raise the priority of the matching input code */
4204 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
4205 e_iconv, s_iconv, 0, 0, 0, 0,
4206 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
4212 const int mime_encode[] = {
4213 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
4214 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
4221 const int mime_encode_method[] = {
4222 'B', 'B','Q', 'B', 'B', 'Q',
4223 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/* Size bound for MIME preamble matching: it dimensions the recovery buffer
   r[] in mime_begin_strict and caps the scan loop of the non-strict check. */
4231 #define MAXRECOVER 20
/* Route input through the MIME decoder unless it is already installed:
   the current i_getc/i_ungetc are saved in i_mgetc/i_mungetc and replaced
   by mime_getc/mime_ungetc. */
4236 if (i_getc!=mime_getc) {
4237 i_mgetc = i_getc; i_getc = mime_getc;
4238 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
/* In strict mode one more layer is interposed: the saved functions move to
   the *_buf slots and the decoder reads via mime_getc_buf/mime_ungetc_buf. */
4239 if(mime_f==STRICT_MIME) {
4240 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4241 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
/* Undo the redirection installed above. */
4247 unswitch_mime_getc()
/* first remove the strict-mode layer, if it was added */
4249 if(mime_f==STRICT_MIME) {
4250 i_mgetc = i_mgetc_buf;
4251 i_mungetc = i_mungetc_buf;
4254 i_ungetc = i_mungetc;
/* re-select the input converter recorded in mime_iconv_back, if any */
4255 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
4256 mime_iconv_back = NULL;
4260 mime_begin_strict(f)
4265 const unsigned char *p,*q;
4266 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
4268 mime_decode_mode = FALSE;
4269 /* =? has been checked */
4271 p = mime_pattern[j];
4274 for(i=2;p[i]>' ';i++) { /* start at =? */
4275 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
4276 /* pattern fails, try next one */
4278 while ((p = mime_pattern[++j])) {
4279 for(k=2;k<i;k++) /* assume length(p) > i */
4280 if (p[k]!=q[k]) break;
4281 if (k==i && nkf_toupper(c1)==p[k]) break;
4283 if (p) continue; /* found next one, continue */
4284 /* all fails, output from recovery buffer */
4292 mime_decode_mode = p[i-2];
4294 mime_iconv_back = iconv;
4295 set_iconv(FALSE, mime_priority_func[j]);
4296 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4298 if (mime_decode_mode=='B') {
4299 mimebuf_f = unbuf_f;
4301 /* do MIME integrity check */
4302 return mime_integrity(f,mime_pattern[j]);
4314 /* we don't keep eof of Fifo, because it contains ?= as
4315 a terminator. It was checked in mime_integrity. */
4316 return ((mimebuf_f)?
4317 (*i_mgetc_buf)(f):Fifo(mime_input++));
4321 mime_ungetc_buf(c,f)
4326 (*i_mungetc_buf)(c,f);
4328 Fifo(--mime_input)=c;
4339 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4340 /* re-read and convert again from mime_buffer. */
4342 /* =? has been checked */
4344 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
4345 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4346 /* We accept any character type even if it is broken by newlines */
4347 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
4348 if (c1=='\n'||c1==' '||c1=='\r'||
4349 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
4351 /* Failed. But this could be another MIME preamble */
4359 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
4360 if (!(++i<MAXRECOVER) || c1==EOF) break;
4361 if (c1=='b'||c1=='B') {
4362 mime_decode_mode = 'B';
4363 } else if (c1=='q'||c1=='Q') {
4364 mime_decode_mode = 'Q';
4368 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
4369 if (!(++i<MAXRECOVER) || c1==EOF) break;
4371 mime_decode_mode = FALSE;
4377 if (!mime_decode_mode) {
4378 /* false MIME preamble, restart from mime_buffer */
4379 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4380 /* Since we are in MIME mode until buffer becomes empty, */
4381 /* we never go into mime_begin again for a while. */
4384 /* discard MIME preamble, and go to MIME mode */
4386 /* do no MIME integrity check */
4387 return c1; /* used only for checking EOF */
4402 fprintf(stderr, "%s\n", str);
4408 set_input_codename (codename)
4413 strcmp(codename, "") != 0 &&
4414 strcmp(codename, input_codename) != 0)
4416 is_inputcode_mixed = TRUE;
4418 input_codename = codename;
4419 is_inputcode_set = TRUE;
4422 #if !defined(PERL_XS) && !defined(WIN32DLL)
4424 print_guessed_code (filename)
4427 char *codename = "BINARY";
4428 if (!is_inputcode_mixed) {
4429 if (strcmp(input_codename, "") == 0) {
4432 codename = input_codename;
4435 if (filename != NULL) printf("%s:", filename);
4436 printf("%s\n", codename);
4442 #ifdef ANSI_C_PROTOTYPE
4443 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
4446 hex_getc(ch, f, g, u)
4459 if (!nkf_isxdigit(c2)){
4464 if (!nkf_isxdigit(c3)){
4469 return (hex2bin(c2) << 4) | hex2bin(c3);
4476 return hex_getc(':', f, i_cgetc, i_cungetc);
4484 return (*i_cungetc)(c, f);
4491 return hex_getc('%', f, i_ugetc, i_uungetc);
4499 return (*i_uungetc)(c, f);
4503 #ifdef NUMCHAR_OPTION
4508 int (*g)() = i_ngetc;
4509 int (*u)() = i_nungetc;
4520 if (buf[i] == 'x' || buf[i] == 'X'){
4521 for (j = 0; j < 5; j++){
4523 if (!nkf_isxdigit(buf[i])){
4530 c |= hex2bin(buf[i]);
4533 for (j = 0; j < 6; j++){
4537 if (!nkf_isdigit(buf[i])){
4544 c += hex2bin(buf[i]);
4550 return CLASS_UTF16 | c;
4560 numchar_ungetc(c, f)
4564 return (*i_nungetc)(c, f);
4568 #ifdef UNICODE_NORMALIZATION
4570 /* Normalization Form C */
4575 int (*g)() = i_nfc_getc;
4576 int (*u)() = i_nfc_ungetc;
4577 int i=0, j, k=1, lower, upper;
4579 const int *array = NULL;
4580 extern const struct normalization_pair normalization_table[];
4583 while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
4584 lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4585 while (upper >= lower) {
4586 j = (lower+upper) / 2;
4587 array = normalization_table[j].nfd;
4588 for (k=0; k < NORMALIZATION_TABLE_NFD_LENGTH && array[k]; k++){
4589 if (array[k] != buf[k]){
4590 array[k] < buf[k] ? (lower = j + 1) : (upper = j - 1);
4597 array = normalization_table[j].nfc;
4598 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4615 return (*i_nfc_ungetc)(c, f);
4617 #endif /* UNICODE_NORMALIZATION */
4624 int c1, c2, c3, c4, cc;
4625 int t1, t2, t3, t4, mode, exit_mode;
4629 int lwsp_size = 128;
4631 if (mime_top != mime_last) { /* Something is in FIFO */
4632 return Fifo(mime_top++);
4634 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4635 mime_decode_mode=FALSE;
4636 unswitch_mime_getc();
4637 return (*i_getc)(f);
4640 if (mimebuf_f == FIXED_MIME)
4641 exit_mode = mime_decode_mode;
4644 if (mime_decode_mode == 'Q') {
4645 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4647 if (c1=='_') return ' ';
4648 if (c1<=' ' || DEL<=c1) {
4649 mime_decode_mode = exit_mode; /* prepare for quit */
4652 if (c1!='=' && c1!='?') {
4656 mime_decode_mode = exit_mode; /* prepare for quit */
4657 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4658 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4659 /* end Q encoding */
4660 input_mode = exit_mode;
4662 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4663 if (lwsp_buf==NULL) {
4664 perror("can't malloc");
4667 while ((c1=(*i_getc)(f))!=EOF) {
4672 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4680 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
4681 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4696 lwsp_buf[lwsp_count] = c1;
4697 if (lwsp_count++>lwsp_size){
4699 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4700 if (lwsp_buf_new==NULL) {
4703 perror("can't realloc");
4706 lwsp_buf = lwsp_buf_new;
4712 if (lwsp_count > 0) {
4713 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4717 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4718 i_ungetc(lwsp_buf[lwsp_count],f);
4726 if (c1=='='&&c2<' ') { /* this is soft wrap */
4727 while((c1 = (*i_mgetc)(f)) <=' ') {
4728 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4730 mime_decode_mode = 'Q'; /* still in MIME */
4731 goto restart_mime_q;
4734 mime_decode_mode = 'Q'; /* still in MIME */
4738 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4739 if (c2<=' ') return c2;
4740 mime_decode_mode = 'Q'; /* still in MIME */
/* hex(c): value of one hexadecimal digit character (0-9, A-F, a-f); any
   other character yields 0. The argument is expanded several times without
   parentheses, so pass only simple, side-effect-free expressions. */
4741 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
4742 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
/* c2 supplies the high nibble and c3 the low nibble of the decoded byte */
4743 return ((hex(c2)<<4) + hex(c3));
4746 if (mime_decode_mode != 'B') {
4747 mime_decode_mode = FALSE;
4748 return (*i_mgetc)(f);
4752 /* Base64 encoding */
4754 MIME allows line breaks in the middle of
4755 Base64, but we are very pessimistic when decoding
4756 in unbuf mode because MIME-encoded text may be broken by
4757 less or an editor's control sequence (such as ESC-[-K) in unbuffered
4758 mode. Ignore incomplete MIME.
4760 mode = mime_decode_mode;
4761 mime_decode_mode = exit_mode; /* prepare for quit */
4763 while ((c1 = (*i_mgetc)(f))<=' ') {
4768 if ((c2 = (*i_mgetc)(f))<=' ') {
4771 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4772 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4775 if ((c1 == '?') && (c2 == '=')) {
4778 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4779 if (lwsp_buf==NULL) {
4780 perror("can't malloc");
4783 while ((c1=(*i_getc)(f))!=EOF) {
4788 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4796 if ((c1=(*i_getc)(f))!=EOF) {
4800 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4815 lwsp_buf[lwsp_count] = c1;
4816 if (lwsp_count++>lwsp_size){
4818 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4819 if (lwsp_buf_new==NULL) {
4822 perror("can't realloc");
4825 lwsp_buf = lwsp_buf_new;
4831 if (lwsp_count > 0) {
4832 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4836 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4837 i_ungetc(lwsp_buf[lwsp_count],f);
4846 if ((c3 = (*i_mgetc)(f))<=' ') {
4849 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4850 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4854 if ((c4 = (*i_mgetc)(f))<=' ') {
4857 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4858 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4862 mime_decode_mode = mode; /* still in MIME sigh... */
4864 /* BASE 64 decoding */
4866 t1 = 0x3f & base64decode(c1);
4867 t2 = 0x3f & base64decode(c2);
4868 t3 = 0x3f & base64decode(c3);
4869 t4 = 0x3f & base64decode(c4);
4870 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4872 Fifo(mime_last++) = cc;
4873 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4875 Fifo(mime_last++) = cc;
4876 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4878 Fifo(mime_last++) = cc;
4883 return Fifo(mime_top++);
4891 Fifo(--mime_top) = c;
4898 const unsigned char *p;
4902 /* In buffered mode, read until =? or NL or buffer full
4904 mime_input = mime_top;
4905 mime_last = mime_top;
4907 while(*p) Fifo(mime_input++) = *p++;
4910 while((c=(*i_getc)(f))!=EOF) {
4911 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4912 break; /* buffer full */
4914 if (c=='=' && d=='?') {
4915 /* checked. skip header, start decode */
4916 Fifo(mime_input++) = c;
4917 /* mime_last_input = mime_input; */
4922 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4924 /* Should we check length mod 4? */
4925 Fifo(mime_input++) = c;
4928 /* In case of Incomplete MIME, no MIME decode */
4929 Fifo(mime_input++) = c;
4930 mime_last = mime_input; /* point undecoded buffer */
4931 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4932 switch_mime_getc(); /* anyway we need buffered getc */
/* Map a Base64 alphabet character to its 6-bit value. Several offsets are
   written as character constants whose ASCII codes equal the intended
   numbers: 'G' == 'a' - 26, '4' == 52, '>' == 62 and '?' == 63. */
4943 i = c - 'A'; /* A..Z 0-25 */
4945 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4947 } else if (c > '/') {
4948 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4949 } else if (c == '+') {
4950 i = '>' /* 62 */ ; /* + 62 */
4952 i = '?' /* 63 */ ; /* / 63 */
/* Base64 alphabet: the character at index v encodes the 6-bit value v. */
4957 STATIC const char basis_64[] =
4958 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Characters held back by the MIME encoder before they are either written
   unchanged through o_mputc or fed to mimeout_addchar. mimeout_buf_count is
   the number of entries in use. The buffer has one spare slot because the
   writers store a character first and only then test
   mimeout_buf_count > MIMEOUT_BUF_LENGTH. */
4961 #define MIMEOUT_BUF_LENGTH (60)
4962 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
4963 int mimeout_buf_count = 0;
/* flag consulted when an encoded word is opened, then reset to FALSE there */
4964 int mimeout_preserve_space = 0;
/* itoh4(c): upper-case hexadecimal digit for a value in 0..15; the argument
   is not parenthesised in the expansion. */
4965 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
4971 const unsigned char *p;
4974 p = mime_pattern[0];
4975 for(i=0;mime_encode[i];i++) {
4976 if (mode == mime_encode[i]) {
4977 p = mime_pattern[i];
4981 mimeout_mode = mime_encode_method[i];
4984 if (base64_count>45) {
4985 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
4986 (*o_mputc)(mimeout_buf[i]);
4992 if (!mimeout_preserve_space && mimeout_buf_count>0
4993 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4994 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4998 if (!mimeout_preserve_space) {
4999 for (;i<mimeout_buf_count;i++) {
5000 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
5001 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
5002 (*o_mputc)(mimeout_buf[i]);
5009 mimeout_preserve_space = FALSE;
5015 j = mimeout_buf_count;
5016 mimeout_buf_count = 0;
5018 mime_putc(mimeout_buf[i]);
5034 switch(mimeout_mode) {
5039 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
5045 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
5051 if (mimeout_f!=FIXED_MIME) {
5053 } else if (mimeout_mode != 'Q')
5062 switch(mimeout_mode) {
5067 } else if (c==CR||c==NL) {
5070 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
5072 (*o_mputc)(itoh4(((c>>4)&0xf)));
5073 (*o_mputc)(itoh4((c&0xf)));
5082 (*o_mputc)(basis_64[c>>2]);
5087 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5093 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
5094 (*o_mputc)(basis_64[c & 0x3F]);
5105 int mime_lastchar2, mime_lastchar1;
5107 void mime_prechar(c2, c1)
5112 if (base64_count + mimeout_buf_count/3*4> 66){
5113 (*o_base64conv)(EOF,0);
5114 (*o_base64conv)(0,NL);
5115 (*o_base64conv)(0,SPACE);
5117 }/*else if (mime_lastchar2){
5118 if (c1 <=DEL && !nkf_isspace(c1)){
5119 (*o_base64conv)(0,SPACE);
5123 if (c2 && mime_lastchar2 == 0
5124 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
5125 (*o_base64conv)(0,SPACE);
5128 mime_lastchar2 = c2;
5129 mime_lastchar1 = c1;
5140 if (mimeout_f == FIXED_MIME){
5141 if (mimeout_mode == 'Q'){
5142 if (base64_count > 71){
5143 if (c!=CR && c!=NL) {
5150 if (base64_count > 71){
5155 if (c == EOF) { /* c==EOF */
5159 if (c != EOF) { /* c==EOF */
5165 /* mimeout_f != FIXED_MIME */
5167 if (c == EOF) { /* c==EOF */
5168 j = mimeout_buf_count;
5169 mimeout_buf_count = 0;
5172 /*if (nkf_isspace(mimeout_buf[i])){
5175 mimeout_addchar(mimeout_buf[i]);
5179 (*o_mputc)(mimeout_buf[i]);
5185 if (mimeout_mode=='Q') {
5186 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5198 if (mimeout_buf_count > 0){
5199 lastchar = mimeout_buf[mimeout_buf_count - 1];
5204 if (!mimeout_mode) {
5205 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
5206 if (nkf_isspace(c)) {
5207 if (c==CR || c==NL) {
5210 for (i=0;i<mimeout_buf_count;i++) {
5211 (*o_mputc)(mimeout_buf[i]);
5212 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
5219 mimeout_buf_count = 1;
5221 if (base64_count > 1
5222 && base64_count + mimeout_buf_count > 76){
5225 if (!nkf_isspace(mimeout_buf[0])){
5230 mimeout_buf[mimeout_buf_count++] = c;
5231 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5232 open_mime(output_mode);
5237 if (lastchar==CR || lastchar == NL){
5238 for (i=0;i<mimeout_buf_count;i++) {
5239 (*o_mputc)(mimeout_buf[i]);
5242 mimeout_buf_count = 0;
5244 if (lastchar==SPACE) {
5245 for (i=0;i<mimeout_buf_count-1;i++) {
5246 (*o_mputc)(mimeout_buf[i]);
5249 mimeout_buf[0] = SPACE;
5250 mimeout_buf_count = 1;
5252 open_mime(output_mode);
5255 /* mimeout_mode == 'B', 1, 2 */
5256 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5257 if (lastchar == CR || lastchar == NL){
5258 if (nkf_isblank(c)) {
5259 for (i=0;i<mimeout_buf_count;i++) {
5260 mimeout_addchar(mimeout_buf[i]);
5262 mimeout_buf_count = 0;
5263 } else if (SPACE<c && c<DEL) {
5265 for (i=0;i<mimeout_buf_count;i++) {
5266 (*o_mputc)(mimeout_buf[i]);
5269 mimeout_buf_count = 0;
5272 if (c==SPACE || c==TAB || c==CR || c==NL) {
5273 for (i=0;i<mimeout_buf_count;i++) {
5274 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
5276 for (i=0;i<mimeout_buf_count;i++) {
5277 (*o_mputc)(mimeout_buf[i]);
5280 mimeout_buf_count = 0;
5283 mimeout_buf[mimeout_buf_count++] = c;
5284 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5286 for (i=0;i<mimeout_buf_count;i++) {
5287 (*o_mputc)(mimeout_buf[i]);
5290 mimeout_buf_count = 0;
5294 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
5295 mimeout_buf[mimeout_buf_count++] = c;
5296 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5297 j = mimeout_buf_count;
5298 mimeout_buf_count = 0;
5300 mimeout_addchar(mimeout_buf[i]);
5307 if (mimeout_buf_count>0) {
5308 j = mimeout_buf_count;
5309 mimeout_buf_count = 0;
5311 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
5313 mimeout_addchar(mimeout_buf[i]);
5319 (*o_mputc)(mimeout_buf[i]);
5321 open_mime(output_mode);
5328 #if defined(PERL_XS) || defined(WIN32DLL)
5333 struct input_code *p = input_code_list;
5346 mime_f = STRICT_MIME;
5347 mime_decode_f = FALSE;
5352 #if defined(MSDOS) || defined(__OS2__)
5357 iso2022jp_f = FALSE;
5358 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
5359 internal_unicode_f = FALSE;
5361 #ifdef UTF8_OUTPUT_ENABLE
5364 ms_ucs_map_f = FALSE;
5365 strict_mapping_f = TRUE;
5366 disable_cp932ext_f = FALSE;
5367 ignore_zwnbsp_f = TRUE;
5368 unicode_round_trip_f = FALSE;
5369 encode_fallback = NULL;
5370 unicode_subchar = '?';
5372 #ifdef UNICODE_NORMALIZATION
5385 is_inputcode_mixed = FALSE;
5386 is_inputcode_set = FALSE;
5390 #ifdef SHIFTJIS_CP932
5400 for (i = 0; i < 256; i++){
5401 prefix_table[i] = 0;
5404 #ifdef UTF8_INPUT_ENABLE
5405 utf16_mode = UTF16BE_INPUT;
5407 mimeout_buf_count = 0;
5412 fold_preserve_f = FALSE;
5415 kanji_intro = DEFAULT_J;
5416 ascii_intro = DEFAULT_R;
5417 fold_margin = FOLD_MARGIN;
5418 output_conv = DEFAULT_CONV;
5419 oconv = DEFAULT_CONV;
5420 o_zconv = no_connection;
5421 o_fconv = no_connection;
5422 o_crconv = no_connection;
5423 o_rot_conv = no_connection;
5424 o_hira_conv = no_connection;
5425 o_base64conv = no_connection;
5426 o_iso2022jp_check_conv = no_connection;
5429 i_ungetc = std_ungetc;
5431 i_bungetc = std_ungetc;
5434 i_mungetc = std_ungetc;
5435 i_mgetc_buf = std_getc;
5436 i_mungetc_buf = std_ungetc;
5437 output_mode = ASCII;
5440 mime_decode_mode = FALSE;
5446 z_prev2=0,z_prev1=0;
5448 iconv_for_check = 0;
5450 input_codename = "";
/* Placeholder assigned to the o_*conv output hooks when the filter state is
   reset; it forwards to no_connection2 with c0 = 0. */
5458 no_connection(c2,c1)
5461 no_connection2(c2,c1,0);
/* Three-argument variant: reports the wiring error on stderr. */
5465 no_connection2(c2,c1,c0)
5468 fprintf(stderr,"nkf internal module connection failure.\n");
5470 return 0; /* LINT */
5475 #define fprintf dllprintf
5480 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
5481 fprintf(stderr,"Flags:\n");
5482 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
5483 #ifdef DEFAULT_CODE_SJIS
5484 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8N\n");
5486 #ifdef DEFAULT_CODE_JIS
5487 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8N\n");
5489 #ifdef DEFAULT_CODE_EUC
5490 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8N\n");
5492 #ifdef DEFAULT_CODE_UTF8
5493 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8N (DEFAULT)\n");
5495 #ifdef UTF8_OUTPUT_ENABLE
5496 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
5498 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
5499 #ifdef UTF8_INPUT_ENABLE
5500 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
5502 fprintf(stderr,"t no conversion\n");
5503 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
5504 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
5505 fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
5506 fprintf(stderr,"v Show this usage. V: show version\n");
5507 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
5508 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
5509 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
5510 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
5511 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
5512 fprintf(stderr," 3: Convert HTML Entity\n");
5513 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
5514 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
5516 fprintf(stderr,"T Text mode output\n");
5518 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
5519 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
5520 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
5521 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
5522 fprintf(stderr,"long name options\n");
5523 fprintf(stderr," --ic=<input codeset> --oc=<output codeset> set the input or output codeset\n");
5524 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
5525 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
5526 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
5527 fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
5529 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
5531 #ifdef NUMCHAR_OPTION
5532 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
5534 #ifdef UTF8_INPUT_ENABLE
5535 fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
5536 fprintf(stderr," set the way nkf handles unassigned characters\n");
5539 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
5541 fprintf(stderr," -g, --guess Guess the input code\n");
5542 fprintf(stderr," --help,--version\n");
5549 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
5550 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
5553 #if defined(MSDOS) && defined(__WIN16__)
5556 #if defined(MSDOS) && defined(__WIN32__)
5562 ,NKF_VERSION,NKF_RELEASE_DATE);
5563 fprintf(stderr,"\n%s\n",CopyRight);
5568 ** Patch authors
5569 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
5570 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
5571 ** ohta@src.ricoh.co.jp (Junn Ohta)
5572 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
5573 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
5574 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
5575 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
5576 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
5577 ** GHG00637@nifty-serve.or.jp (COW)