1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can replace the conventional nkf and be used as is.
32 ** When invoked as e.g. "nkf -e", input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as is.
35 ** There is still a high possibility of bugs
36 ** (especially in auto-detection, mixed codes, and error handling).
38 ** If you find any problems, please report them to
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.79 2005/11/05 03:44:32 naruse Exp $ */
43 #define NKF_VERSION "2.0.5"
44 #define NKF_RELEASE_DATE "2005-10-28"
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse"
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
116 #if defined(MSDOS) || defined(__OS2__)
123 #define setbinmode(fp) fsetbin(fp)
124 #else /* Microsoft C, Turbo C */
125 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
127 #else /* UNIX,OS/2 */
128 #define setbinmode(fp)
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
134 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
147 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16BE_INPUT 14
212 #define UTF16LE_INPUT 15
232 #define is_alnum(c) \
233 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9')) /* nonzero iff c lies in 'a'-'z', 'A'-'Z' or '0'-'9'; c is expanded unparenthesized and may be evaluated up to six times, so pass only a simple, side-effect-free operand */
235 #define HOLD_SIZE 1024
236 #define IOBUF_SIZE 16384
238 #define DEFAULT_J 'B'
239 #define DEFAULT_R 'B'
241 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
242 #define SJ6394 0x0161 /* 63 - 94 ku offset */
244 #define RANGE_NUM_MAX 18
249 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
250 #define sizeof_euc_utf8 94
251 #define sizeof_euc_to_utf8_1byte 94
252 #define sizeof_euc_to_utf8_2bytes 94
253 #define sizeof_utf8_to_euc_C2 64
254 #define sizeof_utf8_to_euc_E5B8 64
255 #define sizeof_utf8_to_euc_2bytes 112
256 #define sizeof_utf8_to_euc_3bytes 112
259 /* MIME preprocessor */
261 #ifdef EASYWIN /*Easy Win */
262 extern POINT _BufferSize;
265 /* function prototype */
267 #ifdef ANSI_C_PROTOTYPE
269 #define STATIC static
283 void (*status_func)PROTO((struct input_code *, int));
284 int (*iconv_func)PROTO((int c2, int c1, int c0));
288 STATIC char *input_codename = "";
291 STATIC const char *CopyRight = COPY_RIGHT;
293 #if !defined(PERL_XS) && !defined(WIN32DLL)
294 STATIC int noconvert PROTO((FILE *f));
296 STATIC int kanji_convert PROTO((FILE *f));
297 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
298 STATIC int push_hold_buf PROTO((int c2));
299 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
300 STATIC int s_iconv PROTO((int c2,int c1,int c0));
301 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
302 STATIC int e_iconv PROTO((int c2,int c1,int c0));
303 #ifdef UTF8_INPUT_ENABLE
304 STATIC void encode_fallback_html PROTO((int c));
305 STATIC void encode_fallback_xml PROTO((int c));
306 STATIC void encode_fallback_java PROTO((int c));
307 STATIC void encode_fallback_perl PROTO((int c));
308 STATIC void encode_fallback_subchar PROTO((int c));
309 STATIC void (*encode_fallback)PROTO((int c)) = NULL;
310 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
311 STATIC int w_iconv PROTO((int c2,int c1,int c0));
312 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
313 STATIC int w_iconv_common PROTO((int c1,int c0,const unsigned short *const *pp,int psize,int *p2,int *p1));
314 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
316 #ifdef UTF8_OUTPUT_ENABLE
317 STATIC int e2w_conv PROTO((int c2,int c1));
318 STATIC void w_oconv PROTO((int c2,int c1));
319 STATIC void w_oconv16 PROTO((int c2,int c1));
321 STATIC void e_oconv PROTO((int c2,int c1));
322 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
323 STATIC void s_oconv PROTO((int c2,int c1));
324 STATIC void j_oconv PROTO((int c2,int c1));
325 STATIC void fold_conv PROTO((int c2,int c1));
326 STATIC void cr_conv PROTO((int c2,int c1));
327 STATIC void z_conv PROTO((int c2,int c1));
328 STATIC void rot_conv PROTO((int c2,int c1));
329 STATIC void hira_conv PROTO((int c2,int c1));
330 STATIC void base64_conv PROTO((int c2,int c1));
331 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
332 STATIC void no_connection PROTO((int c2,int c1));
333 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
335 STATIC void code_score PROTO((struct input_code *ptr));
336 STATIC void code_status PROTO((int c));
338 STATIC void std_putc PROTO((int c));
339 STATIC int std_getc PROTO((FILE *f));
340 STATIC int std_ungetc PROTO((int c,FILE *f));
342 STATIC int broken_getc PROTO((FILE *f));
343 STATIC int broken_ungetc PROTO((int c,FILE *f));
345 STATIC int mime_begin PROTO((FILE *f));
346 STATIC int mime_getc PROTO((FILE *f));
347 STATIC int mime_ungetc PROTO((int c,FILE *f));
349 STATIC int mime_begin_strict PROTO((FILE *f));
350 STATIC int mime_getc_buf PROTO((FILE *f));
351 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
352 STATIC int mime_integrity PROTO((FILE *f,const unsigned char *p));
354 STATIC int base64decode PROTO((int c));
355 STATIC void mime_prechar PROTO((int c2, int c1));
356 STATIC void mime_putc PROTO((int c));
357 STATIC void open_mime PROTO((int c));
358 STATIC void close_mime PROTO(());
360 STATIC void usage PROTO(());
361 STATIC void version PROTO(());
363 STATIC void options PROTO((unsigned char *c));
364 #if defined(PERL_XS) || defined(WIN32DLL)
365 STATIC void reinit PROTO(());
370 #if !defined(PERL_XS) && !defined(WIN32DLL)
371 STATIC unsigned char stdibuf[IOBUF_SIZE];
372 STATIC unsigned char stdobuf[IOBUF_SIZE];
374 STATIC unsigned char hold_buf[HOLD_SIZE*2];
375 STATIC int hold_count;
377 /* MIME preprocessor fifo */
379 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
380 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
381 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
382 STATIC unsigned char mime_buf[MIME_BUF_SIZE];
383 STATIC unsigned int mime_top = 0;
384 STATIC unsigned int mime_last = 0; /* decoded */
385 STATIC unsigned int mime_input = 0; /* undecoded */
386 STATIC int (*mime_iconv_back)PROTO((int c2,int c1,int c0)) = NULL;
389 STATIC int unbuf_f = FALSE;
390 STATIC int estab_f = FALSE;
391 STATIC int nop_f = FALSE;
392 STATIC int binmode_f = TRUE; /* binary mode */
393 STATIC int rot_f = FALSE; /* rot13/47 mode */
394 STATIC int hira_f = FALSE; /* hiragana/katakana conversion */
395 STATIC int input_f = FALSE; /* non fixed input code */
396 STATIC int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
397 STATIC int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
398 STATIC int mime_decode_f = FALSE; /* mime decode is explicitly on */
399 STATIC int mimebuf_f = FALSE; /* MIME buffered input */
400 STATIC int broken_f = FALSE; /* convert ESC-less broken JIS */
401 STATIC int iso8859_f = FALSE; /* ISO8859 through */
402 STATIC int mimeout_f = FALSE; /* base64 mode */
403 #if defined(MSDOS) || defined(__OS2__)
404 STATIC int x0201_f = TRUE; /* Assume JISX0201 kana */
406 STATIC int x0201_f = NO_X0201; /* Assume NO JISX0201 */
408 STATIC int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
409 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
410 STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
412 #ifdef UTF8_OUTPUT_ENABLE
413 STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
414 STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
415 STATIC int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
416 STATIC int unicode_subchar = '?'; /* the regular substitution character */
419 #ifdef UNICODE_NORMALIZATION
420 STATIC int nfc_f = FALSE;
421 STATIC int (*i_nfc_getc)PROTO((FILE *)) = std_getc; /* input of ugetc */
422 STATIC int (*i_nfc_ungetc)PROTO((int c ,FILE *f)) = std_ungetc;
423 STATIC int nfc_getc PROTO((FILE *f));
424 STATIC int nfc_ungetc PROTO((int c,FILE *f));
428 STATIC int cap_f = FALSE;
429 STATIC int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
430 STATIC int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
431 STATIC int cap_getc PROTO((FILE *f));
432 STATIC int cap_ungetc PROTO((int c,FILE *f));
434 STATIC int url_f = FALSE;
435 STATIC int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
436 STATIC int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
437 STATIC int url_getc PROTO((FILE *f));
438 STATIC int url_ungetc PROTO((int c,FILE *f));
441 #ifdef NUMCHAR_OPTION
442 #define CLASS_MASK 0x0f000000
443 #define CLASS_UTF16 0x01000000
444 STATIC int numchar_f = FALSE;
445 STATIC int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
446 STATIC int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
447 STATIC int numchar_getc PROTO((FILE *f));
448 STATIC int numchar_ungetc PROTO((int c,FILE *f));
452 STATIC int noout_f = FALSE;
453 STATIC void no_putc PROTO((int c));
454 STATIC int debug_f = FALSE;
455 STATIC void debug PROTO((const char *str));
456 STATIC int (*iconv_for_check)() = 0;
459 STATIC int guess_f = FALSE;
461 STATIC void print_guessed_code PROTO((char *filename));
463 STATIC void set_input_codename PROTO((char *codename));
464 STATIC int is_inputcode_mixed = FALSE;
465 STATIC int is_inputcode_set = FALSE;
468 STATIC int exec_f = 0;
471 #ifdef SHIFTJIS_CP932
472 STATIC int cp932_f = TRUE;
473 #define CP932_TABLE_BEGIN (0xfa)
474 #define CP932_TABLE_END (0xfc)
476 STATIC int cp932inv_f = TRUE;
477 #define CP932INV_TABLE_BEGIN (0xed)
478 #define CP932INV_TABLE_END (0xee)
480 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
481 #endif /* SHIFTJIS_CP932 */
484 STATIC int x0212_f = FALSE;
485 STATIC int x0212_shift PROTO((int c));
486 STATIC int x0212_unshift PROTO((int c));
489 STATIC unsigned char prefix_table[256];
491 STATIC void e_status PROTO((struct input_code *, int));
492 STATIC void s_status PROTO((struct input_code *, int));
494 #ifdef UTF8_INPUT_ENABLE
495 STATIC void w_status PROTO((struct input_code *, int));
496 STATIC void w16_status PROTO((struct input_code *, int));
497 STATIC int utf16_mode = UTF16BE_INPUT;
500 struct input_code input_code_list[] = {
501 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
502 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
503 #ifdef UTF8_INPUT_ENABLE
504 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
505 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
510 STATIC int mimeout_mode = 0;
511 STATIC int base64_count = 0;
513 /* X0208 -> ASCII converter */
516 STATIC int f_line = 0; /* chars in line */
517 STATIC int f_prev = 0;
518 STATIC int fold_preserve_f = FALSE; /* preserve new lines */
519 STATIC int fold_f = FALSE;
520 STATIC int fold_len = 0;
523 STATIC unsigned char kanji_intro = DEFAULT_J;
524 STATIC unsigned char ascii_intro = DEFAULT_R;
528 #define FOLD_MARGIN 10
529 #define DEFAULT_FOLD 60
531 STATIC int fold_margin = FOLD_MARGIN;
535 #ifdef DEFAULT_CODE_JIS
536 # define DEFAULT_CONV j_oconv
538 #ifdef DEFAULT_CODE_SJIS
539 # define DEFAULT_CONV s_oconv
541 #ifdef DEFAULT_CODE_EUC
542 # define DEFAULT_CONV e_oconv
544 #ifdef DEFAULT_CODE_UTF8
545 # define DEFAULT_CONV w_oconv
548 /* process default */
549 STATIC void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
551 STATIC void (*oconv)PROTO((int c2,int c1)) = no_connection;
552 /* s_iconv or oconv */
553 STATIC int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
555 STATIC void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
556 STATIC void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
557 STATIC void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
558 STATIC void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
559 STATIC void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
560 STATIC void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
561 STATIC void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
563 /* STATIC redirections */
565 STATIC void (*o_putc)PROTO((int c)) = std_putc;
567 STATIC int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
568 STATIC int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
570 STATIC int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
571 STATIC int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
573 STATIC void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
575 STATIC int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
576 STATIC int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
578 /* for strict mime */
579 STATIC int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
580 STATIC int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
583 STATIC int output_mode = ASCII, /* output kanji mode */
584 input_mode = ASCII, /* input kanji mode */
585 shift_mode = FALSE; /* TRUE shift out, or X0201 */
586 STATIC int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
588 /* X0201 / X0208 conversion tables */
590 /* X0201 kana conversion table */
593 unsigned char cv[]= {
594 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
595 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
596 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
597 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
598 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
599 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
600 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
601 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
602 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
603 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
604 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
605 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
606 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
607 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
608 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
609 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
613 /* X0201 kana conversion table for dakuten */
616 unsigned char dv[]= {
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
622 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
623 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
624 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
625 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
626 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
627 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
628 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
635 /* X0201 kana conversion table for han-dakuten */
638 unsigned char ev[]= {
639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
646 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
647 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
648 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
649 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
650 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
651 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
652 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
653 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
654 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
658 /* X0208 kigou conversion table */
659 /* 0x8140 - 0x819e */
661 unsigned char fv[] = {
663 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
664 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
665 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
666 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
667 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
668 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
669 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
670 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
671 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
672 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
673 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
674 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
680 STATIC int file_out = FALSE;
682 STATIC int overwrite = FALSE;
685 STATIC int crmode_f = 0; /* CR, NL, CRLF */
686 #ifdef EASYWIN /*Easy Win */
687 STATIC int end_check;
690 #define STD_GC_BUFSIZE (256)
691 int std_gc_buf[STD_GC_BUFSIZE];
695 #include "nkf32dll.c"
696 #elif defined(PERL_XS)
706 char *outfname = NULL;
709 #ifdef EASYWIN /*Easy Win */
710 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
713 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
714 cp = (unsigned char *)*argv;
719 if (pipe(fds) < 0 || (pid = fork()) < 0){
730 execvp(argv[1], &argv[1]);
744 if(x0201_f == WISH_TRUE)
745 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
747 if (binmode_f == TRUE)
749 if (freopen("","wb",stdout) == NULL)
756 setbuf(stdout, (char *) NULL);
758 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
761 if (binmode_f == TRUE)
763 if (freopen("","rb",stdin) == NULL) return (-1);
767 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
771 kanji_convert(stdin);
772 if (guess_f) print_guessed_code(NULL);
777 is_inputcode_mixed = FALSE;
778 is_inputcode_set = FALSE;
783 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
792 /* reopen file for stdout */
793 if (file_out == TRUE) {
796 outfname = malloc(strlen(origfname)
797 + strlen(".nkftmpXXXXXX")
803 strcpy(outfname, origfname);
807 for (i = strlen(outfname); i; --i){
808 if (outfname[i - 1] == '/'
809 || outfname[i - 1] == '\\'){
815 strcat(outfname, "ntXXXXXX");
817 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
820 strcat(outfname, ".nkftmpXXXXXX");
821 fd = mkstemp(outfname);
824 || (fd_backup = dup(fileno(stdout))) < 0
825 || dup2(fd, fileno(stdout)) < 0
836 outfname = "nkf.out";
839 if(freopen(outfname, "w", stdout) == NULL) {
843 if (binmode_f == TRUE) {
845 if (freopen("","wb",stdout) == NULL)
852 if (binmode_f == TRUE)
854 if (freopen("","rb",fin) == NULL)
859 setvbuffer(fin, stdibuf, IOBUF_SIZE);
863 char *filename = NULL;
865 if (nfiles > 1) filename = origfname;
866 if (guess_f) print_guessed_code(filename);
872 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
880 if (dup2(fd_backup, fileno(stdout)) < 0){
883 if (stat(origfname, &sb)) {
884 fprintf(stderr, "Can't stat %s\n", origfname);
886 /* restore the permissions */
887 if (chmod(outfname, sb.st_mode)) {
888 fprintf(stderr, "Can't set permission %s\n", outfname);
891 /* restore the timestamp */
892 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
893 tb[0] = tb[1] = sb.st_mtime;
894 if (utime(outfname, tb)) {
895 fprintf(stderr, "Can't set timestamp %s\n", outfname);
898 tb.actime = sb.st_atime;
899 tb.modtime = sb.st_mtime;
900 if (utime(outfname, &tb)) {
901 fprintf(stderr, "Can't set timestamp %s\n", outfname);
905 if (unlink(origfname)){
909 if (rename(outfname, origfname)) {
911 fprintf(stderr, "Can't rename %s to %s\n",
912 outfname, origfname);
920 #ifdef EASYWIN /*Easy Win */
921 if (file_out == FALSE)
922 scanf("%d",&end_check);
925 #else /* for Other OS */
926 if (file_out == TRUE)
931 #endif /* WIN32DLL */
956 {"katakana-hiragana","h3"},
963 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
964 {"internal-unicode", ""},
966 #ifdef UTF8_OUTPUT_ENABLE
977 #ifdef UTF8_INPUT_ENABLE
979 {"utf16-input", "W16"},
981 #ifdef UNICODE_NORMALIZATION
982 {"utf8mac-input", ""},
991 #ifdef NUMCHAR_OPTION
992 {"numchar-input", ""},
998 #ifdef SHIFTJIS_CP932
1008 STATIC int option_mode = 0;
1015 unsigned char *p = NULL;
1019 while(*cp && *cp++!='-');
1023 case '-': /* literal options */
1024 if (!*cp) { /* ignore the rest of arguments */
1028 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1030 p = (unsigned char *)long_option[i].name;
1031 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1032 if (*p == cp[j] || cp[j] == ' '){
1039 cp = (unsigned char *)long_option[i].alias;
1043 if (strcmp(long_option[i].name, "overwrite") == 0){
1050 if (strcmp(long_option[i].name, "cap-input") == 0){
1054 if (strcmp(long_option[i].name, "url-input") == 0){
1059 #ifdef NUMCHAR_OPTION
1060 if (strcmp(long_option[i].name, "numchar-input") == 0){
1066 if (strcmp(long_option[i].name, "no-output") == 0){
1070 if (strcmp(long_option[i].name, "debug") == 0){
1075 if (strcmp(long_option[i].name, "cp932") == 0){
1076 #ifdef SHIFTJIS_CP932
1080 #ifdef UTF8_OUTPUT_ENABLE
1081 ms_ucs_map_f = TRUE;
1085 if (strcmp(long_option[i].name, "no-cp932") == 0){
1086 #ifdef SHIFTJIS_CP932
1090 #ifdef UTF8_OUTPUT_ENABLE
1091 ms_ucs_map_f = FALSE;
1095 #ifdef SHIFTJIS_CP932
1096 if (strcmp(long_option[i].name, "cp932inv") == 0){
1103 if (strcmp(long_option[i].name, "x0212") == 0){
1110 if (strcmp(long_option[i].name, "exec-in") == 0){
1114 if (strcmp(long_option[i].name, "exec-out") == 0){
1119 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1120 if (strcmp(long_option[i].name, "internal-unicode") == 0){
1121 internal_unicode_f = TRUE;
1124 if (strcmp(long_option[i].name, "fb-skip") == 0){
1125 encode_fallback = NULL;
1128 if (strcmp(long_option[i].name, "fb-html") == 0){
1129 encode_fallback = encode_fallback_html;
1132 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1133 encode_fallback = encode_fallback_xml;
1136 if (strcmp(long_option[i].name, "fb-java") == 0){
1137 encode_fallback = encode_fallback_java;
1140 if (strcmp(long_option[i].name, "fb-perl") == 0){
1141 encode_fallback = encode_fallback_perl;
1144 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1145 encode_fallback = encode_fallback_subchar;
1149 #ifdef UTF8_OUTPUT_ENABLE
1150 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1151 ms_ucs_map_f = TRUE;
1155 #ifdef UNICODE_NORMALIZATION
1156 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1157 input_f = UTF8_INPUT;
1162 if (strcmp(long_option[i].name, "prefix=") == 0){
1163 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1164 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1165 prefix_table[p[i]] = p[1];
1172 case 'b': /* buffered mode */
1175 case 'u': /* non bufferd mode */
1178 case 't': /* transparent mode */
1181 case 'j': /* JIS output */
1183 output_conv = j_oconv;
1185 case 'e': /* AT&T EUC output */
1186 output_conv = e_oconv;
1188 case 's': /* SJIS output */
1189 output_conv = s_oconv;
1191 case 'l': /* ISO8859 Latin-1 support, no conversion */
1192 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1193 input_f = LATIN1_INPUT;
1195 case 'i': /* Kanji IN ESC-$-@/B */
1196 if (*cp=='@'||*cp=='B')
1197 kanji_intro = *cp++;
1199 case 'o': /* ASCII IN ESC-(-J/B */
1200 if (*cp=='J'||*cp=='B'||*cp=='H')
1201 ascii_intro = *cp++;
1205 bit:1 katakana->hiragana
1206 bit:2 hiragana->katakana
1208 if ('9'>= *cp && *cp>='0')
1209 hira_f |= (*cp++ -'0');
1216 #if defined(MSDOS) || defined(__OS2__)
1231 #ifdef UTF8_OUTPUT_ENABLE
1232 case 'w': /* UTF-8 output */
1233 if ('1'== cp[0] && '6'==cp[1]) {
1234 output_conv = w_oconv16; cp+=2;
1236 unicode_bom_f=2; cp++;
1239 unicode_bom_f=1; cp++;
1241 } else if (cp[0] == 'B') {
1242 unicode_bom_f=2; cp++;
1244 unicode_bom_f=1; cp++;
1247 } else if (cp[0] == '8') {
1248 output_conv = w_oconv; cp++;
1251 unicode_bom_f=1; cp++;
1254 output_conv = w_oconv;
1257 #ifdef UTF8_INPUT_ENABLE
1258 case 'W': /* UTF-8 input */
1259 if ('1'== cp[0] && '6'==cp[1]) {
1260 input_f = UTF16BE_INPUT;
1261 utf16_mode = UTF16BE_INPUT;
1265 input_f = UTF16LE_INPUT;
1266 utf16_mode = UTF16LE_INPUT;
1267 } else if (cp[0] == 'B') {
1269 input_f = UTF16BE_INPUT;
1270 utf16_mode = UTF16BE_INPUT;
1272 } else if (cp[0] == '8') {
1274 input_f = UTF8_INPUT;
1276 input_f = UTF8_INPUT;
1279 /* Input code assumption */
1280 case 'J': /* JIS input */
1281 case 'E': /* AT&T EUC input */
1282 input_f = JIS_INPUT;
1284 case 'S': /* MS Kanji input */
1285 input_f = SJIS_INPUT;
1286 if (x0201_f==NO_X0201) x0201_f=TRUE;
1288 case 'Z': /* Convert X0208 alphabet to asii */
1289 /* bit:0 Convert X0208
1290 bit:1 Convert Kankaku to one space
1291 bit:2 Convert Kankaku to two spaces
1292 bit:3 Convert HTML Entity
1294 if ('9'>= *cp && *cp>='0')
1295 alpha_f |= 1<<(*cp++ -'0');
1299 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1300 x0201_f = FALSE; /* No X0201->X0208 conversion */
1302 ESC-(-I in JIS, EUC, MS Kanji
1303 SI/SO in JIS, EUC, MS Kanji
1304 SSO in EUC, JIS, not in MS Kanji
1305 MS Kanji (0xa0-0xdf)
1307 ESC-(-I in JIS (0x20-0x5f)
1308 SSO in EUC (0xa0-0xdf)
1309 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1312 case 'X': /* Assume X0201 kana */
1313 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1316 case 'F': /* prserve new lines */
1317 fold_preserve_f = TRUE;
1318 case 'f': /* folding -f60 or -f */
1321 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1323 fold_len += *cp++ - '0';
1325 if (!(0<fold_len && fold_len<BUFSIZ))
1326 fold_len = DEFAULT_FOLD;
1330 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1332 fold_margin += *cp++ - '0';
1336 case 'm': /* MIME support */
1337 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1338 if (*cp=='B'||*cp=='Q') {
1339 mime_decode_mode = *cp++;
1340 mimebuf_f = FIXED_MIME;
1341 } else if (*cp=='N') {
1342 mime_f = TRUE; cp++;
1343 } else if (*cp=='S') {
1344 mime_f = STRICT_MIME; cp++;
1345 } else if (*cp=='0') {
1346 mime_decode_f = FALSE;
1347 mime_f = FALSE; cp++;
1350 case 'M': /* MIME output */
1353 mimeout_f = FIXED_MIME; cp++;
1354 } else if (*cp=='Q') {
1356 mimeout_f = FIXED_MIME; cp++;
1361 case 'B': /* Broken JIS support */
1363 bit:1 allow any x on ESC-(-x or ESC-$-x
1364 bit:2 reset to ascii on NL
1366 if ('9'>= *cp && *cp>='0')
1367 broken_f |= 1<<(*cp++ -'0');
1372 case 'O':/* for Output file */
1376 case 'c':/* add cr code */
1379 case 'd':/* delete cr code */
1382 case 'I': /* ISO-2022-JP output */
1385 case 'L': /* line mode */
1386 if (*cp=='u') { /* unix */
1387 crmode_f = NL; cp++;
1388 } else if (*cp=='m') { /* mac */
1389 crmode_f = CR; cp++;
1390 } else if (*cp=='w') { /* windows */
1391 crmode_f = CRLF; cp++;
1392 } else if (*cp=='0') { /* no conversion */
1402 /* multiple options in a string are allowed for the Perl module */
1403 while(*cp && *cp++!='-');
1406 /* bogus option but ignored */
1412 #ifdef ANSI_C_PROTOTYPE
1413 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1415 struct input_code * find_inputcode_byfunc(iconv_func)
1416 int (*iconv_func)();
1420 struct input_code *p = input_code_list;
1422 if (iconv_func == p->iconv_func){
1431 #ifdef ANSI_C_PROTOTYPE
1432 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1434 void set_iconv(f, iconv_func)
1436 int (*iconv_func)();
1439 #ifdef INPUT_CODE_FIX
1447 #ifdef INPUT_CODE_FIX
1448 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1454 if (estab_f && iconv_for_check != iconv){
1455 struct input_code *p = find_inputcode_byfunc(iconv);
1457 set_input_codename(p->name);
1458 debug(input_codename);
1460 iconv_for_check = iconv;
1465 #define SCORE_L2 (1) /* JIS level-2 kanji */
1466 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1467 #define SCORE_DEPEND (SCORE_KANA << 1) /* machine-dependent characters */
1468 #ifdef SHIFTJIS_CP932
1469 #define SCORE_CP932 (SCORE_DEPEND << 1) /* reinterpreted via CP932 */
1470 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* non-existent character */
1472 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* non-existent character */
1474 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* designated by MIME */
1475 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1477 #define SCORE_INIT (SCORE_iMIME)
1479 const int score_table_A0[] = {
1482 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1483 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1486 const int score_table_F0[] = {
1487 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1488 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1489 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1490 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1493 void set_code_score(ptr, score)
1494 struct input_code *ptr;
1498 ptr->score |= score;
1502 void clr_code_score(ptr, score)
1503 struct input_code *ptr;
1507 ptr->score &= ~score;
1511 void code_score(ptr)
1512 struct input_code *ptr;
1514 int c2 = ptr->buf[0];
1515 #ifdef UTF8_OUTPUT_ENABLE
1516 int c1 = ptr->buf[1];
1519 set_code_score(ptr, SCORE_ERROR);
1520 }else if (c2 == SSO){
1521 set_code_score(ptr, SCORE_KANA);
1522 #ifdef UTF8_OUTPUT_ENABLE
1523 }else if (!e2w_conv(c2, c1)){
1524 set_code_score(ptr, SCORE_NO_EXIST);
1526 }else if ((c2 & 0x70) == 0x20){
1527 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1528 }else if ((c2 & 0x70) == 0x70){
1529 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1530 }else if ((c2 & 0x70) >= 0x50){
1531 set_code_score(ptr, SCORE_L2);
1535 void status_disable(ptr)
1536 struct input_code *ptr;
1541 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1544 void status_push_ch(ptr, c)
1545 struct input_code *ptr;
1548 ptr->buf[ptr->index++] = c;
1551 void status_clear(ptr)
1552 struct input_code *ptr;
1558 void status_reset(ptr)
1559 struct input_code *ptr;
1562 ptr->score = SCORE_INIT;
1565 void status_reinit(ptr)
1566 struct input_code *ptr;
1569 ptr->_file_stat = 0;
1572 void status_check(ptr, c)
1573 struct input_code *ptr;
1576 if (c <= DEL && estab_f){
1581 void s_status(ptr, c)
1582 struct input_code *ptr;
1587 status_check(ptr, c);
1592 #ifdef NUMCHAR_OPTION
1593 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1596 }else if (0xa1 <= c && c <= 0xdf){
1597 status_push_ch(ptr, SSO);
1598 status_push_ch(ptr, c);
1601 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1603 status_push_ch(ptr, c);
1604 #ifdef SHIFTJIS_CP932
1606 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1608 status_push_ch(ptr, c);
1609 #endif /* SHIFTJIS_CP932 */
1611 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1613 status_push_ch(ptr, c);
1614 #endif /* X0212_ENABLE */
1616 status_disable(ptr);
1620 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1621 status_push_ch(ptr, c);
1622 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1626 status_disable(ptr);
1630 #ifdef SHIFTJIS_CP932
1631 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1632 status_push_ch(ptr, c);
1633 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1634 set_code_score(ptr, SCORE_CP932);
1639 #endif /* SHIFTJIS_CP932 */
1640 #ifndef X0212_ENABLE
1641 status_disable(ptr);
1647 void e_status(ptr, c)
1648 struct input_code *ptr;
1653 status_check(ptr, c);
1658 #ifdef NUMCHAR_OPTION
1659 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1662 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1664 status_push_ch(ptr, c);
1666 }else if (0x8f == c){
1668 status_push_ch(ptr, c);
1669 #endif /* X0212_ENABLE */
1671 status_disable(ptr);
1675 if (0xa1 <= c && c <= 0xfe){
1676 status_push_ch(ptr, c);
1680 status_disable(ptr);
1685 if (0xa1 <= c && c <= 0xfe){
1687 status_push_ch(ptr, c);
1689 status_disable(ptr);
1691 #endif /* X0212_ENABLE */
1695 #ifdef UTF8_INPUT_ENABLE
1696 void w16_status(ptr, c)
1697 struct input_code *ptr;
1704 if (ptr->_file_stat == 0){
1705 if (c == 0xfe || c == 0xff){
1707 status_push_ch(ptr, c);
1708 ptr->_file_stat = 1;
1710 status_disable(ptr);
1711 ptr->_file_stat = -1;
1713 }else if (ptr->_file_stat > 0){
1715 status_push_ch(ptr, c);
1716 }else if (ptr->_file_stat < 0){
1717 status_disable(ptr);
1723 status_disable(ptr);
1724 ptr->_file_stat = -1;
1726 status_push_ch(ptr, c);
1733 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1734 status_push_ch(ptr, c);
1737 status_disable(ptr);
1738 ptr->_file_stat = -1;
1744 void w_status(ptr, c)
1745 struct input_code *ptr;
1750 status_check(ptr, c);
1755 #ifdef NUMCHAR_OPTION
1756 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1759 }else if (0xc0 <= c && c <= 0xdf){
1761 status_push_ch(ptr, c);
1762 }else if (0xe0 <= c && c <= 0xef){
1764 status_push_ch(ptr, c);
1766 status_disable(ptr);
1771 if (0x80 <= c && c <= 0xbf){
1772 status_push_ch(ptr, c);
1773 if (ptr->index > ptr->stat){
1774 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1775 && ptr->buf[2] == 0xbf);
1776 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1777 &ptr->buf[0], &ptr->buf[1]);
1784 status_disable(ptr);
1795 int action_flag = 1;
1796 struct input_code *result = 0;
1797 struct input_code *p = input_code_list;
1799 (p->status_func)(p, c);
1802 }else if(p->stat == 0){
1813 if (result && !estab_f){
1814 set_iconv(TRUE, result->iconv_func);
1815 }else if (c <= DEL){
1816 struct input_code *ptr = input_code_list;
1831 return std_gc_buf[--std_gc_ndx];
1842 if (std_gc_ndx == STD_GC_BUFSIZE){
1845 std_gc_buf[std_gc_ndx++] = c;
1859 #if !defined(PERL_XS) && !defined(WIN32DLL)
1866 while ((c = (*i_getc)(f)) != EOF)
1875 oconv = output_conv;
1878 /* replace continuation module, from output side */
1880 /* output redirection */
1882 if (noout_f || guess_f){
1889 if (mimeout_f == TRUE) {
1890 o_base64conv = oconv; oconv = base64_conv;
1892 /* base64_count = 0; */
1896 o_crconv = oconv; oconv = cr_conv;
1899 o_rot_conv = oconv; oconv = rot_conv;
1902 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1905 o_hira_conv = oconv; oconv = hira_conv;
1908 o_fconv = oconv; oconv = fold_conv;
1911 if (alpha_f || x0201_f) {
1912 o_zconv = oconv; oconv = z_conv;
1916 i_ungetc = std_ungetc;
1917 /* input redirection */
1920 i_cgetc = i_getc; i_getc = cap_getc;
1921 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1924 i_ugetc = i_getc; i_getc = url_getc;
1925 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1928 #ifdef NUMCHAR_OPTION
1930 i_ngetc = i_getc; i_getc = numchar_getc;
1931 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1934 #ifdef UNICODE_NORMALIZATION
1935 if (nfc_f && input_f == UTF8_INPUT){
1936 i_nfc_getc = i_getc; i_getc = nfc_getc;
1937 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
1940 if (mime_f && mimebuf_f==FIXED_MIME) {
1941 i_mgetc = i_getc; i_getc = mime_getc;
1942 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1945 i_bgetc = i_getc; i_getc = broken_getc;
1946 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1948 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1949 set_iconv(-TRUE, e_iconv);
1950 } else if (input_f == SJIS_INPUT) {
1951 set_iconv(-TRUE, s_iconv);
1952 #ifdef UTF8_INPUT_ENABLE
1953 } else if (input_f == UTF8_INPUT) {
1954 set_iconv(-TRUE, w_iconv);
1955 } else if (input_f == UTF16BE_INPUT) {
1956 set_iconv(-TRUE, w_iconv16);
1957 } else if (input_f == UTF16LE_INPUT) {
1958 set_iconv(-TRUE, w_iconv16);
1961 set_iconv(FALSE, e_iconv);
1965 struct input_code *p = input_code_list;
1973 Conversion main loop. Code detection only.
1982 int is_8bit = FALSE;
1984 module_connection();
1989 output_mode = ASCII;
1992 #define NEXT continue /* no output, get next */
1993 #define SEND ; /* output c1 and c2, get next */
1994 #define LAST break /* end of loop, go closing */
1996 while ((c1 = (*i_getc)(f)) != EOF) {
2001 /* in case of 8th bit is on */
2002 if (!estab_f&&!mime_decode_mode) {
2003 /* in case of not established yet */
2004 /* It is still ambiguous */
2005 if (h_conv(f, c2, c1)==EOF)
2011 /* in case of already established */
2013 /* ignore bogus code */
2019 /* second byte, 7 bit code */
2020 /* it might be kanji shifted */
2021 if ((c1 == DEL) || (c1 <= SPACE)) {
2022 /* ignore bogus first code */
2030 #ifdef UTF8_INPUT_ENABLE
2039 #ifdef NUMCHAR_OPTION
2040 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
2043 } else if (c1 > DEL) {
2045 if (!estab_f && !iso8859_f) {
2046 /* not established yet */
2047 if (!is_8bit) is_8bit = TRUE;
2050 } else { /* estab_f==TRUE */
2055 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2056 /* SJIS X0201 Case... */
2057 if(iso2022jp_f && x0201_f==NO_X0201) {
2058 (*oconv)(GETA1, GETA2);
2065 } else if (c1==SSO && iconv != s_iconv) {
2066 /* EUC X0201 Case */
2067 c1 = (*i_getc)(f); /* skip SSO */
2069 if (SSP<=c1 && c1<0xe0) {
2070 if(iso2022jp_f && x0201_f==NO_X0201) {
2071 (*oconv)(GETA1, GETA2);
2078 } else { /* bogus code, skip SSO and one byte */
2082 /* already established */
2087 } else if ((c1 > SPACE) && (c1 != DEL)) {
2088 /* in case of Roman characters */
2090 /* output 1 shifted byte */
2094 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2095 /* output 1 shifted byte */
2096 if(iso2022jp_f && x0201_f==NO_X0201) {
2097 (*oconv)(GETA1, GETA2);
2104 /* look like bogus code */
2107 } else if (input_mode == X0208) {
2108 /* in case of Kanji shifted */
2111 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2112 /* Check MIME code */
2113 if ((c1 = (*i_getc)(f)) == EOF) {
2116 } else if (c1 == '?') {
2117 /* =? is mime conversion start sequence */
2118 if(mime_f == STRICT_MIME) {
2119 /* check in real detail */
2120 if (mime_begin_strict(f) == EOF)
2124 } else if (mime_begin(f) == EOF)
2134 /* normal ASCII code */
2137 } else if (!is_8bit && c1 == SI) {
2140 } else if (!is_8bit && c1 == SO) {
2143 } else if (!is_8bit && c1 == ESC ) {
2144 if ((c1 = (*i_getc)(f)) == EOF) {
2145 /* (*oconv)(0, ESC); don't send bogus code */
2147 } else if (c1 == '$') {
2148 if ((c1 = (*i_getc)(f)) == EOF) {
2150 (*oconv)(0, ESC); don't send bogus code
2151 (*oconv)(0, '$'); */
2153 } else if (c1 == '@'|| c1 == 'B') {
2154 /* This is kanji introduction */
2157 set_input_codename("ISO-2022-JP");
2159 debug(input_codename);
2162 } else if (c1 == '(') {
2163 if ((c1 = (*i_getc)(f)) == EOF) {
2164 /* don't send bogus code
2170 } else if (c1 == '@'|| c1 == 'B') {
2171 /* This is kanji introduction */
2176 } else if (c1 == 'D'){
2180 #endif /* X0212_ENABLE */
2182 /* could be some special code */
2189 } else if (broken_f&0x2) {
2190 /* accept any ESC-(-x as broken code ... */
2200 } else if (c1 == '(') {
2201 if ((c1 = (*i_getc)(f)) == EOF) {
2202 /* don't send bogus code
2204 (*oconv)(0, '('); */
2208 /* This is X0201 kana introduction */
2209 input_mode = X0201; shift_mode = X0201;
2211 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2212 /* ESC ( B/J/H: this switches back to ASCII (JIS-Roman), not a kanji introduction */
2213 input_mode = ASCII; shift_mode = FALSE;
2215 } else if (broken_f&0x2) {
2216 input_mode = ASCII; shift_mode = FALSE;
2221 /* maintain various input_mode here */
2225 } else if ( c1 == 'N' || c1 == 'n' ){
2227 c3 = (*i_getc)(f); /* skip SS2 */
2228 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2243 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2244 input_mode = ASCII; set_iconv(FALSE, 0);
2246 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2247 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2255 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2256 if ((c1=(*i_getc)(f))!=EOF) {
2260 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2276 if (input_mode == X0208)
2277 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2279 else if (input_mode == X0212)
2280 (*oconv)((0x8f << 8) | c2, c1);
2281 #endif /* X0212_ENABLE */
2282 else if (input_mode)
2283 (*oconv)(input_mode, c1); /* other special case */
2284 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2285 int c0 = (*i_getc)(f);
2288 (*iconv)(c2, c1, c0);
2294 /* goto next_word */
2298 (*iconv)(EOF, 0, 0);
2299 if (!is_inputcode_set)
2302 struct input_code *p = input_code_list;
2303 struct input_code *result = p;
2305 if (p->score < result->score) result = p;
2308 set_input_codename(result->name);
2323 /** it must NOT be in the kanji shift sequence */
2324 /** it must NOT be written in JIS7 */
2325 /** and it must be after 2 byte 8bit code */
2332 while ((c1 = (*i_getc)(f)) != EOF) {
2338 if (push_hold_buf(c1) == EOF || estab_f){
2344 struct input_code *p = input_code_list;
2345 struct input_code *result = p;
2350 if (p->score < result->score){
2355 set_iconv(FALSE, result->iconv_func);
2360 ** 1) EOF is detected, or
2361 ** 2) Code is established, or
2362 ** 3) Buffer is FULL (but last word is pushed)
2364 ** in 1) and 3) cases, we continue to use
2365 ** Kanji codes by oconv and leave estab_f unchanged.
2370 while (wc < hold_count){
2371 c2 = hold_buf[wc++];
2373 #ifdef NUMCHAR_OPTION
2374 || (c2 & CLASS_MASK) == CLASS_UTF16
2379 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2380 (*iconv)(X0201, c2, 0);
2383 if (wc < hold_count){
2384 c1 = hold_buf[wc++];
2393 if ((*iconv)(c2, c1, 0) < 0){
2395 if (wc < hold_count){
2396 c0 = hold_buf[wc++];
2405 (*iconv)(c2, c1, c0);
2418 if (hold_count >= HOLD_SIZE*2)
2420 hold_buf[hold_count++] = c2;
2421 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2424 int s2e_conv(c2, c1, p2, p1)
2428 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
2431 #ifdef SHIFTJIS_CP932
2432 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2433 extern const unsigned short shiftjis_cp932[3][189];
2434 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2440 #endif /* SHIFTJIS_CP932 */
2442 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2443 extern const unsigned short shiftjis_x0212[3][189];
2444 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2447 c2 = (0x8f << 8) | (val >> 8);
2460 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2462 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2471 c2 = x0212_unshift(c2);
2486 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2489 int ret = s2e_conv(c2, c1, &c2, &c1);
2490 if (ret) return ret;
2504 }else if (c2 == 0x8f){
2508 c2 = (c2 << 8) | (c1 & 0x7f);
2510 #ifdef SHIFTJIS_CP932
2513 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2514 s2e_conv(s2, s1, &c2, &c1);
2515 if ((c2 & 0xff00) == 0){
2521 #endif /* SHIFTJIS_CP932 */
2522 #endif /* X0212_ENABLE */
2523 } else if (c2 == SSO){
2526 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2536 #ifdef UTF8_INPUT_ENABLE
2538 w2e_conv(c2, c1, c0, p2, p1)
2542 extern const unsigned short *const utf8_to_euc_2bytes[];
2543 extern const unsigned short *const *const utf8_to_euc_3bytes[];
2546 if (0xc0 <= c2 && c2 <= 0xef) {
2547 const unsigned short *const *pp;
2550 if (c0 == 0) return -1;
2551 pp = utf8_to_euc_3bytes[c2 - 0x80];
2552 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2554 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2556 #ifdef NUMCHAR_OPTION
2559 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2564 } else if (c2 == X0201) {
2580 if (c2 == 0) /* 0x00-0x7f */
2582 else if ((c2 & 0xe0) == 0xc0) /* 0xc0-0xdf */
2584 else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
2585 return -1; /* 3bytes */
2587 else if (0xf0 <= c2)
2588 return 0; /* 4,5,6bytes */
2589 else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
2590 return 0; /* trail byte */
2595 else if (c2 == 0xef && c1 == 0xbb && c0 == 0xbf) {
2596 return 0; /* throw BOM */
2597 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
2598 } else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
2599 unsigned short val = 0;
2604 val = ww16_conv(c2, c1, c0);
2605 c2 = (val >> 8) & 0xff;
2609 ret = w2e_conv(c2, c1, c0, &c2, &c1);
2618 w16w_conv(val, p2, p1, p0)
2626 }else if (val < 0x800){
2627 *p2 = 0xc0 | (val >> 6);
2628 *p1 = 0x80 | (val & 0x3f);
2631 *p2 = 0xe0 | (val >> 12);
2632 *p1 = 0x80 | ((val >> 6) & 0x3f);
2633 *p0 = 0x80 | (val & 0x3f);
2638 ww16_conv(c2, c1, c0)
2643 val = (c2 & 0x0f) << 12;
2644 val |= (c1 & 0x3f) << 6;
2646 }else if (c2 >= 0xc0){
2647 val = (c2 & 0x1f) << 6;
2656 w16e_conv(val, p2, p1)
2660 extern const unsigned short *const utf8_to_euc_2bytes[];
2661 extern const unsigned short *const *const utf8_to_euc_3bytes[];
2663 const unsigned short *const *pp;
2667 w16w_conv(val, &c2, &c1, &c0);
2670 pp = utf8_to_euc_3bytes[c2 - 0x80];
2671 psize = sizeof_utf8_to_euc_C2;
2672 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2674 pp = utf8_to_euc_2bytes;
2675 psize = sizeof_utf8_to_euc_2bytes;
2676 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2678 #ifdef NUMCHAR_OPTION
2681 *p1 = CLASS_UTF16 | val;
2693 w_iconv16(c2, c1, c0)
2698 if (c2==0376 && c1==0377){
2699 utf16_mode = UTF16BE_INPUT;
2701 } else if (c2==0377 && c1==0376){
2702 utf16_mode = UTF16LE_INPUT;
2705 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
2707 tmp=c1; c1=c2; c2=tmp;
2709 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2713 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
2714 if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16));
2716 else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2717 if (ret) return ret;
2723 w_iconv_common(c1, c0, pp, psize, p2, p1)
2725 const unsigned short *const *pp;
2730 const unsigned short *p;
2733 /* CP932/CP51932: U+00A6 (BROKEN BAR) -> not 0x8fa2c3, but 0x7c */
2734 if (cp932_f && c1 == 0xC2 && c0 == 0xA6){
2740 if (pp == 0) return 1;
2743 if (c1 < 0 || psize <= c1) return 1;
2745 if (p == 0) return 1;
2748 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2750 if (val == 0) return 1;
2757 if (c2 == SO) c2 = X0201;
2766 #ifdef UTF8_OUTPUT_ENABLE
2768 nkf_each_char_to_hex(f, c)
2769 void (*f)PROTO((int c));
2772 const char *hex = "0123456789ABCDEF";
2778 (*f)(hex[(c>>shift)&0xF]);
2789 encode_fallback_html(c)
2796 (*o_putc)(0x30+(c/1000000)%10);
2798 (*o_putc)(0x30+(c/100000 )%10);
2800 (*o_putc)(0x30+(c/10000 )%10);
2802 (*o_putc)(0x30+(c/1000 )%10);
2804 (*o_putc)(0x30+(c/100 )%10);
2806 (*o_putc)(0x30+(c/10 )%10);
2808 (*o_putc)(0x30+ c %10);
2814 encode_fallback_xml(c)
2820 nkf_each_char_to_hex(o_putc, c);
2826 encode_fallback_java(c)
2829 const char *hex = "0123456789ABCDEF";
2831 if((c&0x00FFFFFF) > 0xFFFF){
2835 (*o_putc)(hex[(c>>20)&0xF]);
2836 (*o_putc)(hex[(c>>16)&0xF]);
2840 (*o_putc)(hex[(c>>12)&0xF]);
2841 (*o_putc)(hex[(c>> 8)&0xF]);
2842 (*o_putc)(hex[(c>> 4)&0xF]);
2843 (*o_putc)(hex[ c &0xF]);
2848 encode_fallback_perl(c)
2854 nkf_each_char_to_hex(o_putc, c);
2860 encode_fallback_subchar(c)
2863 c = unicode_subchar;
2868 (*o_putc)((c>>shift)&0xFF);
2882 extern const unsigned short euc_to_utf8_1byte[];
2883 extern const unsigned short *const euc_to_utf8_2bytes[];
2884 extern const unsigned short *const euc_to_utf8_2bytes_ms[];
2885 const unsigned short *p;
2888 p = euc_to_utf8_1byte;
2890 } else if (c2 >> 8 == 0x8f){
2891 extern const unsigned short *const x0212_to_utf8_2bytes[];
2892 c2 = (c2&0x7f) - 0x21;
2893 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2894 p = x0212_to_utf8_2bytes[c2];
2900 c2 = (c2&0x7f) - 0x21;
2901 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2902 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2907 c1 = (c1 & 0x7f) - 0x21;
2908 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2925 if (unicode_bom_f==2) {
2932 #ifdef NUMCHAR_OPTION
2933 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2934 w16w_conv(c1, &c2, &c1, &c0);
2938 if (c0) (*o_putc)(c0);
2945 output_mode = ASCII;
2947 } else if (c2 == ISO8859_1) {
2948 output_mode = ISO8859_1;
2949 (*o_putc)(c1 | 0x080);
2952 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16))
2953 val = ((c2<<8)&0xff00) + c1;
2954 else val = e2w_conv(c2, c1);
2956 w16w_conv(val, &c2, &c1, &c0);
2960 if (c0) (*o_putc)(c0);
2976 if (unicode_bom_f==2) {
2978 (*o_putc)((unsigned char)'\377');
2982 (*o_putc)((unsigned char)'\377');
2987 if (internal_unicode_f && (iconv == w_iconv || iconv == w_iconv16)){
2988 } else if (c2 == ISO8859_1) {
2991 #ifdef NUMCHAR_OPTION
2992 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2993 c2 = (c1 >> 8) & 0xff;
2997 unsigned short val = e2w_conv(c2, c1);
2998 c2 = (val >> 8) & 0xff;
3017 #ifdef NUMCHAR_OPTION
3018 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3019 w16e_conv(c1, &c2, &c1);
3020 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3021 if(encode_fallback)(*encode_fallback)(c1);
3029 } else if (c2 == 0) {
3030 output_mode = ASCII;
3032 } else if (c2 == X0201) {
3033 output_mode = JAPANESE_EUC;
3034 (*o_putc)(SSO); (*o_putc)(c1|0x80);
3035 } else if (c2 == ISO8859_1) {
3036 output_mode = ISO8859_1;
3037 (*o_putc)(c1 | 0x080);
3039 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3040 output_mode = JAPANESE_EUC;
3041 #ifdef SHIFTJIS_CP932
3044 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3045 s2e_conv(s2, s1, &c2, &c1);
3050 output_mode = ASCII;
3052 }else if ((c2 & 0xff00) >> 8 == 0x8f){
3055 (*o_putc)((c2 & 0x7f) | 0x080);
3056 (*o_putc)(c1 | 0x080);
3059 (*o_putc)((c2 & 0x7f) | 0x080);
3060 (*o_putc)(c1 | 0x080);
3064 if ((c1<0x21 || 0x7e<c1) ||
3065 (c2<0x21 || 0x7e<c2)) {
3066 set_iconv(FALSE, 0);
3067 return; /* too late to rescue this char */
3069 output_mode = JAPANESE_EUC;
3070 (*o_putc)(c2 | 0x080);
3071 (*o_putc)(c1 | 0x080);
3081 if ((ret & 0xff00) == 0x8f00){
3082 if (0x75 <= c && c <= 0x7f){
3083 ret = c + (0x109 - 0x75);
3086 if (0x75 <= c && c <= 0x7f){
3087 ret = c + (0x113 - 0x75);
3094 int x0212_unshift(c)
3098 if (0x7f <= c && c <= 0x88){
3099 ret = c + (0x75 - 0x7f);
3100 }else if (0x89 <= c && c <= 0x92){
3101 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
3105 #endif /* X0212_ENABLE */
3108 e2s_conv(c2, c1, p2, p1)
3109 int c2, c1, *p2, *p1;
3113 const unsigned short *ptr;
3115 extern const unsigned short *const x0212_shiftjis[];
3116 if ((c2 & 0xff00) == 0x8f00){
3118 if (0x21 <= ndx && ndx <= 0x7e){
3119 ptr = x0212_shiftjis[ndx - 0x21];
3121 val = ptr[(c1 & 0x7f) - 0x21];
3131 c2 = x0212_shift(c2);
3133 #endif /* X0212_ENABLE */
3134 if ((c2 & 0xff00) == 0x8f00){
3137 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
3138 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3147 #ifdef NUMCHAR_OPTION
3148 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3149 w16e_conv(c1, &c2, &c1);
3150 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
3151 if(encode_fallback)(*encode_fallback)(c1);
3159 } else if (c2 == 0) {
3160 output_mode = ASCII;
3162 } else if (c2 == X0201) {
3163 output_mode = SHIFT_JIS;
3165 } else if (c2 == ISO8859_1) {
3166 output_mode = ISO8859_1;
3167 (*o_putc)(c1 | 0x080);
3169 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3170 output_mode = SHIFT_JIS;
3171 if (e2s_conv(c2, c1, &c2, &c1) == 0){
3177 if ((c1<0x20 || 0x7e<c1) ||
3178 (c2<0x20 || 0x7e<c2)) {
3179 set_iconv(FALSE, 0);
3180 return; /* too late to rescue this char */
3182 output_mode = SHIFT_JIS;
3183 e2s_conv(c2, c1, &c2, &c1);
3185 #ifdef SHIFTJIS_CP932
3187 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3188 extern const unsigned short cp932inv[2][189];
3189 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3195 #endif /* SHIFTJIS_CP932 */
3198 if (prefix_table[(unsigned char)c1]){
3199 (*o_putc)(prefix_table[(unsigned char)c1]);
3210 #ifdef NUMCHAR_OPTION
3211 if ((c1 & CLASS_MASK) == CLASS_UTF16){
3212 w16e_conv(c1, &c2, &c1);
3216 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3219 (*o_putc)(ascii_intro);
3220 output_mode = ASCII;
3224 } else if ((c2 & 0xff00) >> 8 == 0x8f){
3225 if (output_mode!=X0212) {
3226 output_mode = X0212;
3232 (*o_putc)(c2 & 0x7f);
3235 } else if (c2==X0201) {
3236 if (output_mode!=X0201) {
3237 output_mode = X0201;
3243 } else if (c2==ISO8859_1) {
3244 /* iso8859 introduction, or 8th bit on */
3245 /* Can we convert in 7bit form using ESC-'-'-A ?
3247 output_mode = ISO8859_1;
3249 } else if (c2 == 0) {
3250 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3253 (*o_putc)(ascii_intro);
3254 output_mode = ASCII;
3258 if (output_mode != X0208) {
3259 output_mode = X0208;
3262 (*o_putc)(kanji_intro);
3264 if (c1<0x20 || 0x7e<c1)
3266 if (c2<0x20 || 0x7e<c2)
3278 mime_prechar(c2, c1);
3279 (*o_base64conv)(c2,c1);
3283 STATIC int broken_buf[3];
3284 STATIC int broken_counter = 0;
3285 STATIC int broken_last = 0;
3292 if (broken_counter>0) {
3293 return broken_buf[--broken_counter];
3296 if (c=='$' && broken_last != ESC
3297 && (input_mode==ASCII || input_mode==X0201)) {
3300 if (c1=='@'|| c1=='B') {
3301 broken_buf[0]=c1; broken_buf[1]=c;
3308 } else if (c=='(' && broken_last != ESC
3309 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3312 if (c1=='J'|| c1=='B') {
3313 broken_buf[0]=c1; broken_buf[1]=c;
3331 if (broken_counter<2)
3332 broken_buf[broken_counter++]=c;
3336 STATIC int prev_cr = 0;
3344 if (! (c2==0&&c1==NL) ) {
3350 } else if (c1=='\r') {
3352 } else if (c1=='\n') {
3353 if (crmode_f==CRLF) {
3354 (*o_crconv)(0,'\r');
3355 } else if (crmode_f==CR) {
3356 (*o_crconv)(0,'\r');
3360 } else if (c1!='\032' || crmode_f!=NL){
3366 Return value of fold_conv()
3368 \n add newline and output char
3369 \r add newline and output nothing
3372 1 (or else) normal output
3374 fold state in prev (previous character)
3376 >0x80 Japanese (X0208/X0201)
3381 This fold algorithm does not preserve leading spaces in a line.
3382 This is the main difference from fmt.
/* Amount added to the fold line counter for one character: 2 when c2 is
 * non-zero, otherwise 1.  c1 is accepted for symmetry with the (c2,c1)
 * calling convention but is not used.  The argument is parenthesized so an
 * expression argument groups as a unit. */
#define char_size(c2,c1) ((c2)?2:1)
3394 if (c1== '\r' && !fold_preserve_f) {
3395 fold_state=0; /* ignore cr */
3396 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3398 fold_state=0; /* ignore cr */
3399 } else if (c1== BS) {
3400 if (f_line>0) f_line--;
3402 } else if (c2==EOF && f_line != 0) { /* close open last line */
3404 } else if ((c1=='\n' && !fold_preserve_f)
3405 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3406 && fold_preserve_f)) {
3408 if (fold_preserve_f) {
3412 } else if ((f_prev == c1 && !fold_preserve_f)
3413 || (f_prev == '\n' && fold_preserve_f)
3414 ) { /* duplicate newline */
3417 fold_state = '\n'; /* output two newline */
3423 if (f_prev&0x80) { /* Japanese? */
3425 fold_state = 0; /* ignore given single newline */
3426 } else if (f_prev==' ') {
3430 if (++f_line<=fold_len)
3434 fold_state = '\r'; /* fold and output nothing */
3438 } else if (c1=='\f') {
3443 fold_state = '\n'; /* output newline and clear */
3444 } else if ( (c2==0 && c1==' ')||
3445 (c2==0 && c1=='\t')||
3446 (c2=='!'&& c1=='!')) {
3447 /* X0208 kankaku or ascii space */
3448 if (f_prev == ' ') {
3449 fold_state = 0; /* remove duplicate spaces */
3452 if (++f_line<=fold_len)
3453 fold_state = ' '; /* output ASCII space only */
3455 f_prev = ' '; f_line = 0;
3456 fold_state = '\r'; /* fold and output nothing */
3460 prev0 = f_prev; /* we still need this one... , but almost done */
3462 if (c2 || c2==X0201)
3463 f_prev |= 0x80; /* this is Japanese */
3464 f_line += char_size(c2,c1);
3465 if (f_line<=fold_len) { /* normal case */
3468 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3469 f_line = char_size(c2,c1);
3470 fold_state = '\n'; /* We can't wait, do fold now */
3471 } else if (c2==X0201) {
3472 /* simple kinsoku rules return 1 means no folding */
3473 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3474 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3475 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3476 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3477 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3478 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3479 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3481 fold_state = '\n';/* add one new f_line before this character */
3484 fold_state = '\n';/* add one new f_line before this character */
3487 /* kinsoku point in ASCII */
3488 if ( c1==')'|| /* { [ ( */
3499 /* just after special */
3500 } else if (!is_alnum(prev0)) {
3501 f_line = char_size(c2,c1);
3503 } else if ((prev0==' ') || /* ignored new f_line */
3504 (prev0=='\n')|| /* ignored new f_line */
3505 (prev0&0x80)) { /* X0208 - ASCII */
3506 f_line = char_size(c2,c1);
3507 fold_state = '\n';/* add one new f_line before this character */
3509 fold_state = 1; /* default no fold in ASCII */
3513 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3514 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3515 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3516 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3517 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3518 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3519 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3520 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3521 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3522 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3523 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3524 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3525 /* default no fold in kinsoku */
3528 f_line = char_size(c2,c1);
3529 /* add one new f_line before this character */
3532 f_line = char_size(c2,c1);
3534 /* add one new f_line before this character */
3539 /* terminator process */
3540 switch(fold_state) {
3559 int z_prev2=0,z_prev1=0;
3566 /* if (c2) c1 &= 0x7f; assertion */
3568 if (x0201_f && z_prev2==X0201) { /* X0201 */
3569 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3571 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3573 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3575 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3579 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3588 if (x0201_f && c2==X0201) {
3589 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3590 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3591 z_prev1 = c1; z_prev2 = c2;
3594 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3599 /* JISX0208 Alphabet */
3600 if (alpha_f && c2 == 0x23 ) {
3602 } else if (alpha_f && c2 == 0x21 ) {
3603 /* JISX0208 Kigou */
3608 } else if (alpha_f&0x4) {
3613 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3619 case '>': entity = ">"; break;
3620 case '<': entity = "<"; break;
3621 case '\"': entity = """; break;
3622 case '&': entity = "&"; break;
3625 while (*entity) (*o_zconv)(0, *entity++);
3635 #define rot13(c) ( \
3637 (c <= 'M') ? (c + 13): \
3638 (c <= 'Z') ? (c - 13): \
3640 (c <= 'm') ? (c + 13): \
3641 (c <= 'z') ? (c - 13): \
3645 #define rot47(c) ( \
3647 ( c <= 'O' ) ? (c + 47) : \
3648 ( c <= '~' ) ? (c - 47) : \
3656 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3662 (*o_rot_conv)(c2,c1);
3669 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3671 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3674 (*o_hira_conv)(c2,c1);
3679 iso2022jp_check_conv(c2,c1)
3682 STATIC const int range[RANGE_NUM_MAX][2] = {
3705 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3709 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3714 for (i = 0; i < RANGE_NUM_MAX; i++) {
3715 start = range[i][0];
3718 if (c >= start && c <= end) {
3723 (*o_iso2022jp_check_conv)(c2,c1);
3727 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3729 const unsigned char *mime_pattern[] = {
3730 (const unsigned char *)"\075?EUC-JP?B?",
3731 (const unsigned char *)"\075?SHIFT_JIS?B?",
3732 (const unsigned char *)"\075?ISO-8859-1?Q?",
3733 (const unsigned char *)"\075?ISO-8859-1?B?",
3734 (const unsigned char *)"\075?ISO-2022-JP?B?",
3735 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3736 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3737 (const unsigned char *)"\075?UTF-8?B?",
3738 (const unsigned char *)"\075?UTF-8?Q?",
3740 (const unsigned char *)"\075?US-ASCII?Q?",
3745 /* Marker used to raise the priority of the corresponding input code */
3746 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3747 e_iconv, s_iconv, 0, 0, 0, 0,
3748 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3754 const int mime_encode[] = {
3755 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3756 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3763 const int mime_encode_method[] = {
3764 'B', 'B','Q', 'B', 'B', 'Q',
3765 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3773 #define MAXRECOVER 20
/* I don't trust portability of toupper, so nkf uses its own ASCII-only
 * character classification macros.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * such as `c & 0x7f` groups as intended.  NOTE: each macro still evaluates
 * its argument more than once; never pass an expression with side effects
 * (e.g. `*p++` or a getc call). */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
3787 if (i_getc!=mime_getc) {
3788 i_mgetc = i_getc; i_getc = mime_getc;
3789 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3790 if(mime_f==STRICT_MIME) {
3791 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3792 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3798 unswitch_mime_getc()
3800 if(mime_f==STRICT_MIME) {
3801 i_mgetc = i_mgetc_buf;
3802 i_mungetc = i_mungetc_buf;
3805 i_ungetc = i_mungetc;
3806 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3807 mime_iconv_back = NULL;
3811 mime_begin_strict(f)
3816 const unsigned char *p,*q;
3817 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3819 mime_decode_mode = FALSE;
3820 /* =? has been checked */
3822 p = mime_pattern[j];
3825 for(i=2;p[i]>' ';i++) { /* start at =? */
3826 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3827 /* pattern fails, try next one */
3829 while ((p = mime_pattern[++j])) {
3830 for(k=2;k<i;k++) /* assume length(p) > i */
3831 if (p[k]!=q[k]) break;
3832 if (k==i && nkf_toupper(c1)==p[k]) break;
3834 if (p) continue; /* found next one, continue */
3835 /* all fails, output from recovery buffer */
3843 mime_decode_mode = p[i-2];
3845 mime_iconv_back = iconv;
3846 set_iconv(FALSE, mime_priority_func[j]);
3847 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3849 if (mime_decode_mode=='B') {
3850 mimebuf_f = unbuf_f;
3852 /* do MIME integrity check */
3853 return mime_integrity(f,mime_pattern[j]);
3865 /* We don't keep the EOF of the Fifo, because it contains ?= as
3866 a terminator. It was checked in mime_integrity. */
3867 return ((mimebuf_f)?
3868 (*i_mgetc_buf)(f):Fifo(mime_input++));
3872 mime_ungetc_buf(c,f)
3877 (*i_mungetc_buf)(c,f);
3879 Fifo(--mime_input)=c;
3890 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3891 /* re-read and convert again from mime_buffer. */
3893 /* =? has been checked */
3895 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3896 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3897 /* We accept any character type even if it is broken up by newlines */
3898 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3899 if (c1=='\n'||c1==' '||c1=='\r'||
3900 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3902 /* Failed. But this could be another MIME preamble */
3910 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3911 if (!(++i<MAXRECOVER) || c1==EOF) break;
3912 if (c1=='b'||c1=='B') {
3913 mime_decode_mode = 'B';
3914 } else if (c1=='q'||c1=='Q') {
3915 mime_decode_mode = 'Q';
3919 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3920 if (!(++i<MAXRECOVER) || c1==EOF) break;
3922 mime_decode_mode = FALSE;
3928 if (!mime_decode_mode) {
3929 /* false MIME preamble, restart from mime_buffer */
3930 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3931 /* Since we are in MIME mode until buffer becomes empty, */
3932 /* we never go into mime_begin again for a while. */
3935 /* discard MIME preamble, and go to MIME mode */
3937 /* do no MIME integrity check */
3938 return c1; /* used only for checking EOF */
3953 fprintf(stderr, "%s\n", str);
3959 set_input_codename (codename)
3964 strcmp(codename, "") != 0 &&
3965 strcmp(codename, input_codename) != 0)
3967 is_inputcode_mixed = TRUE;
3969 input_codename = codename;
3970 is_inputcode_set = TRUE;
3973 #if !defined(PERL_XS) && !defined(WIN32DLL)
3975 print_guessed_code (filename)
3978 char *codename = "BINARY";
3979 if (!is_inputcode_mixed) {
3980 if (strcmp(input_codename, "") == 0) {
3983 codename = input_codename;
3986 if (filename != NULL) printf("%s:", filename);
3987 printf("%s\n", codename);
3995 if (nkf_isdigit(x)) return x - '0';
3996 return nkf_toupper(x) - 'A' + 10;
4001 #ifdef ANSI_C_PROTOTYPE
4002 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
4005 hex_getc(ch, f, g, u)
4018 if (!nkf_isxdigit(c2)){
4023 if (!nkf_isxdigit(c3)){
4028 return (hex2bin(c2) << 4) | hex2bin(c3);
4035 return hex_getc(':', f, i_cgetc, i_cungetc);
4043 return (*i_cungetc)(c, f);
4050 return hex_getc('%', f, i_ugetc, i_uungetc);
4058 return (*i_uungetc)(c, f);
4062 #ifdef NUMCHAR_OPTION
4067 int (*g)() = i_ngetc;
4068 int (*u)() = i_nungetc;
4079 if (buf[i] == 'x' || buf[i] == 'X'){
4080 for (j = 0; j < 5; j++){
4082 if (!nkf_isxdigit(buf[i])){
4089 c |= hex2bin(buf[i]);
4092 for (j = 0; j < 6; j++){
4096 if (!nkf_isdigit(buf[i])){
4103 c += hex2bin(buf[i]);
4109 return CLASS_UTF16 | c;
4119 numchar_ungetc(c, f)
4123 return (*i_nungetc)(c, f);
4127 #ifdef UNICODE_NORMALIZATION
4129 /* Normalization Form C */
4134 int (*g)() = i_nfc_getc;
4135 int (*u)() = i_nfc_ungetc;
4136 int i=0, j, k=1, lower, upper;
4138 const int *array = NULL;
4139 extern const struct normalization_pair normalization_table[];
4142 while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
4143 lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4144 while (upper >= lower) {
4145 j = (lower+upper) / 2;
4146 array = normalization_table[j].nfd;
4147 for (k=0; k < NORMALIZATION_TABLE_NFD_LENGTH && array[k]; k++){
4148 if (array[k] != buf[k]){
4149 array[k] < buf[k] ? (lower = j + 1) : (upper = j - 1);
4156 array = normalization_table[j].nfc;
4157 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4174 return (*i_nfc_ungetc)(c, f);
4176 #endif /* UNICODE_NORMALIZATION */
4183 int c1, c2, c3, c4, cc;
4184 int t1, t2, t3, t4, mode, exit_mode;
4188 int lwsp_size = 128;
4190 if (mime_top != mime_last) { /* Something is in FIFO */
4191 return Fifo(mime_top++);
4193 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4194 mime_decode_mode=FALSE;
4195 unswitch_mime_getc();
4196 return (*i_getc)(f);
4199 if (mimebuf_f == FIXED_MIME)
4200 exit_mode = mime_decode_mode;
4203 if (mime_decode_mode == 'Q') {
4204 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4206 if (c1=='_') return ' ';
4207 if (c1<=' ' || DEL<=c1) {
4208 mime_decode_mode = exit_mode; /* prepare for quit */
4211 if (c1!='=' && c1!='?') {
4215 mime_decode_mode = exit_mode; /* prepare for quit */
4216 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4217 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4218 /* end Q encoding */
4219 input_mode = exit_mode;
4221 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4222 if (lwsp_buf==NULL) {
4223 perror("can't malloc");
4226 while ((c1=(*i_getc)(f))!=EOF) {
4231 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4239 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
4240 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4255 lwsp_buf[lwsp_count] = c1;
4256 if (lwsp_count++>lwsp_size){
4258 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4259 if (lwsp_buf_new==NULL) {
4262 perror("can't realloc");
4265 lwsp_buf = lwsp_buf_new;
4271 if (lwsp_count > 0) {
4272 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4276 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4277 i_ungetc(lwsp_buf[lwsp_count],f);
4285 if (c1=='='&&c2<' ') { /* this is soft wrap */
4286 while((c1 = (*i_mgetc)(f)) <=' ') {
4287 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4289 mime_decode_mode = 'Q'; /* still in MIME */
4290 goto restart_mime_q;
4293 mime_decode_mode = 'Q'; /* still in MIME */
4297 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4298 if (c2<=' ') return c2;
4299 mime_decode_mode = 'Q'; /* still in MIME */
4300 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
4301 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
4302 return ((hex(c2)<<4) + hex(c3));
4305 if (mime_decode_mode != 'B') {
4306 mime_decode_mode = FALSE;
4307 return (*i_mgetc)(f);
4311 /* Base64 encoding */
4313 MIME allows line breaks in the middle of
4314 Base64, but we are very pessimistic in decoding
4315 in unbuf mode because MIME encoded text may be broken by
4316 less or an editor's control sequences (such as ESC-[-K) in unbuffered
4317 mode. Ignore incomplete MIME.
4319 mode = mime_decode_mode;
4320 mime_decode_mode = exit_mode; /* prepare for quit */
4322 while ((c1 = (*i_mgetc)(f))<=' ') {
4327 if ((c2 = (*i_mgetc)(f))<=' ') {
4330 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4331 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4334 if ((c1 == '?') && (c2 == '=')) {
4337 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4338 if (lwsp_buf==NULL) {
4339 perror("can't malloc");
4342 while ((c1=(*i_getc)(f))!=EOF) {
4347 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4355 if ((c1=(*i_getc)(f))!=EOF) {
4359 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4374 lwsp_buf[lwsp_count] = c1;
4375 if (lwsp_count++>lwsp_size){
4377 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4378 if (lwsp_buf_new==NULL) {
4381 perror("can't realloc");
4384 lwsp_buf = lwsp_buf_new;
4390 if (lwsp_count > 0) {
4391 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4395 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4396 i_ungetc(lwsp_buf[lwsp_count],f);
4405 if ((c3 = (*i_mgetc)(f))<=' ') {
4408 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4409 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4413 if ((c4 = (*i_mgetc)(f))<=' ') {
4416 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4417 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4421 mime_decode_mode = mode; /* still in MIME sigh... */
4423 /* BASE 64 decoding */
4425 t1 = 0x3f & base64decode(c1);
4426 t2 = 0x3f & base64decode(c2);
4427 t3 = 0x3f & base64decode(c3);
4428 t4 = 0x3f & base64decode(c4);
4429 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4431 Fifo(mime_last++) = cc;
4432 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4434 Fifo(mime_last++) = cc;
4435 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4437 Fifo(mime_last++) = cc;
4442 return Fifo(mime_top++);
4450 Fifo(--mime_top) = c;
4457 const unsigned char *p;
4461 /* In buffered mode, read until =? or NL or buffer full
4463 mime_input = mime_top;
4464 mime_last = mime_top;
4466 while(*p) Fifo(mime_input++) = *p++;
4469 while((c=(*i_getc)(f))!=EOF) {
4470 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4471 break; /* buffer full */
4473 if (c=='=' && d=='?') {
4474 /* checked. skip header, start decode */
4475 Fifo(mime_input++) = c;
4476 /* mime_last_input = mime_input; */
4481 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4483 /* Should we check length mod 4? */
4484 Fifo(mime_input++) = c;
4487 /* In case of Incomplete MIME, no MIME decode */
4488 Fifo(mime_input++) = c;
4489 mime_last = mime_input; /* point undecoded buffer */
4490 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4491 switch_mime_getc(); /* anyway we need buffered getc */
4502 i = c - 'A'; /* A..Z 0-25 */
4504 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4506 } else if (c > '/') {
4507 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4508 } else if (c == '+') {
4509 i = '>' /* 62 */ ; /* + 62 */
4511 i = '?' /* 63 */ ; /* / 63 */
4516 STATIC const char basis_64[] =
4517 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4520 #define MIMEOUT_BUF_LENGTH (60)
4521 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
4522 int mimeout_buf_count = 0;
4523 int mimeout_preserve_space = 0;
/*
 * Map a nibble value (0..15) to its uppercase hexadecimal digit character.
 * The argument is parenthesized so that expressions such as itoh4(v & 0xf)
 * expand correctly; it is evaluated more than once, so avoid side effects.
 */
#define itoh4(c)   ((c)>=10?(c)+'A'-10:(c)+'0')
4530 const unsigned char *p;
4533 p = mime_pattern[0];
4534 for(i=0;mime_encode[i];i++) {
4535 if (mode == mime_encode[i]) {
4536 p = mime_pattern[i];
4540 mimeout_mode = mime_encode_method[i];
4543 if (base64_count>45) {
4544 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
4545 (*o_mputc)(mimeout_buf[i]);
4551 if (!mimeout_preserve_space && mimeout_buf_count>0
4552 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4553 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4557 if (!mimeout_preserve_space) {
4558 for (;i<mimeout_buf_count;i++) {
4559 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4560 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4561 (*o_mputc)(mimeout_buf[i]);
4568 mimeout_preserve_space = FALSE;
4574 j = mimeout_buf_count;
4575 mimeout_buf_count = 0;
4577 mime_putc(mimeout_buf[i]);
4593 switch(mimeout_mode) {
4598 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4604 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4610 if (mimeout_f!=FIXED_MIME) {
4612 } else if (mimeout_mode != 'Q')
4621 switch(mimeout_mode) {
4626 } else if (c==CR||c==NL) {
4629 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4631 (*o_mputc)(itoh4(((c>>4)&0xf)));
4632 (*o_mputc)(itoh4((c&0xf)));
4641 (*o_mputc)(basis_64[c>>2]);
4646 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4652 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4653 (*o_mputc)(basis_64[c & 0x3F]);
4664 int mime_lastchar2, mime_lastchar1;
4666 void mime_prechar(c2, c1)
4671 if (base64_count + mimeout_buf_count/3*4> 66){
4672 (*o_base64conv)(EOF,0);
4673 (*o_base64conv)(0,NL);
4674 (*o_base64conv)(0,SPACE);
4676 }/*else if (mime_lastchar2){
4677 if (c1 <=DEL && !nkf_isspace(c1)){
4678 (*o_base64conv)(0,SPACE);
4682 if (c2 && mime_lastchar2 == 0
4683 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
4684 (*o_base64conv)(0,SPACE);
4687 mime_lastchar2 = c2;
4688 mime_lastchar1 = c1;
4699 if (mimeout_f == FIXED_MIME){
4700 if (mimeout_mode == 'Q'){
4701 if (base64_count > 71){
4702 if (c!=CR && c!=NL) {
4709 if (base64_count > 71){
4714 if (c == EOF) { /* c==EOF */
4718 if (c != EOF) { /* c==EOF */
4724 /* mimeout_f != FIXED_MIME */
4726 if (c == EOF) { /* c==EOF */
4727 j = mimeout_buf_count;
4728 mimeout_buf_count = 0;
4731 /*if (nkf_isspace(mimeout_buf[i])){
4734 mimeout_addchar(mimeout_buf[i]);
4738 (*o_mputc)(mimeout_buf[i]);
4744 if (mimeout_mode=='Q') {
4745 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4757 if (mimeout_buf_count > 0){
4758 lastchar = mimeout_buf[mimeout_buf_count - 1];
4763 if (!mimeout_mode) {
4764 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
4765 if (nkf_isspace(c)) {
4766 if (c==CR || c==NL) {
4769 for (i=0;i<mimeout_buf_count;i++) {
4770 (*o_mputc)(mimeout_buf[i]);
4771 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
4778 mimeout_buf_count = 1;
4780 if (base64_count > 1
4781 && base64_count + mimeout_buf_count > 76){
4784 if (!nkf_isspace(mimeout_buf[0])){
4789 mimeout_buf[mimeout_buf_count++] = c;
4790 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4791 open_mime(output_mode);
4796 if (lastchar==CR || lastchar == NL){
4797 for (i=0;i<mimeout_buf_count;i++) {
4798 (*o_mputc)(mimeout_buf[i]);
4801 mimeout_buf_count = 0;
4803 if (lastchar==SPACE) {
4804 for (i=0;i<mimeout_buf_count-1;i++) {
4805 (*o_mputc)(mimeout_buf[i]);
4808 mimeout_buf[0] = SPACE;
4809 mimeout_buf_count = 1;
4811 open_mime(output_mode);
4814 /* mimeout_mode == 'B', 1, 2 */
4815 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4816 if (lastchar == CR || lastchar == NL){
4817 if (nkf_isblank(c)) {
4818 for (i=0;i<mimeout_buf_count;i++) {
4819 mimeout_addchar(mimeout_buf[i]);
4821 mimeout_buf_count = 0;
4822 } else if (SPACE<c && c<DEL) {
4824 for (i=0;i<mimeout_buf_count;i++) {
4825 (*o_mputc)(mimeout_buf[i]);
4828 mimeout_buf_count = 0;
4831 if (c==SPACE || c==TAB || c==CR || c==NL) {
4832 for (i=0;i<mimeout_buf_count;i++) {
4833 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4835 for (i=0;i<mimeout_buf_count;i++) {
4836 (*o_mputc)(mimeout_buf[i]);
4839 mimeout_buf_count = 0;
4842 mimeout_buf[mimeout_buf_count++] = c;
4843 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4845 for (i=0;i<mimeout_buf_count;i++) {
4846 (*o_mputc)(mimeout_buf[i]);
4849 mimeout_buf_count = 0;
4853 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
4854 mimeout_buf[mimeout_buf_count++] = c;
4855 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4856 j = mimeout_buf_count;
4857 mimeout_buf_count = 0;
4859 mimeout_addchar(mimeout_buf[i]);
4866 if (mimeout_buf_count>0) {
4867 j = mimeout_buf_count;
4868 mimeout_buf_count = 0;
4870 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4872 mimeout_addchar(mimeout_buf[i]);
4878 (*o_mputc)(mimeout_buf[i]);
4880 open_mime(output_mode);
4887 #if defined(PERL_XS) || defined(WIN32DLL)
4892 struct input_code *p = input_code_list;
4905 mime_f = STRICT_MIME;
4906 mime_decode_f = FALSE;
4911 #if defined(MSDOS) || defined(__OS2__)
4916 iso2022jp_f = FALSE;
4917 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
4918 internal_unicode_f = FALSE;
4920 #ifdef UTF8_OUTPUT_ENABLE
4923 ms_ucs_map_f = FALSE;
4924 encode_fallback = NULL;
4925 unicode_subchar = '?';
4927 #ifdef UNICODE_NORMALIZATION
4940 is_inputcode_mixed = FALSE;
4941 is_inputcode_set = FALSE;
4945 #ifdef SHIFTJIS_CP932
4954 for (i = 0; i < 256; i++){
4955 prefix_table[i] = 0;
4958 #ifdef UTF8_INPUT_ENABLE
4959 utf16_mode = UTF16BE_INPUT;
4961 mimeout_buf_count = 0;
4966 fold_preserve_f = FALSE;
4969 kanji_intro = DEFAULT_J;
4970 ascii_intro = DEFAULT_R;
4971 fold_margin = FOLD_MARGIN;
4972 output_conv = DEFAULT_CONV;
4973 oconv = DEFAULT_CONV;
4974 o_zconv = no_connection;
4975 o_fconv = no_connection;
4976 o_crconv = no_connection;
4977 o_rot_conv = no_connection;
4978 o_hira_conv = no_connection;
4979 o_base64conv = no_connection;
4980 o_iso2022jp_check_conv = no_connection;
4983 i_ungetc = std_ungetc;
4985 i_bungetc = std_ungetc;
4988 i_mungetc = std_ungetc;
4989 i_mgetc_buf = std_getc;
4990 i_mungetc_buf = std_ungetc;
4991 output_mode = ASCII;
4994 mime_decode_mode = FALSE;
5000 z_prev2=0,z_prev1=0;
5002 iconv_for_check = 0;
5004 input_codename = "";
5012 no_connection(c2,c1)
5015 no_connection2(c2,c1,0);
5019 no_connection2(c2,c1,c0)
5022 fprintf(stderr,"nkf internal module connection failure.\n");
5024 return 0; /* LINT */
5029 #define fprintf dllprintf
5034 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
5035 fprintf(stderr,"Flags:\n");
5036 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
5037 #ifdef DEFAULT_CODE_SJIS
5038 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8N\n");
5040 #ifdef DEFAULT_CODE_JIS
5041 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8N\n");
5043 #ifdef DEFAULT_CODE_EUC
5044 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8N\n");
5046 #ifdef DEFAULT_CODE_UTF8
5047 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8N (DEFAULT)\n");
5049 #ifdef UTF8_OUTPUT_ENABLE
5050 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
5052 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
5053 #ifdef UTF8_INPUT_ENABLE
5054 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
5056 fprintf(stderr,"t no conversion\n");
5057 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
5058 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
5059 fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
5060 fprintf(stderr,"v Show this usage. V: show version\n");
5061 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
5062 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
5063 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
5064 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
5065 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
5066 fprintf(stderr," 3: Convert HTML Entity\n");
5067 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
5068 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
5070 fprintf(stderr,"T Text mode output\n");
5072 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
5073 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
5074 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
5075 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
5076 fprintf(stderr,"long name options\n");
5077 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
5078 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
5079 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
5080 fprintf(stderr," --x0212 Convert JISX0212\n");
5081 fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
5082 fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
5084 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
5086 #ifdef NUMCHAR_OPTION
5087 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
5089 #ifdef UNICODE_NORMALIZATION
5090 fprintf(stderr," --utf8mac-input UTF-8-MAC input\n");
5092 #ifdef UTF8_OUTPUT_ENABLE
5093 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
5096 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
5098 fprintf(stderr," -g, --guess Guess the input code\n");
5099 fprintf(stderr," --help,--version\n");
5106 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
5107 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
5110 #if defined(MSDOS) && defined(__WIN16__)
5113 #if defined(MSDOS) && defined(__WIN32__)
5119 ,NKF_VERSION,NKF_RELEASE_DATE);
5120 fprintf(stderr,"\n%s\n",CopyRight);
5125 ** Patch contributors
5126 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
5127 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
5128 ** ohta@src.ricoh.co.jp (Junn Ohta)
5129 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
5130 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
5131 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
5132 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
5133 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
5134 ** GHG00637@nifty-serve.or.jp (COW)