1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version works as a drop-in replacement for the previous nkf.
32 ** When invoked as, e.g., nkf -e, input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as is.
35 ** It is still quite likely to contain bugs
36 ** (especially in auto-detection, mixed encodings and error handling).
38 ** If you find any problem, please report it to
39 ** E-Mail: furukawa@tcp-ip.or.jp
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.44 2004/11/21 00:57:24 rei_furukawa Exp $ */
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2004-11-15"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP characters to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
153 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
154 #include <sys/utime.h>
155 #elif defined(__TURBOC__) /* BCC */
157 #elif defined(LSI_C) /* LSI C */
169 /* state of output_mode and input_mode
186 /* Input Assumption */
190 #define LATIN1_INPUT 6
192 #define STRICT_MIME 8
197 #define JAPANESE_EUC 10
201 #define UTF8_INPUT 13
202 #define UTF16LE_INPUT 14
203 #define UTF16BE_INPUT 15
223 #define is_alnum(c) \
224 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9')) /* true for ASCII letters and digits; NOTE: c is expanded unparenthesized and may be evaluated several times, so pass only a simple, side-effect-free operand */
226 #define HOLD_SIZE 1024
227 #define IOBUF_SIZE 16384
229 #define DEFAULT_J 'B'
230 #define DEFAULT_R 'B'
232 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
233 #define SJ6394 0x0161 /* 63 - 94 ku offset */
235 #define RANGE_NUM_MAX 18
240 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
241 #define sizeof_euc_utf8 94
242 #define sizeof_euc_to_utf8_1byte 94
243 #define sizeof_euc_to_utf8_2bytes 94
244 #define sizeof_utf8_to_euc_C2 64
245 #define sizeof_utf8_to_euc_E5B8 64
246 #define sizeof_utf8_to_euc_2bytes 112
247 #define sizeof_utf8_to_euc_3bytes 112
250 /* MIME preprocessor */
253 #ifdef EASYWIN /*Easy Win */
254 extern POINT _BufferSize;
257 /* function prototype */
259 #ifdef ANSI_C_PROTOTYPE
261 #define STATIC static
273 void (*status_func)PROTO((struct input_code *, int));
274 int (*iconv_func)PROTO((int c2, int c1, int c0));
278 STATIC char *input_codename = "";
280 STATIC int noconvert PROTO((FILE *f));
281 STATIC int kanji_convert PROTO((FILE *f));
282 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
283 STATIC int push_hold_buf PROTO((int c2));
284 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
285 STATIC int s_iconv PROTO((int c2,int c1,int c0));
286 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
287 STATIC int e_iconv PROTO((int c2,int c1,int c0));
288 #ifdef UTF8_INPUT_ENABLE
289 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
290 STATIC int w_iconv PROTO((int c2,int c1,int c0));
291 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
292 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
293 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
295 #ifdef UTF8_OUTPUT_ENABLE
296 STATIC int e2w_conv PROTO((int c2,int c1));
297 STATIC void w_oconv PROTO((int c2,int c1));
298 STATIC void w_oconv16 PROTO((int c2,int c1));
300 STATIC void e_oconv PROTO((int c2,int c1));
301 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
302 STATIC void s_oconv PROTO((int c2,int c1));
303 STATIC void j_oconv PROTO((int c2,int c1));
304 STATIC void fold_conv PROTO((int c2,int c1));
305 STATIC void cr_conv PROTO((int c2,int c1));
306 STATIC void z_conv PROTO((int c2,int c1));
307 STATIC void rot_conv PROTO((int c2,int c1));
308 STATIC void hira_conv PROTO((int c2,int c1));
309 STATIC void base64_conv PROTO((int c2,int c1));
310 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
311 STATIC void no_connection PROTO((int c2,int c1));
312 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
314 STATIC void code_score PROTO((struct input_code *ptr));
315 STATIC void code_status PROTO((int c));
317 STATIC void std_putc PROTO((int c));
318 STATIC int std_getc PROTO((FILE *f));
319 STATIC int std_ungetc PROTO((int c,FILE *f));
321 STATIC int broken_getc PROTO((FILE *f));
322 STATIC int broken_ungetc PROTO((int c,FILE *f));
324 STATIC int mime_begin PROTO((FILE *f));
325 STATIC int mime_getc PROTO((FILE *f));
326 STATIC int mime_ungetc PROTO((int c,FILE *f));
328 STATIC int mime_begin_strict PROTO((FILE *f));
329 STATIC int mime_getc_buf PROTO((FILE *f));
330 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
331 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
333 STATIC int base64decode PROTO((int c));
334 STATIC void mime_putc PROTO((int c));
335 STATIC void open_mime PROTO((int c));
336 STATIC void close_mime PROTO(());
337 STATIC void usage PROTO(());
338 STATIC void version PROTO(());
339 STATIC void options PROTO((unsigned char *c));
341 STATIC void reinit PROTO(());
346 static unsigned char stdibuf[IOBUF_SIZE];
347 static unsigned char stdobuf[IOBUF_SIZE];
348 static unsigned char hold_buf[HOLD_SIZE*2];
349 static int hold_count;
351 /* MIME preprocessor fifo */
353 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
354 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
355 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
356 static unsigned char mime_buf[MIME_BUF_SIZE];
357 static unsigned int mime_top = 0;
358 static unsigned int mime_last = 0; /* decoded */
359 static unsigned int mime_input = 0; /* undecoded */
362 static int unbuf_f = FALSE; /* NOTE(review): presumably "unbuffered output" requested by -u -- confirm */
363 static int estab_f = FALSE; /* NOTE(review): presumably "input code established"; tested in set_iconv -- confirm */
364 static int nop_f = FALSE; /* NOTE(review): presumably "no conversion" (pass-through) mode -- confirm */
365 static int binmode_f = TRUE; /* binary mode */
366 static int rot_f = FALSE; /* ROT13/47 mode */
367 static int hira_f = FALSE; /* hiragana/katakana conversion */
368 static int input_f = FALSE; /* assumed input code; FALSE means the input code is not fixed */
369 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
370 static int mime_f = STRICT_MIME; /* convert MIME B (base64) or Q encoded input */
371 static int mimebuf_f = FALSE; /* MIME buffered input */
372 static int broken_f = FALSE; /* convert ESC-less broken JIS */
373 static int iso8859_f = FALSE; /* ISO8859 through */
374 static int mimeout_f = FALSE; /* MIME output: base64 mode */
375 #if defined(MSDOS) || defined(__OS2__)
376 static int x0201_f = TRUE; /* Assume JISX0201 kana */
378 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
380 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
381 #ifdef UTF8_OUTPUT_ENABLE
382 static int unicode_bom_f= 0; /* Output Unicode BOM */
383 static int w_oconv16_LE = 0; /* utf-16 little endian */
384 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
388 #ifdef NUMCHAR_OPTION
390 #define CLASS_MASK 0x0f000000
391 #define CLASS_UTF16 0x01000000
395 static int cap_f = FALSE; /* NOTE(review): presumably enabled by the cap-input long option -- confirm */
396 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc: receives the previous i_getc when cap_getc is installed as i_getc */
397 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc; /* matching ungetc: receives the previous i_ungetc when cap_ungetc is installed */
398 STATIC int cap_getc PROTO((FILE *f));
399 STATIC int cap_ungetc PROTO((int c,FILE *f));
401 static int url_f = FALSE; /* NOTE(review): presumably enabled by the url-input long option -- confirm */
402 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc: receives the previous i_getc when url_getc is installed as i_getc */
403 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc; /* matching ungetc: receives the previous i_ungetc when url_ungetc is installed */
404 STATIC int url_getc PROTO((FILE *f));
405 STATIC int url_ungetc PROTO((int c,FILE *f));
407 static int numchar_f = FALSE; /* NOTE(review): presumably enabled by the numchar-input long option -- confirm */
408 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc: receives the previous i_getc when numchar_getc is installed as i_getc */
409 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc; /* matching ungetc: receives the previous i_ungetc when numchar_ungetc is installed */
410 STATIC int numchar_getc PROTO((FILE *f));
411 STATIC int numchar_ungetc PROTO((int c,FILE *f));
415 static int noout_f = FALSE;
416 STATIC void no_putc PROTO((int c));
417 static int debug_f = FALSE;
418 STATIC void debug PROTO((char *str));
421 static int guess_f = FALSE;
422 STATIC void print_guessed_code PROTO((char *filename));
423 STATIC void set_input_codename PROTO((char *codename));
424 static int is_inputcode_mixed = FALSE;
425 static int is_inputcode_set = FALSE;
428 static int exec_f = 0;
431 #ifdef SHIFTJIS_CP932
432 STATIC int cp932_f = TRUE;
433 #define CP932_TABLE_BEGIN (0xfa)
434 #define CP932_TABLE_END (0xfc)
436 STATIC int cp932inv_f = FALSE;
437 #define CP932INV_TABLE_BEGIN (0xed)
438 #define CP932INV_TABLE_END (0xee)
440 #endif /* SHIFTJIS_CP932 */
442 STATIC unsigned char prefix_table[256];
444 STATIC void e_status PROTO((struct input_code *, int));
445 STATIC void s_status PROTO((struct input_code *, int));
447 #ifdef UTF8_INPUT_ENABLE
448 STATIC void w_status PROTO((struct input_code *, int));
449 STATIC void w16_status PROTO((struct input_code *, int));
450 static int utf16_mode = UTF16LE_INPUT;
453 struct input_code input_code_list[] = {
454 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
455 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
456 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
457 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
461 static int mimeout_mode = 0;
462 static int base64_count = 0;
464 /* X0208 -> ASCII converter */
467 static int f_line = 0; /* chars in line */
468 static int f_prev = 0;
469 static int fold_preserve_f = FALSE; /* preserve new lines */
470 static int fold_f = FALSE;
471 static int fold_len = 0;
474 static unsigned char kanji_intro = DEFAULT_J;
475 static unsigned char ascii_intro = DEFAULT_R;
479 #define FOLD_MARGIN 10
480 #define DEFAULT_FOLD 60
482 static int fold_margin = FOLD_MARGIN;
486 #ifdef DEFAULT_CODE_JIS
487 # define DEFAULT_CONV j_oconv
489 #ifdef DEFAULT_CODE_SJIS
490 # define DEFAULT_CONV s_oconv
492 #ifdef DEFAULT_CODE_EUC
493 # define DEFAULT_CONV e_oconv
495 #ifdef DEFAULT_CODE_UTF8
496 # define DEFAULT_CONV w_oconv
499 /* process default */
500 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
502 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
503 /* s_iconv or oconv */
504 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
506 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
507 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
508 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
509 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
510 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
511 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
512 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
514 /* static redirections: function pointers through which all character I/O is routed; each defaults to the std_* routine */
516 static void (*o_putc)PROTO((int c)) = std_putc; /* general output */
518 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
519 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc; /* general push-back */
521 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc: receives the previous i_getc when broken_getc is installed as i_getc */
522 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc; /* matching push-back: receives the previous i_ungetc when broken_ungetc is installed */
524 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
526 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc: receives the previous i_getc when mime_getc is installed as i_getc */
527 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc; /* matching push-back: receives the previous i_ungetc when mime_ungetc is installed */
529 /* for strict mime */
530 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
531 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc; /* push-back paired with i_mgetc_buf */
534 static int output_mode = ASCII, /* output kanji mode */
535 input_mode = ASCII, /* input kanji mode */
536 shift_mode = FALSE; /* TRUE shift out, or X0201 */
537 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
539 /* X0201 / X0208 conversion tables */
541 /* X0201 kana conversion table */
544 unsigned char cv[]= {
545 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
546 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
547 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
548 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
549 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
550 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
551 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
552 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
553 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
554 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
555 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
556 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
557 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
558 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
559 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
560 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
564 /* X0201 kana conversion table for daguten */
567 unsigned char dv[]= {
568 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
569 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
570 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
571 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
572 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
573 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
574 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
575 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
576 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
577 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
579 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
583 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 /* X0201 kana conversion table for han-daguten */
589 unsigned char ev[]= {
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
601 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 /* X0208 kigou conversion table */
610 /* 0x8140 - 0x819e */
612 unsigned char fv[] = {
614 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
615 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
616 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
618 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
619 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
620 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
622 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
625 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
631 static int file_out = FALSE;
633 static int overwrite = FALSE;
636 static int crmode_f = 0; /* CR, NL, CRLF */
637 #ifdef EASYWIN /*Easy Win */
638 static int end_check;
653 #ifdef EASYWIN /*Easy Win */
654 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
657 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
658 cp = (unsigned char *)*argv;
663 if (pipe(fds) < 0 || (pid = fork()) < 0){
674 execvp(argv[1], &argv[1]);
688 if(x0201_f == WISH_TRUE)
689 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
691 if (binmode_f == TRUE)
693 if (freopen("","wb",stdout) == NULL)
700 setbuf(stdout, (char *) NULL);
702 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
705 if (binmode_f == TRUE)
707 if (freopen("","rb",stdin) == NULL) return (-1);
711 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
715 kanji_convert(stdin);
716 if (guess_f) print_guessed_code(NULL);
721 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
730 /* reopen file for stdout */
731 if (file_out == TRUE) {
734 outfname = malloc(strlen(origfname)
735 + strlen(".nkftmpXXXXXX")
741 strcpy(outfname, origfname);
745 for (i = strlen(outfname); i; --i){
746 if (outfname[i - 1] == '/'
747 || outfname[i - 1] == '\\'){
753 strcat(outfname, "ntXXXXXX");
755 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
758 strcat(outfname, ".nkftmpXXXXXX");
759 fd = mkstemp(outfname);
762 || (fd_backup = dup(fileno(stdout))) < 0
763 || dup2(fd, fileno(stdout)) < 0
774 outfname = "nkf.out";
777 if(freopen(outfname, "w", stdout) == NULL) {
781 if (binmode_f == TRUE) {
783 if (freopen("","wb",stdout) == NULL)
790 if (binmode_f == TRUE)
792 if (freopen("","rb",fin) == NULL)
797 setvbuffer(fin, stdibuf, IOBUF_SIZE);
801 char *filename = NULL;
803 if (nfiles > 1) filename = origfname;
804 if (guess_f) print_guessed_code(filename);
810 #if defined(MSDOS) && !defined(__MINGW32__)
818 if (dup2(fd_backup, fileno(stdout)) < 0){
821 if (stat(origfname, &sb)) {
822 fprintf(stderr, "Can't stat %s\n", origfname);
824 /* restore the original file's permissions */
825 if (chmod(outfname, sb.st_mode)) {
826 fprintf(stderr, "Can't set permission %s\n", outfname);
829 /* restore the original file's timestamp */
830 #if defined(MSDOS) && !defined(__MINGW32__)
831 tb[0] = tb[1] = sb.st_mtime;
832 if (utime(outfname, tb)) {
833 fprintf(stderr, "Can't set timestamp %s\n", outfname);
836 tb.actime = sb.st_atime;
837 tb.modtime = sb.st_mtime;
838 if (utime(outfname, &tb)) {
839 fprintf(stderr, "Can't set timestamp %s\n", outfname);
843 if (unlink(origfname)){
847 if (rename(outfname, origfname)) {
849 fprintf(stderr, "Can't rename %s to %s\n",
850 outfname, origfname);
858 #ifdef EASYWIN /*Easy Win */
859 if (file_out == FALSE)
860 scanf("%d",&end_check);
863 #else /* for Other OS */
864 if (file_out == TRUE)
894 {"katakana-hiragana","h3"},
896 #ifdef UTF8_OUTPUT_ENABLE
901 #ifdef UTF8_INPUT_ENABLE
903 {"utf16-input", "W16"},
912 #ifdef NUMCHAR_OPTION
913 {"numchar-input", ""},
919 #ifdef SHIFTJIS_CP932
930 static int option_mode = 0;
949 case '-': /* literal options */
950 if (!*cp) { /* ignore the rest of arguments */
954 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
956 p = (unsigned char *)long_option[i].name;
957 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
958 if (!*p || *p == cp[j]){
965 cp = (unsigned char *)long_option[i].alias;
968 if (strcmp(long_option[i].name, "overwrite") == 0){
975 if (strcmp(long_option[i].name, "cap-input") == 0){
979 if (strcmp(long_option[i].name, "url-input") == 0){
984 #ifdef NUMCHAR_OPTION
985 if (strcmp(long_option[i].name, "numchar-input") == 0){
991 if (strcmp(long_option[i].name, "no-output") == 0){
995 if (strcmp(long_option[i].name, "debug") == 0){
1000 #ifdef SHIFTJIS_CP932
1001 if (strcmp(long_option[i].name, "no-cp932") == 0){
1005 if (strcmp(long_option[i].name, "cp932inv") == 0){
1011 if (strcmp(long_option[i].name, "exec-in") == 0){
1015 if (strcmp(long_option[i].name, "exec-out") == 0){
1020 #ifdef UTF8_OUTPUT_ENABLE
1021 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1022 ms_ucs_map_f = TRUE;
1026 if (strcmp(long_option[i].name, "prefix=") == 0){
1027 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1028 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1029 prefix_table[p[i]] = p[1];
1036 case 'b': /* buffered mode */
1039 case 'u': /* non bufferd mode */
1042 case 't': /* transparent mode */
1045 case 'j': /* JIS output */
1047 output_conv = j_oconv;
1049 case 'e': /* AT&T EUC output */
1050 output_conv = e_oconv;
1052 case 's': /* SJIS output */
1053 output_conv = s_oconv;
1055 case 'l': /* ISO8859 Latin-1 support, no conversion */
1056 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1057 input_f = LATIN1_INPUT;
1059 case 'i': /* Kanji IN ESC-$-@/B */
1060 if (*cp=='@'||*cp=='B')
1061 kanji_intro = *cp++;
1063 case 'o': /* ASCII IN ESC-(-J/B */
1064 if (*cp=='J'||*cp=='B'||*cp=='H')
1065 ascii_intro = *cp++;
1072 if ('9'>= *cp && *cp>='0')
1073 hira_f |= (*cp++ -'0');
1080 #if defined(MSDOS) || defined(__OS2__)
1095 #ifdef UTF8_OUTPUT_ENABLE
1096 case 'w': /* UTF-8 output */
1097 if ('1'== cp[0] && '6'==cp[1]) {
1098 output_conv = w_oconv16; cp+=2;
1100 unicode_bom_f=2; cp++;
1103 unicode_bom_f=1; cp++;
1105 } else if (cp[0] == 'B') {
1106 unicode_bom_f=2; cp++;
1108 unicode_bom_f=1; cp++;
1111 } else if (cp[0] == '8') {
1112 output_conv = w_oconv; cp++;
1115 unicode_bom_f=1; cp++;
1118 output_conv = w_oconv;
1121 #ifdef UTF8_INPUT_ENABLE
1122 case 'W': /* UTF-8 input */
1123 if ('1'== cp[0] && '6'==cp[1]) {
1124 input_f = UTF16LE_INPUT;
1127 } else if (cp[0] == 'B') {
1129 input_f = UTF16BE_INPUT;
1131 } else if (cp[0] == '8') {
1133 input_f = UTF8_INPUT;
1135 input_f = UTF8_INPUT;
1138 /* Input code assumption */
1139 case 'J': /* JIS input */
1140 case 'E': /* AT&T EUC input */
1141 input_f = JIS_INPUT;
1143 case 'S': /* MS Kanji input */
1144 input_f = SJIS_INPUT;
1145 if (x0201_f==NO_X0201) x0201_f=TRUE;
1147 case 'Z': /* Convert X0208 alphabet to asii */
1148 /* bit:0 Convert X0208
1149 bit:1 Convert Kankaku to one space
1150 bit:2 Convert Kankaku to two spaces
1151 bit:3 Convert HTML Entity
1153 if ('9'>= *cp && *cp>='0')
1154 alpha_f |= 1<<(*cp++ -'0');
1158 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1159 x0201_f = FALSE; /* No X0201->X0208 conversion */
1161 ESC-(-I in JIS, EUC, MS Kanji
1162 SI/SO in JIS, EUC, MS Kanji
1163 SSO in EUC, JIS, not in MS Kanji
1164 MS Kanji (0xa0-0xdf)
1166 ESC-(-I in JIS (0x20-0x5f)
1167 SSO in EUC (0xa0-0xdf)
1168 0xa0-0xd in MS Kanji (0xa0-0xdf)
1171 case 'X': /* Assume X0201 kana */
1172 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1175 case 'F': /* prserve new lines */
1176 fold_preserve_f = TRUE;
1177 case 'f': /* folding -f60 or -f */
1180 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1182 fold_len += *cp++ - '0';
1184 if (!(0<fold_len && fold_len<BUFSIZ))
1185 fold_len = DEFAULT_FOLD;
1189 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1191 fold_margin += *cp++ - '0';
1195 case 'm': /* MIME support */
1196 if (*cp=='B'||*cp=='Q') {
1197 mime_decode_mode = *cp++;
1198 mimebuf_f = FIXED_MIME;
1199 } else if (*cp=='N') {
1200 mime_f = TRUE; cp++;
1201 } else if (*cp=='S') {
1202 mime_f = STRICT_MIME; cp++;
1203 } else if (*cp=='0') {
1204 mime_f = FALSE; cp++;
1207 case 'M': /* MIME output */
1210 mimeout_f = FIXED_MIME; cp++;
1211 } else if (*cp=='Q') {
1213 mimeout_f = FIXED_MIME; cp++;
1218 case 'B': /* Broken JIS support */
1220 bit:1 allow any x on ESC-(-x or ESC-$-x
1221 bit:2 reset to ascii on NL
1223 if ('9'>= *cp && *cp>='0')
1224 broken_f |= 1<<(*cp++ -'0');
1229 case 'O':/* for Output file */
1233 case 'c':/* add cr code */
1236 case 'd':/* delete cr code */
1239 case 'I': /* ISO-2022-JP output */
1242 case 'L': /* line mode */
1243 if (*cp=='u') { /* unix */
1244 crmode_f = NL; cp++;
1245 } else if (*cp=='m') { /* mac */
1246 crmode_f = CR; cp++;
1247 } else if (*cp=='w') { /* windows */
1248 crmode_f = CRLF; cp++;
1249 } else if (*cp=='0') { /* no conversion */
1259 /* multiple options in a single string are allowed, for the Perl module */
1260 while(*cp && *cp!='-') cp++;
1264 /* bogus option but ignored */
1270 #ifdef ANSI_C_PROTOTYPE
1271 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1273 struct input_code * find_inputcode_byfunc(iconv_func)
1274 int (*iconv_func)();
1278 struct input_code *p = input_code_list;
1280 if (iconv_func == p->iconv_func){
1289 #ifdef ANSI_C_PROTOTYPE
1290 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1292 void set_iconv(f, iconv_func)
1294 int (*iconv_func)();
1298 static int (*iconv_for_check)() = 0;
1300 #ifdef INPUT_CODE_FIX
1308 #ifdef INPUT_CODE_FIX
1309 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1315 if (estab_f && iconv_for_check != iconv){
1316 struct input_code *p = find_inputcode_byfunc(iconv);
1318 set_input_codename(p->name);
1319 debug(input_codename);
1321 iconv_for_check = iconv;
1326 #define SCORE_L2 (1) /* JIS level-2 kanji */
1327 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1328 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1329 #ifdef SHIFTJIS_CP932
1330 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1331 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1333 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1335 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* designated by MIME */
1336 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1338 #define SCORE_INIT (SCORE_iMIME)
1340 int score_table_A0[] = {
1343 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1344 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1347 int score_table_F0[] = {
1348 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1349 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1350 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1351 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1354 void set_code_score(ptr, score)
1355 struct input_code *ptr;
1359 ptr->score |= score;
1363 void clr_code_score(ptr, score)
1364 struct input_code *ptr;
1368 ptr->score &= ~score;
1372 void code_score(ptr)
1373 struct input_code *ptr;
1375 int c2 = ptr->buf[0];
1376 int c1 = ptr->buf[1];
1378 set_code_score(ptr, SCORE_ERROR);
1379 }else if (c2 == SSO){
1380 set_code_score(ptr, SCORE_KANA);
1381 #ifdef UTF8_OUTPUT_ENABLE
1382 }else if (!e2w_conv(c2, c1)){
1383 set_code_score(ptr, SCORE_NO_EXIST);
1385 }else if ((c2 & 0x70) == 0x20){
1386 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1387 }else if ((c2 & 0x70) == 0x70){
1388 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1389 }else if ((c2 & 0x70) >= 0x50){
1390 set_code_score(ptr, SCORE_L2);
1394 void status_disable(ptr)
1395 struct input_code *ptr;
1400 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1403 void status_push_ch(ptr, c)
1404 struct input_code *ptr;
1407 ptr->buf[ptr->index++] = c;
1410 void status_clear(ptr)
1411 struct input_code *ptr;
1417 void status_reset(ptr)
1418 struct input_code *ptr;
1421 ptr->score = SCORE_INIT;
1424 void status_reinit(ptr)
1425 struct input_code *ptr;
1428 ptr->_file_stat = 0;
1431 void status_check(ptr, c)
1432 struct input_code *ptr;
1435 if (c <= DEL && estab_f){
1440 void s_status(ptr, c)
1441 struct input_code *ptr;
1446 status_check(ptr, c);
1451 #ifdef NUMCHAR_OPTION
1452 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1455 }else if (0xa1 <= c && c <= 0xdf){
1456 status_push_ch(ptr, SSO);
1457 status_push_ch(ptr, c);
1460 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1462 status_push_ch(ptr, c);
1463 #ifdef SHIFTJIS_CP932
1465 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1467 status_push_ch(ptr, c);
1468 #endif /* SHIFTJIS_CP932 */
1470 status_disable(ptr);
1474 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1475 status_push_ch(ptr, c);
1476 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1480 status_disable(ptr);
1483 #ifdef SHIFTJIS_CP932
1485 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1486 status_push_ch(ptr, c);
1487 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1488 set_code_score(ptr, SCORE_CP932);
1493 status_disable(ptr);
1495 #endif /* SHIFTJIS_CP932 */
1499 void e_status(ptr, c)
1500 struct input_code *ptr;
1505 status_check(ptr, c);
1510 #ifdef NUMCHAR_OPTION
1511 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1514 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1516 status_push_ch(ptr, c);
1518 status_disable(ptr);
1522 if (0xa1 <= c && c <= 0xfe){
1523 status_push_ch(ptr, c);
1527 status_disable(ptr);
1533 #ifdef UTF8_INPUT_ENABLE
1534 void w16_status(ptr, c)
1535 struct input_code *ptr;
1542 if (ptr->_file_stat == 0){
1543 if (c == 0xfe || c == 0xff){
1545 status_push_ch(ptr, c);
1546 ptr->_file_stat = 1;
1548 status_disable(ptr);
1549 ptr->_file_stat = -1;
1551 }else if (ptr->_file_stat > 0){
1553 status_push_ch(ptr, c);
1554 }else if (ptr->_file_stat < 0){
1555 status_disable(ptr);
1561 status_disable(ptr);
1562 ptr->_file_stat = -1;
1564 status_push_ch(ptr, c);
1571 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1572 status_push_ch(ptr, c);
1575 status_disable(ptr);
1576 ptr->_file_stat = -1;
1582 void w_status(ptr, c)
1583 struct input_code *ptr;
1588 status_check(ptr, c);
1593 #ifdef NUMCHAR_OPTION
1594 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1597 }else if (0xc0 <= c && c <= 0xdf){
1599 status_push_ch(ptr, c);
1600 }else if (0xe0 <= c && c <= 0xef){
1602 status_push_ch(ptr, c);
1604 status_disable(ptr);
1609 if (0x80 <= c && c <= 0xbf){
1610 status_push_ch(ptr, c);
1611 if (ptr->index > ptr->stat){
1612 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1613 && ptr->buf[2] == 0xbf);
1614 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1615 &ptr->buf[0], &ptr->buf[1]);
1622 status_disable(ptr);
1633 int action_flag = 1;
1634 struct input_code *result = 0;
1635 struct input_code *p = input_code_list;
1637 (p->status_func)(p, c);
1640 }else if(p->stat == 0){
1651 if (result && !estab_f){
1652 set_iconv(TRUE, result->iconv_func);
1653 }else if (c <= DEL){
1654 struct input_code *ptr = input_code_list;
1663 #define STD_GC_BUFSIZE (256)
1664 int std_gc_buf[STD_GC_BUFSIZE];
1672 return std_gc_buf[--std_gc_ndx];
1682 if (std_gc_ndx == STD_GC_BUFSIZE){
1685 std_gc_buf[std_gc_ndx++] = c;
1703 while ((c = (*i_getc)(f)) != EOF)
1712 oconv = output_conv;
1715 /* replace continuation module, from output side */
1717 /* output redirection */
1719 if (noout_f || guess_f){
1726 if (mimeout_f == TRUE) {
1727 o_base64conv = oconv; oconv = base64_conv;
1729 /* base64_count = 0; */
1733 o_crconv = oconv; oconv = cr_conv;
1736 o_rot_conv = oconv; oconv = rot_conv;
1739 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1742 o_hira_conv = oconv; oconv = hira_conv;
1745 o_fconv = oconv; oconv = fold_conv;
1748 if (alpha_f || x0201_f) {
1749 o_zconv = oconv; oconv = z_conv;
1753 i_ungetc = std_ungetc;
1754 /* input redirection */
1757 i_cgetc = i_getc; i_getc = cap_getc;
1758 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1761 i_ugetc = i_getc; i_getc = url_getc;
1762 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1765 #ifdef NUMCHAR_OPTION
1767 i_ngetc = i_getc; i_getc = numchar_getc;
1768 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1771 if (mime_f && mimebuf_f==FIXED_MIME) {
1772 i_mgetc = i_getc; i_getc = mime_getc;
1773 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1776 i_bgetc = i_getc; i_getc = broken_getc;
1777 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1779 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1780 set_iconv(-TRUE, e_iconv);
1781 } else if (input_f == SJIS_INPUT) {
1782 set_iconv(-TRUE, s_iconv);
1783 #ifdef UTF8_INPUT_ENABLE
1784 } else if (input_f == UTF8_INPUT) {
1785 set_iconv(-TRUE, w_iconv);
1786 } else if (input_f == UTF16LE_INPUT) {
1787 set_iconv(-TRUE, w_iconv16);
1790 set_iconv(FALSE, e_iconv);
1794 struct input_code *p = input_code_list;
1802 Conversion main loop. Code detection only.
1812 module_connection();
1817 output_mode = ASCII;
1820 #define NEXT continue /* no output, get next */
1821 #define SEND ; /* output c1 and c2, get next */
1822 #define LAST break /* end of loop, go closing */
1824 while ((c1 = (*i_getc)(f)) != EOF) {
1829 /* in case of 8th bit is on */
1831 /* in case of not established yet */
1832 /* It is still ambiguous */
1833 if (h_conv(f, c2, c1)==EOF)
1839 /* in case of already established */
1841 /* ignore bogus code */
1847 /* second byte, 7 bit code */
1848 /* it might be kanji shifted */
1849 if ((c1 == DEL) || (c1 <= SPACE)) {
1850 /* ignore bogus first code */
1858 #ifdef UTF8_INPUT_ENABLE
1867 #ifdef NUMCHAR_OPTION
1868 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1871 } else if (c1 > DEL) {
1873 if (!estab_f && !iso8859_f) {
1874 /* not established yet */
1877 } else { /* estab_f==TRUE */
1882 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1883 /* SJIS X0201 Case... */
1884 if(iso2022jp_f && x0201_f==NO_X0201) {
1885 (*oconv)(GETA1, GETA2);
1892 } else if (c1==SSO && iconv != s_iconv) {
1893 /* EUC X0201 Case */
1894 c1 = (*i_getc)(f); /* skip SSO */
1896 if (SSP<=c1 && c1<0xe0) {
1897 if(iso2022jp_f && x0201_f==NO_X0201) {
1898 (*oconv)(GETA1, GETA2);
1905 } else { /* bogus code, skip SSO and one byte */
1909 /* already established */
1914 } else if ((c1 > SPACE) && (c1 != DEL)) {
1915 /* in case of Roman characters */
1917 /* output 1 shifted byte */
1921 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1922 /* output 1 shifted byte */
1923 if(iso2022jp_f && x0201_f==NO_X0201) {
1924 (*oconv)(GETA1, GETA2);
1931 /* look like bogus code */
1934 } else if (input_mode == X0208) {
1935 /* in case of Kanji shifted */
1938 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1939 /* Check MIME code */
1940 if ((c1 = (*i_getc)(f)) == EOF) {
1943 } else if (c1 == '?') {
1944 /* =? is mime conversion start sequence */
1945 if(mime_f == STRICT_MIME) {
1946 /* check in real detail */
1947 if (mime_begin_strict(f) == EOF)
1951 } else if (mime_begin(f) == EOF)
1961 /* normal ASCII code */
1964 } else if (c1 == SI) {
1967 } else if (c1 == SO) {
1970 } else if (c1 == ESC ) {
1971 if ((c1 = (*i_getc)(f)) == EOF) {
1972 /* (*oconv)(0, ESC); don't send bogus code */
1974 } else if (c1 == '$') {
1975 if ((c1 = (*i_getc)(f)) == EOF) {
1977 (*oconv)(0, ESC); don't send bogus code
1978 (*oconv)(0, '$'); */
1980 } else if (c1 == '@'|| c1 == 'B') {
1981 /* This is kanji introduction */
1984 set_input_codename("ISO-2022-JP");
1985 debug(input_codename);
1987 } else if (c1 == '(') {
1988 if ((c1 = (*i_getc)(f)) == EOF) {
1989 /* don't send bogus code
1995 } else if (c1 == '@'|| c1 == 'B') {
1996 /* This is kanji introduction */
2001 /* could be some special code */
2008 } else if (broken_f&0x2) {
2009 /* accept any ESC-(-x as broken code ... */
2019 } else if (c1 == '(') {
2020 if ((c1 = (*i_getc)(f)) == EOF) {
2021 /* don't send bogus code
2023 (*oconv)(0, '('); */
2027 /* This is X0201 kana introduction */
2028 input_mode = X0201; shift_mode = X0201;
2030 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2031 /* This is ASCII / JIS Roman introduction (ESC ( B, J or H), not X0208 kanji */
2032 input_mode = ASCII; shift_mode = FALSE;
2034 } else if (broken_f&0x2) {
2035 input_mode = ASCII; shift_mode = FALSE;
2040 /* maintain various input_mode here */
2044 } else if ( c1 == 'N' || c1 == 'n' ){
2046 c3 = (*i_getc)(f); /* skip SS2 */
2047 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2062 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2063 input_mode = ASCII; set_iconv(FALSE, 0);
2065 } else if (c1 == NL && mime_f && !mime_decode_mode ) {
2066 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2074 } else if (c1 == CR && mime_f && !mime_decode_mode ) {
2075 if ((c1=(*i_getc)(f))!=EOF) {
2079 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2095 if (input_mode == X0208)
2096 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2097 else if (input_mode)
2098 (*oconv)(input_mode, c1); /* other special case */
2099 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2100 int c0 = (*i_getc)(f);
2103 (*iconv)(c2, c1, c0);
2109 /* goto next_word */
2113 (*iconv)(EOF, 0, 0);
2126 /** it must NOT be in the kanji shifted sequence */
2127 /** it must NOT be written in JIS7 */
2128 /** and it must be after 2 byte 8bit code */
2135 while ((c1 = (*i_getc)(f)) != EOF) {
2141 if (push_hold_buf(c1) == EOF || estab_f){
2147 struct input_code *p = input_code_list;
2148 struct input_code *result = p;
2153 if (p->score < result->score){
2158 set_iconv(FALSE, result->iconv_func);
2163 ** 1) EOF is detected, or
2164 ** 2) Code is established, or
2165 ** 3) Buffer is FULL (but last word is pushed)
2167 ** in 1) and 3) cases, we continue to use
2168 ** Kanji codes by oconv and leave estab_f unchanged.
2173 while (wc < hold_count){
2174 c2 = hold_buf[wc++];
2176 #ifdef NUMCHAR_OPTION
2177 || (c2 & CLASS_MASK) == CLASS_UTF16
2182 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2183 (*iconv)(X0201, c2, 0);
2186 if (wc < hold_count){
2187 c1 = hold_buf[wc++];
2196 if ((*iconv)(c2, c1, 0) < 0){
2198 if (wc < hold_count){
2199 c0 = hold_buf[wc++];
2208 (*iconv)(c2, c1, c0);
2221 if (hold_count >= HOLD_SIZE*2)
2223 hold_buf[hold_count++] = c2;
2224 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
/*
 * s2e_conv: rewrite the byte pair (c2, c1) arithmetically; p2 / p1 are the
 * output pointers named in the parameter list (their use is on lines not
 * shown in this listing).
 *
 * With SHIFTJIS_CP932 compiled in and cp932_f set, a c2 inside
 * CP932_TABLE_BEGIN..CP932_TABLE_END is first looked up in the
 * shiftjis_cp932 table; a zero table entry makes the function return 1.
 * Afterwards c2 is doubled and reduced by SJ0162 (c2 <= 0x9f) or SJ6394,
 * and c1 is reduced by SPACE (c1 > DEL) or by 0x1f.
 *
 * NOTE(review): judging by the name this is the Shift_JIS -> EUC/JIS
 * conversion -- confirm against the callers.
 */
2227 int s2e_conv(c2, c1, p2, p1)
2231 #ifdef SHIFTJIS_CP932
2232 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2233 extern unsigned short shiftjis_cp932[3][189];
/* NOTE(review): only c2 is range-checked in the visible condition; c1 - 0x40
 * must lie in 0..188 for this lookup to stay inside the table -- confirm
 * that callers guarantee 0x40 <= c1 <= 0xfc. */
2234 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2235 if (c1 == 0) return 1;
2239 #endif /* SHIFTJIS_CP932 */
2240 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2242 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2259 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2262 int ret = s2e_conv(c2, c1, &c2, &c1);
2263 if (ret) return ret;
2276 } else if (c2 == SSO){
2279 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2289 #ifdef UTF8_INPUT_ENABLE
2291 w2e_conv(c2, c1, c0, p2, p1)
2295 extern unsigned short * utf8_to_euc_2bytes[];
2296 extern unsigned short ** utf8_to_euc_3bytes[];
2299 if (0xc0 <= c2 && c2 <= 0xef) {
2300 unsigned short **pp;
2303 if (c0 == 0) return -1;
2304 pp = utf8_to_euc_3bytes[c2 - 0x80];
2305 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2307 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2309 #ifdef NUMCHAR_OPTION
2312 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2317 } else if (c2 == X0201) {
2330 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
/*
 * w16w_conv: split the code value val into UTF-8 bytes, stored through
 * p2 (lead byte), p1 and p0 (following bytes).
 *
 * Only part of the body is visible in this listing:
 *   - for val < 0x800 the two-byte form is produced
 *     (0xc0 | upper bits, 0x80 | low 6 bits);
 *   - the last three stores produce the three-byte form
 *     (0xe0 lead byte followed by two 0x80-tagged 6-bit groups).
 *
 * NOTE(review): the visible stores dereference p2 / p1 / p0 without a NULL
 * check, so callers presumably always pass valid pointers -- confirm.
 */
2338 w16w_conv(val, p2, p1, p0)
2346 }else if (val < 0x800){
2347 *p2 = 0xc0 | (val >> 6);
2348 *p1 = 0x80 | (val & 0x3f);
2351 *p2 = 0xe0 | (val >> 12);
2352 *p1 = 0x80 | ((val >> 6) & 0x3f);
2353 *p0 = 0x80 | (val & 0x3f);
/*
 * ww16_conv: combine the bytes of one UTF-8 sequence into its code value.
 *
 *   c2  lead byte; it selects the form of the sequence
 *   c1  first continuation byte (only the low 6 bits are used)
 *   c0  second continuation byte, used only by the three-byte form
 *
 * Returns the decoded value: 16 bits for a three-byte sequence
 * (c2 >= 0xe0), 11 bits for a two-byte sequence (c2 >= 0xc0), or c2
 * itself for anything below 0xc0.
 */
int
ww16_conv(c2, c1, c0)
     int c2, c1, c0;
{
    unsigned short val;

    if (c2 >= 0xe0){
        val = (c2 & 0x0f) << 12;
        val |= (c1 & 0x3f) << 6;
        val |= (c0 & 0x3f);
    }else if (c2 >= 0xc0){
        val = (c2 & 0x1f) << 6;
        /* The continuation byte supplies the low six bits and must not be
         * shifted: shifting it by 6 as well overlapped the lead-byte bits
         * and decoded every two-byte sequence to the wrong value. */
        val |= (c1 & 0x3f);
    }else{
        val = c2;
    }
    return val;
}
2376 w16e_conv(val, p2, p1)
2380 extern unsigned short * utf8_to_euc_2bytes[];
2381 extern unsigned short ** utf8_to_euc_3bytes[];
2383 unsigned short **pp;
2387 w16w_conv(val, &c2, &c1, &c0);
2390 pp = utf8_to_euc_3bytes[c2 - 0x80];
2391 psize = sizeof_utf8_to_euc_C2;
2392 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2394 pp = utf8_to_euc_2bytes;
2395 psize = sizeof_utf8_to_euc_2bytes;
2396 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2398 #ifdef NUMCHAR_OPTION
2401 *p1 = CLASS_UTF16 | val;
2410 w_iconv16(c2, c1, c0)
2415 if (c2==0376 && c1==0377){
2416 utf16_mode = UTF16LE_INPUT;
2418 } else if (c2==0377 && c1==0376){
2419 utf16_mode = UTF16BE_INPUT;
2422 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2424 tmp=c1; c1=c2; c2=tmp;
2426 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2430 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2431 if (ret) return ret;
2437 w_iconv_common(c1, c0, pp, psize, p2, p1)
2439 unsigned short **pp;
2447 if (pp == 0) return 1;
2450 if (c1 < 0 || psize <= c1) return 1;
2452 if (p == 0) return 1;
2455 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2457 if (val == 0) return 1;
2460 if (c2 == SO) c2 = X0201;
2469 #ifdef UTF8_OUTPUT_ENABLE
2474 extern unsigned short euc_to_utf8_1byte[];
2475 extern unsigned short * euc_to_utf8_2bytes[];
2476 extern unsigned short * euc_to_utf8_2bytes_ms[];
2480 p = euc_to_utf8_1byte;
2483 c2 = (c2&0x7f) - 0x21;
2484 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2485 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2490 c1 = (c1 & 0x7f) - 0x21;
2491 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2502 #ifdef NUMCHAR_OPTION
2503 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2504 w16w_conv(c1, &c2, &c1, &c0);
2508 if (c0) (*o_putc)(c0);
2517 if (unicode_bom_f==2) {
2525 output_mode = ASCII;
2527 } else if (c2 == ISO8859_1) {
2528 output_mode = ISO8859_1;
2529 (*o_putc)(c1 | 0x080);
2532 w16w_conv((unsigned short)e2w_conv(c2, c1), &c2, &c1, &c0);
2536 if (c0) (*o_putc)(c0);
2551 if (unicode_bom_f==2) {
2553 (*o_putc)((unsigned char)'\377');
2557 (*o_putc)((unsigned char)'\377');
2562 if (c2 == ISO8859_1) {
2565 #ifdef NUMCHAR_OPTION
2566 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2567 c2 = (c1 >> 8) & 0xff;
2571 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2572 c2 = (val >> 8) & 0xff;
2591 #ifdef NUMCHAR_OPTION
2592 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2593 w16e_conv(c1, &c2, &c1);
2599 } else if (c2 == 0) {
2600 output_mode = ASCII;
2602 } else if (c2 == X0201) {
2603 output_mode = JAPANESE_EUC;
2604 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2605 } else if (c2 == ISO8859_1) {
2606 output_mode = ISO8859_1;
2607 (*o_putc)(c1 | 0x080);
2609 if ((c1<0x21 || 0x7e<c1) ||
2610 (c2<0x21 || 0x7e<c2)) {
2611 set_iconv(FALSE, 0);
2612 return; /* too late to rescue this char */
2614 output_mode = JAPANESE_EUC;
2615 (*o_putc)(c2 | 0x080);
2616 (*o_putc)(c1 | 0x080);
/*
 * e2s_conv: derive a byte pair from (c2, c1) and store it through p2 / p1.
 * Each output pointer is written only when it is non-NULL.
 *
 *   *p2 = ((c2 - 1) >> 1) + 0x71 when c2 <= 0x5e, + 0xb1 otherwise
 *   *p1 = c1 + 0x7e for even c2;
 *         for odd c2, c1 + 0x1f when c1 < 0x60, else c1 + 0x20
 *
 * NOTE(review): a caller in this file sets output_mode = SHIFT_JIS right
 * before calling this, so this is presumably the 7-bit JIS row/cell to
 * Shift_JIS mapping -- confirm.
 */
2621 e2s_conv(c2, c1, p2, p1)
2622 int c2, c1, *p2, *p1;
2624 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2625 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2633 #ifdef NUMCHAR_OPTION
2634 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2635 w16e_conv(c1, &c2, &c1);
2641 } else if (c2 == 0) {
2642 output_mode = ASCII;
2644 } else if (c2 == X0201) {
2645 output_mode = SHIFT_JIS;
2647 } else if (c2 == ISO8859_1) {
2648 output_mode = ISO8859_1;
2649 (*o_putc)(c1 | 0x080);
2651 if ((c1<0x20 || 0x7e<c1) ||
2652 (c2<0x20 || 0x7e<c2)) {
2653 set_iconv(FALSE, 0);
2654 return; /* too late to rescue this char */
2656 output_mode = SHIFT_JIS;
2657 e2s_conv(c2, c1, &c2, &c1);
2659 #ifdef SHIFTJIS_CP932
2661 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2662 extern unsigned short cp932inv[2][189];
2663 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2669 #endif /* SHIFTJIS_CP932 */
2672 if (prefix_table[(unsigned char)c1]){
2673 (*o_putc)(prefix_table[(unsigned char)c1]);
2684 #ifdef NUMCHAR_OPTION
2685 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2686 w16e_conv(c1, &c2, &c1);
2690 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2693 (*o_putc)(ascii_intro);
2694 output_mode = ASCII;
2697 } else if (c2==X0201) {
2698 if (output_mode!=X0201) {
2699 output_mode = X0201;
2705 } else if (c2==ISO8859_1) {
2706 /* iso8859 introduction, or 8th bit on */
2707 /* Can we convert in 7bit form using ESC-'-'-A ?
2709 output_mode = ISO8859_1;
2711 } else if (c2 == 0) {
2712 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2715 (*o_putc)(ascii_intro);
2716 output_mode = ASCII;
2720 if (output_mode != X0208) {
2721 output_mode = X0208;
2724 (*o_putc)(kanji_intro);
2726 if (c1<0x20 || 0x7e<c1)
2728 if (c2<0x20 || 0x7e<c2)
2740 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2743 } else if (base64_count>66 && mimeout_mode) {
2744 (*o_base64conv)(EOF,0);
2745 (*o_base64conv)(NL,0);
2746 (*o_base64conv)(SPACE,0);
2748 (*o_base64conv)(c2,c1);
2752 static int broken_buf[3];
2753 static int broken_counter = 0;
2754 static int broken_last = 0;
2761 if (broken_counter>0) {
2762 return broken_buf[--broken_counter];
2765 if (c=='$' && broken_last != ESC
2766 && (input_mode==ASCII || input_mode==X0201)) {
2769 if (c1=='@'|| c1=='B') {
2770 broken_buf[0]=c1; broken_buf[1]=c;
2777 } else if (c=='(' && broken_last != ESC
2778 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2781 if (c1=='J'|| c1=='B') {
2782 broken_buf[0]=c1; broken_buf[1]=c;
2800 if (broken_counter<2)
2801 broken_buf[broken_counter++]=c;
2805 static int prev_cr = 0;
2813 if (! (c2==0&&c1==NL) ) {
2819 } else if (c1=='\r') {
2821 } else if (c1=='\n') {
2822 if (crmode_f==CRLF) {
2823 (*o_crconv)(0,'\r');
2824 } else if (crmode_f==CR) {
2825 (*o_crconv)(0,'\r');
2829 } else if (c1!='\032' || crmode_f!=NL){
2835 Return value of fold_conv()
2837 \n add newline and output char
2838 \r add newline and output nothing
2841 1 (or else) normal output
2843 fold state in prev (previous character)
2845 >0x80 Japanese (X0208/X0201)
2850 This fold algorithm does not preserve leading spaces in a line.
2851 This is the main difference from fmt.
2854 #define char_size(c2,c1) (c2?2:1)
2863 if (c1== '\r' && !fold_preserve_f) {
2864 fold_state=0; /* ignore cr */
2865 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2867 fold_state=0; /* ignore cr */
2868 } else if (c1== BS) {
2869 if (f_line>0) f_line--;
2871 } else if (c2==EOF && f_line != 0) { /* close open last line */
2873 } else if ((c1=='\n' && !fold_preserve_f)
2874 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2875 && fold_preserve_f)) {
2877 if (fold_preserve_f) {
2881 } else if ((f_prev == c1 && !fold_preserve_f)
2882 || (f_prev == '\n' && fold_preserve_f)
2883 ) { /* duplicate newline */
2886 fold_state = '\n'; /* output two newline */
2892 if (f_prev&0x80) { /* Japanese? */
2894 fold_state = 0; /* ignore given single newline */
2895 } else if (f_prev==' ') {
2899 if (++f_line<=fold_len)
2903 fold_state = '\r'; /* fold and output nothing */
2907 } else if (c1=='\f') {
2912 fold_state = '\n'; /* output newline and clear */
2913 } else if ( (c2==0 && c1==' ')||
2914 (c2==0 && c1=='\t')||
2915 (c2=='!'&& c1=='!')) {
2916 /* X0208 kankaku or ascii space */
2917 if (f_prev == ' ') {
2918 fold_state = 0; /* remove duplicate spaces */
2921 if (++f_line<=fold_len)
2922 fold_state = ' '; /* output ASCII space only */
2924 f_prev = ' '; f_line = 0;
2925 fold_state = '\r'; /* fold and output nothing */
2929 prev0 = f_prev; /* we still need this one... , but almost done */
2931 if (c2 || c2==X0201)
2932 f_prev |= 0x80; /* this is Japanese */
2933 f_line += char_size(c2,c1);
2934 if (f_line<=fold_len) { /* normal case */
2937 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2938 f_line = char_size(c2,c1);
2939 fold_state = '\n'; /* We can't wait, do fold now */
2940 } else if (c2==X0201) {
2941 /* simple kinsoku rules return 1 means no folding */
2942 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2943 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2944 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2945 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2946 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2947 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2948 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2950 fold_state = '\n';/* add one new f_line before this character */
2953 fold_state = '\n';/* add one new f_line before this character */
2956 /* kinsoku point in ASCII */
2957 if ( c1==')'|| /* { [ ( */
2968 /* just after special */
2969 } else if (!is_alnum(prev0)) {
2970 f_line = char_size(c2,c1);
2972 } else if ((prev0==' ') || /* ignored new f_line */
2973 (prev0=='\n')|| /* ignored new f_line */
2974 (prev0&0x80)) { /* X0208 - ASCII */
2975 f_line = char_size(c2,c1);
2976 fold_state = '\n';/* add one new f_line before this character */
2978 fold_state = 1; /* default no fold in ASCII */
2982 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2983 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2984 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2985 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2986 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2987 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2988 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2989 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2990 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2991 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2992 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2993 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2994 /* default no fold in kinsoku */
2997 f_line = char_size(c2,c1);
2998 /* add one new f_line before this character */
3001 f_line = char_size(c2,c1);
3003 /* add one new f_line before this character */
3008 /* terminator process */
3009 switch(fold_state) {
3028 int z_prev2=0,z_prev1=0;
3035 /* if (c2) c1 &= 0x7f; assertion */
3037 if (x0201_f && z_prev2==X0201) { /* X0201 */
3038 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3040 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3042 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3044 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3048 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3057 if (x0201_f && c2==X0201) {
3058 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3059 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3060 z_prev1 = c1; z_prev2 = c2;
3063 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3068 /* JISX0208 Alphabet */
3069 if (alpha_f && c2 == 0x23 ) {
3071 } else if (alpha_f && c2 == 0x21 ) {
3072 /* JISX0208 Kigou */
3077 } else if (alpha_f&0x4) {
3082 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3088 case '>': entity = ">"; break;
3089 case '<': entity = "<"; break;
3090 case '\"': entity = """; break;
3091 case '&': entity = "&"; break;
3094 while (*entity) (*o_zconv)(0, *entity++);
3104 #define rot13(c) ( \
3106 (c <= 'M') ? (c + 13): \
3107 (c <= 'Z') ? (c - 13): \
3109 (c <= 'm') ? (c + 13): \
3110 (c <= 'z') ? (c - 13): \
3114 #define rot47(c) ( \
3116 ( c <= 'O' ) ? (c + 47) : \
3117 ( c <= '~' ) ? (c - 47) : \
3125 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3131 (*o_rot_conv)(c2,c1);
3138 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3140 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3143 (*o_hira_conv)(c2,c1);
3148 iso2022jp_check_conv(c2,c1)
3151 static int range[RANGE_NUM_MAX][2] = {
3174 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3178 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3183 for (i = 0; i < RANGE_NUM_MAX; i++) {
3184 start = range[i][0];
3187 if (c >= start && c <= end) {
3192 (*o_iso2022jp_check_conv)(c2,c1);
3196 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3198 unsigned char *mime_pattern[] = {
3199 (unsigned char *)"\075?EUC-JP?B?",
3200 (unsigned char *)"\075?SHIFT_JIS?B?",
3201 (unsigned char *)"\075?ISO-8859-1?Q?",
3202 (unsigned char *)"\075?ISO-8859-1?B?",
3203 (unsigned char *)"\075?ISO-2022-JP?B?",
3204 (unsigned char *)"\075?ISO-2022-JP?Q?",
3205 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3206 (unsigned char *)"\075?UTF-8?B?",
3207 (unsigned char *)"\075?UTF-8?Q?",
3209 (unsigned char *)"\075?US-ASCII?Q?",
3214 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3215 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3216 e_iconv, s_iconv, 0, 0, 0, 0,
3217 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3223 int mime_encode[] = {
3224 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3225 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3232 int mime_encode_method[] = {
3233 'B', 'B','Q', 'B', 'B', 'Q',
3234 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
#define MAXRECOVER 20

/* I don't trust portability of toupper */
/*
 * ASCII-only character helpers.  Every use of the parameter is
 * parenthesized so that an expression argument (for example c & 0x7f)
 * is classified as a whole.  The argument is still evaluated more than
 * once, so do not pass expressions with side effects.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
3252 if (i_getc!=mime_getc) {
3253 i_mgetc = i_getc; i_getc = mime_getc;
3254 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3255 if(mime_f==STRICT_MIME) {
3256 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3257 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
/*
 * unswitch_mime_getc: undo the reader redirection installed for MIME
 * decoding.  When mime_f is STRICT_MIME the i_mgetc / i_mungetc hooks are
 * restored from their saved i_mgetc_buf / i_mungetc_buf values, and
 * i_ungetc is pointed back at i_mungetc.
 * NOTE(review): the matching i_getc restore is not visible in this
 * listing -- confirm it sits next to the i_ungetc assignment.
 */
3263 unswitch_mime_getc()
3265 if(mime_f==STRICT_MIME) {
3266 i_mgetc = i_mgetc_buf;
3267 i_mungetc = i_mungetc_buf;
3270 i_ungetc = i_mungetc;
3274 mime_begin_strict(f)
3279 unsigned char *p,*q;
3280 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3282 mime_decode_mode = FALSE;
3283 /* =? has been checked */
3285 p = mime_pattern[j];
3288 for(i=2;p[i]>' ';i++) { /* start at =? */
3289 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3290 /* pattern fails, try next one */
3292 while ((p = mime_pattern[++j])) {
3293 for(k=2;k<i;k++) /* assume length(p) > i */
3294 if (p[k]!=q[k]) break;
3295 if (k==i && nkf_toupper(c1)==p[k]) break;
3297 if (p) continue; /* found next one, continue */
3298 /* all fails, output from recovery buffer */
3306 mime_decode_mode = p[i-2];
3308 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3310 if (mime_decode_mode=='B') {
3311 mimebuf_f = unbuf_f;
3313 /* do MIME integrity check */
3314 return mime_integrity(f,mime_pattern[j]);
3326 /* we don't keep eof of Fifo, because it contains ?= as
3327 a terminator. It was checked in mime_integrity. */
3328 return ((mimebuf_f)?
3329 (*i_mgetc_buf)(f):Fifo(mime_input++));
3333 mime_ungetc_buf(c,f)
3338 (*i_mungetc_buf)(c,f);
3340 Fifo(--mime_input)=c;
3351 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3352 /* re-read and convert again from mime_buffer. */
3354 /* =? has been checked */
3356 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3357 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3358 /* We accept any character type even if it is broken by new lines */
3359 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3360 if (c1=='\n'||c1==' '||c1=='\r'||
3361 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3363 /* Failed. But this could be another MIME preamble */
3371 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3372 if (!(++i<MAXRECOVER) || c1==EOF) break;
3373 if (c1=='b'||c1=='B') {
3374 mime_decode_mode = 'B';
3375 } else if (c1=='q'||c1=='Q') {
3376 mime_decode_mode = 'Q';
3380 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3381 if (!(++i<MAXRECOVER) || c1==EOF) break;
3383 mime_decode_mode = FALSE;
3389 if (!mime_decode_mode) {
3390 /* false MIME preamble, restart from mime_buffer */
3391 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3392 /* Since we are in MIME mode until buffer becomes empty, */
3393 /* we never go into mime_begin again for a while. */
3396 /* discard mime preamble, and goto MIME mode */
3398 /* do no MIME integrity check */
3399 return c1; /* used only for checking EOF */
3414 fprintf(stderr, "%s\n", str);
/*
 * set_input_codename: record codename as the name of the input encoding.
 * A codename that is non-empty and differs from the currently stored
 * input_codename marks the input as mixed (is_inputcode_mixed = TRUE).
 * NOTE(review): the first part of that condition is on lines not shown
 * in this listing.
 * The pointer itself is stored in input_codename (no copy is made) and
 * is_inputcode_set is set to TRUE.
 */
3420 set_input_codename (codename)
3425 strcmp(codename, "") != 0 &&
3426 strcmp(codename, input_codename) != 0)
3428 is_inputcode_mixed = TRUE;
3430 input_codename = codename;
3431 is_inputcode_set = TRUE;
/*
 * print_guessed_code: write the detected input encoding name to stdout as
 * one line, prefixed with "filename:" when filename is not NULL.
 * The name starts out as "BINARY"; when the input was not flagged as mixed
 * (is_inputcode_mixed) it is taken from input_codename unless that string
 * is empty.
 * NOTE(review): the name used for an empty input_codename is set on a
 * line not shown in this listing.
 */
3435 print_guessed_code (filename)
3438 char *codename = "BINARY";
3439 if (!is_inputcode_mixed) {
3440 if (strcmp(input_codename, "") == 0) {
3443 codename = input_codename;
3446 if (filename != NULL) printf("%s:", filename);
3447 printf("%s\n", codename);
3454 if (nkf_isdigit(x)) return x - '0';
3455 return nkf_toupper(x) - 'A' + 10;
3460 #ifdef ANSI_C_PROTOTYPE
3461 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3464 hex_getc(ch, f, g, u)
3477 if (!nkf_isxdigit(c2)){
3482 if (!nkf_isxdigit(c3)){
3487 return (hex2bin(c2) << 4) | hex2bin(c3);
3494 return hex_getc(':', f, i_cgetc, i_cungetc);
3502 return (*i_cungetc)(c, f);
3509 return hex_getc('%', f, i_ugetc, i_uungetc);
3517 return (*i_uungetc)(c, f);
3521 #ifdef NUMCHAR_OPTION
3526 int (*g)() = i_ngetc;
3527 int (*u)() = i_nungetc;
3538 if (buf[i] == 'x' || buf[i] == 'X'){
3539 for (j = 0; j < 5; j++){
3541 if (!nkf_isxdigit(buf[i])){
3548 c |= hex2bin(buf[i]);
3551 for (j = 0; j < 6; j++){
3555 if (!nkf_isdigit(buf[i])){
3562 c += hex2bin(buf[i]);
3568 return CLASS_UTF16 | c;
3578 numchar_ungetc(c, f)
3582 return (*i_nungetc)(c, f);
3591 int c1, c2, c3, c4, cc;
3592 int t1, t2, t3, t4, mode, exit_mode;
3596 int lwsp_size = 128;
3598 if (mime_top != mime_last) { /* Something is in FIFO */
3599 return Fifo(mime_top++);
3601 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3602 mime_decode_mode=FALSE;
3603 unswitch_mime_getc();
3604 return (*i_getc)(f);
3607 if (mimebuf_f == FIXED_MIME)
3608 exit_mode = mime_decode_mode;
3611 if (mime_decode_mode == 'Q') {
3612 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3614 if (c1=='_') return ' ';
3615 if (c1!='=' && c1!='?') {
3619 mime_decode_mode = exit_mode; /* prepare for quit */
3620 if (c1<=' ') return c1;
3621 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3622 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3623 /* end Q encoding */
3624 input_mode = exit_mode;
3626 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3627 if (lwsp_buf==NULL) {
3628 perror("can't malloc");
3631 while ((c1=(*i_getc)(f))!=EOF) {
3636 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3644 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3645 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3660 lwsp_buf[lwsp_count] = c1;
3661 if (lwsp_count++>lwsp_size){
3663 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3664 if (lwsp_buf_new==NULL) {
3667 perror("can't realloc");
3670 lwsp_buf = lwsp_buf_new;
3676 if (lwsp_count > 0) {
3677 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3681 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3682 i_ungetc(lwsp_buf[lwsp_count],f);
3690 if (c1=='='&&c2<' ') { /* this is soft wrap */
3691 while((c1 = (*i_mgetc)(f)) <=' ') {
3692 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3694 mime_decode_mode = 'Q'; /* still in MIME */
3695 goto restart_mime_q;
3698 mime_decode_mode = 'Q'; /* still in MIME */
3702 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3703 if (c2<=' ') return c2;
3704 mime_decode_mode = 'Q'; /* still in MIME */
3705 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3706 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3707 return ((hex(c2)<<4) + hex(c3));
3710 if (mime_decode_mode != 'B') {
3711 mime_decode_mode = FALSE;
3712 return (*i_mgetc)(f);
3716 /* Base64 encoding */
3718 MIME allows line break in the middle of
3719 Base64, but we are very pessimistic in decoding
3720 in unbuf mode because MIME encoded code may be broken by
3721 less or an editor's control sequence (such as ESC-[-K) in unbuffered
3722 mode. Ignore incomplete MIME.
3724 mode = mime_decode_mode;
3725 mime_decode_mode = exit_mode; /* prepare for quit */
3727 while ((c1 = (*i_mgetc)(f))<=' ') {
3732 if ((c2 = (*i_mgetc)(f))<=' ') {
3735 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3736 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3739 if ((c1 == '?') && (c2 == '=')) {
3742 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3743 if (lwsp_buf==NULL) {
3744 perror("can't malloc");
3747 while ((c1=(*i_getc)(f))!=EOF) {
3752 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3760 if ((c1=(*i_getc)(f))!=EOF) {
3764 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3779 lwsp_buf[lwsp_count] = c1;
3780 if (lwsp_count++>lwsp_size){
3782 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3783 if (lwsp_buf_new==NULL) {
3786 perror("can't realloc");
3789 lwsp_buf = lwsp_buf_new;
3795 if (lwsp_count > 0) {
3796 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3800 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3801 i_ungetc(lwsp_buf[lwsp_count],f);
3810 if ((c3 = (*i_mgetc)(f))<=' ') {
3813 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3814 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3818 if ((c4 = (*i_mgetc)(f))<=' ') {
3821 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3822 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3826 mime_decode_mode = mode; /* still in MIME sigh... */
3828 /* BASE 64 decoding */
3830 t1 = 0x3f & base64decode(c1);
3831 t2 = 0x3f & base64decode(c2);
3832 t3 = 0x3f & base64decode(c3);
3833 t4 = 0x3f & base64decode(c4);
3834 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3836 Fifo(mime_last++) = cc;
3837 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3839 Fifo(mime_last++) = cc;
3840 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3842 Fifo(mime_last++) = cc;
3847 return Fifo(mime_top++);
3855 Fifo(--mime_top) = c;
3866 /* In buffered mode, read until =? or NL or buffer full
3868 mime_input = mime_top;
3869 mime_last = mime_top;
3870 while(*p) Fifo(mime_input++) = *p++;
3873 while((c=(*i_getc)(f))!=EOF) {
3874 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3875 break; /* buffer full */
3877 if (c=='=' && d=='?') {
3878 /* checked. skip header, start decode */
3879 Fifo(mime_input++) = c;
3880 /* mime_last_input = mime_input; */
3885 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3887 /* Should we check length mod 4? */
3888 Fifo(mime_input++) = c;
3891 /* In case of Incomplete MIME, no MIME decode */
3892 Fifo(mime_input++) = c;
3893 mime_last = mime_input; /* point undecoded buffer */
3894 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3895 switch_mime_getc(); /* anyway we need buffered getc */
3906 i = c - 'A'; /* A..Z 0-25 */
3908 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3910 } else if (c > '/') {
3911 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3912 } else if (c == '+') {
3913 i = '>' /* 62 */ ; /* + 62 */
3915 i = '?' /* 63 */ ; /* / 63 */
3920 static char basis_64[] =
3921 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3924 #define MIMEOUT_BUF_LENGTH (60)
3925 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
3926 int mimeout_buf_count = 0;
3927 int mimeout_preserve_space = 0;
3928 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
/* Fragment of the encoded-word opener (presumably open_mime(mode); the
 * header, closing braces and several statements are on lines missing from
 * this excerpt).  Visible steps: choose the pattern and method for mode,
 * deal with leading whitespace held in mimeout_buf, then replay the buffered
 * bytes through mime_putc. */
3937 p = mime_pattern[0];
/* mime_encode is scanned up to its zero terminator for an entry equal to
 * mode; the first pattern stays selected if none matches. */
3938 for(i=0;mime_encode[i];i++) {
3939 if (mode == mime_encode[i]) {
3940 p = mime_pattern[i];
/* The method is taken from the slot i was left at by the search. */
3944 mimeout_mode = mime_encode_method[i];
/* base64_count is apparently the running output column (TODO confirm);
 * beyond 45 the statements on the omitted lines run before the opener. */
3947 if (base64_count>45) {
/* A leading blank in the pending buffer is looked at only when space
 * preservation is off and the buffer is not empty. */
3951 if (!mimeout_preserve_space && mimeout_buf_count>0
3952 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
3953 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
/* Without space preservation, buffered blanks from index i on are passed
 * straight to o_mputc. */
3957 if (!mimeout_preserve_space) {
3958 for (;i<mimeout_buf_count;i++) {
3959 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
3960 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
3961 (*o_mputc)(mimeout_buf[i]);
/* The preserve flag is reset at this point. */
3968 mimeout_preserve_space = FALSE;
/* The count is saved in j and the buffer marked empty before the buffered
 * bytes are fed to mime_putc, presumably so that those calls start from an
 * empty buffer. */
3974 j = mimeout_buf_count;
3975 mimeout_buf_count = 0;
3977 mime_putc(mimeout_buf[i]);
/* Fragment of what appears to be the encoded-word terminator (header, case
 * labels and several statements are on lines missing from this excerpt).
 * The bits still pending in b64c are flushed as one final alphabet
 * character, selected by mimeout_mode. */
3993 switch(mimeout_mode) {
/* Two pending bits become the top two bits of the last 6-bit index. */
3998 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
/* Four pending bits become the top four bits of the last 6-bit index. */
4004 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
/* Different handling for non-FIXED_MIME output and, under FIXED_MIME, for
 * every method other than 'Q'; the guarded statements are not visible. */
4010 if (mimeout_f!=FIXED_MIME) {
4012 } else if (mimeout_mode != 'Q')
/* Fragment of the per-byte encoder (presumably mimeout_addchar(c); header,
 * case labels and state updates are on lines missing from this excerpt).
 * c is written through o_mputc in the representation chosen by mimeout_mode.
 * NOTE(review): the table indices assume 0 <= c <= 255 - TODO confirm. */
4021 switch(mimeout_mode) {
/* Hexadecimal form: high nibble first, then low nibble, each via itoh4. */
4025 (*o_mputc)(itoh4(((c>>4)&0xf)));
4026 (*o_mputc)(itoh4((c&0xf)));
/* Base64 form.  b64c is assigned on lines not shown and apparently holds
 * the previous byte.  No bits pending: emit the top six bits of c. */
4035 (*o_mputc)(basis_64[c>>2]);
/* Two pending bits of b64c followed by the top four bits of c. */
4040 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
/* Four pending bits of b64c followed by the top two bits of c, then the
 * remaining low six bits of c as a second character. */
4046 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4047 (*o_mputc)(basis_64[c & 0x3F]);
/* Fragment of the MIME output stage (presumably mime_putc(c); the header,
 * closing braces and a number of statements are on lines missing from this
 * excerpt).  Bytes are collected in mimeout_buf and then either written
 * through o_mputc as they are, or handed to open_mime / mimeout_addchar for
 * encoding.  c may be EOF, which triggers the final flush. */
4061 if (mimeout_f==FIXED_MIME && base64_count>50) {
4065 } else if (c==CR||c==NL) {
4068 if (c!=EOF && mimeout_f!=FIXED_MIME) {
/* Bytes up to DEL in ASCII / ISO8859_1 output mode are candidates for being
 * passed through without encoding. */
4069 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4070 if (mimeout_mode=='Q') {
4079 } else if (mimeout_mode) {
4080 if (base64_count>63) {
4085 mimeout_preserve_space = TRUE;
/* Whitespace while an encoded word is open: if the buffer already holds a
 * printable byte, the buffered bytes are written out as they are. */
4087 if (c==SPACE || c==TAB || c==CR || c==NL) {
4088 for (i=0;i<mimeout_buf_count;i++) {
4089 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4091 for (i=0;i<mimeout_buf_count;i++) {
4092 (*o_mputc)(mimeout_buf[i]);
4095 mimeout_buf_count = 0;
4098 mimeout_buf[mimeout_buf_count++] = c;
4099 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4102 for (i=0;i<mimeout_buf_count;i++) {
4103 (*o_mputc)(mimeout_buf[i]);
4109 if (mimeout_buf_count>0 && SPACE<c) {
4110 mimeout_buf[mimeout_buf_count++] = c;
4111 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4116 } else if (!mimeout_mode) {
4117 if (c==SPACE || c==TAB || c==CR || c==NL) {
/* One trailing SPACE/TAB is dropped before a line break.  The count test
 * keeps the index from becoming -1 when the buffer is empty. */
4118 if ((c==CR || c==NL) && mimeout_buf_count>0
4119 &&(mimeout_buf[mimeout_buf_count-1]==SPACE
4120 || mimeout_buf[mimeout_buf_count-1]==TAB)) {
4121 mimeout_buf_count--;
4123 for (i=0;i<mimeout_buf_count;i++) {
4124 (*o_mputc)(mimeout_buf[i]);
4127 mimeout_buf_count = 0;
4129 mimeout_buf[mimeout_buf_count++] = c;
/* The flush threshold must not exceed the array capacity
 * (MIMEOUT_BUF_LENGTH+1 bytes); a larger literal limit lets a long run
 * without whitespace be stored past the end of mimeout_buf. */
4130 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4131 open_mime(output_mode);
4135 } else if (!mimeout_mode) {
/* A byte that needs encoding arrives with no encoded word open: everything
 * buffered before a trailing SPACE is written out, the SPACE is kept as the
 * only buffered byte, and the encoded word is opened. */
4136 if (mimeout_buf_count>0 && mimeout_buf[mimeout_buf_count-1]==SPACE) {
4137 for (i=0;i<mimeout_buf_count-1;i++) {
4138 (*o_mputc)(mimeout_buf[i]);
4141 mimeout_buf[0] = SPACE;
4142 mimeout_buf_count = 1;
4144 open_mime(output_mode);
4146 } else { /* c==EOF */
4147 j = mimeout_buf_count;
4150 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4151 || mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4153 (*mime_putc)(mimeout_buf[i]);
4157 (*o_mputc)(mimeout_buf[i]);
/* Whatever is still buffered is encoded; the count is cleared first. */
4163 if (mimeout_buf_count>0) {
4164 j = mimeout_buf_count;
4165 mimeout_buf_count = 0;
4167 mimeout_addchar(mimeout_buf[i]);
/* Fragment of the routine that puts option flags, converter hooks and I/O
 * hooks back to their defaults (header, many assignments and the closing
 * lines of the conditional sections are missing from this excerpt). */
4179 struct input_code *p = input_code_list;
4192 mime_f = STRICT_MIME;
4197 #if defined(MSDOS) || defined(__OS2__)
4202 iso2022jp_f = FALSE;
4203 #ifdef UTF8_OUTPUT_ENABLE
4206 ms_ucs_map_f = FALSE;
4218 is_inputcode_mixed = FALSE;
4219 is_inputcode_set = FALSE;
4223 #ifdef SHIFTJIS_CP932
/* Clear all 256 entries of prefix_table. */
4229 for (i = 0; i < 256; i++){
4230 prefix_table[i] = 0;
4233 #ifdef UTF8_INPUT_ENABLE
4234 utf16_mode = UTF16LE_INPUT;
/* Discard anything still pending in the MIME output buffer. */
4236 mimeout_buf_count = 0;
4241 fold_preserve_f = FALSE;
4244 kanji_intro = DEFAULT_J;
4245 ascii_intro = DEFAULT_R;
4246 fold_margin = FOLD_MARGIN;
4247 output_conv = DEFAULT_CONV;
4248 oconv = DEFAULT_CONV;
/* Every optional output-side hook is pointed at no_connection, the
 * placeholder that reports a wiring error if it is ever called. */
4249 o_zconv = no_connection;
4250 o_fconv = no_connection;
4251 o_crconv = no_connection;
4252 o_rot_conv = no_connection;
4253 o_hira_conv = no_connection;
4254 o_base64conv = no_connection;
4255 o_iso2022jp_check_conv = no_connection;
/* Input hooks fall back to the std_getc / std_ungetc routines. */
4258 i_ungetc = std_ungetc;
4260 i_bungetc = std_ungetc;
4263 i_mungetc = std_ungetc;
4264 i_mgetc_buf = std_getc;
4265 i_mungetc_buf = std_ungetc;
4266 output_mode = ASCII;
4269 mime_decode_mode = FALSE;
4275 z_prev2=0,z_prev1=0;
/* Placeholder converter assigned to hooks that have not been wired up
 * (K&R-style definition; parameter declarations and braces are on lines
 * missing from this excerpt).  It forwards to no_connection2 with 0 as the
 * third argument. */
4281 no_connection(c2,c1)
4284 no_connection2(c2,c1,0);
/* Three-argument variant: reports the wiring error on stderr.  Whatever
 * follows the message is not visible in this excerpt. */
4288 no_connection2(c2,c1,c0)
4291 fprintf(stderr,"nkf internal module connection failure.\n");
/* Body of the usage/help printer (the function header and the lines closing
 * the conditional sections are missing from this excerpt).  The option
 * summary is written to stderr; the line describing the output code depends
 * on which DEFAULT_CODE_* macro is defined, and optional features are listed
 * only when the corresponding macro is defined.  Spelling of the help text
 * has been corrected (Output, hiragana, character, equivalent). */
4299 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4300 fprintf(stderr,"Flags:\n");
4301 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4302 #ifdef DEFAULT_CODE_SJIS
4303 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4305 #ifdef DEFAULT_CODE_JIS
4306 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4308 #ifdef DEFAULT_CODE_EUC
4309 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4311 #ifdef DEFAULT_CODE_UTF8
4312 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4314 #ifdef UTF8_OUTPUT_ENABLE
4315 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4317 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4318 #ifdef UTF8_INPUT_ENABLE
4319 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4321 fprintf(stderr,"t no conversion\n");
4322 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4323 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4324 fprintf(stderr,"h 1 hiragana->katakana, 2 katakana->hiragana,3 both\n");
4325 fprintf(stderr,"v Show this usage. V: show version\n");
4326 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4327 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4328 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4329 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4330 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4331 fprintf(stderr," 3: Convert HTML Entity\n");
4332 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4333 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4335 fprintf(stderr,"T Text mode output\n");
4337 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4338 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4339 fprintf(stderr,"I Convert non ISO-2022-JP character to GETA\n");
4340 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4341 fprintf(stderr,"long name options\n");
4342 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4343 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4344 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4346 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4348 #ifdef NUMCHAR_OPTION
4349 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4351 #ifdef SHIFTJIS_CP932
4352 fprintf(stderr," --no-cp932 Don't convert Shift_JIS FAxx-FCxx to equivalent CP932\n");
4353 fprintf(stderr," --cp932inv convert Shift_JIS EDxx-EFxx to equivalent CP932 FAxx-FCxx\n");
4355 #ifdef UTF8_OUTPUT_ENABLE
4356 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4359 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4361 fprintf(stderr," -g, --guess Guess the input code\n");
4362 fprintf(stderr," --help,--version\n");
/* Fragment of the version banner printer (header and closing lines are
 * missing from this excerpt).  The program name is written to stderr with
 * NKF_VERSION and NKF_RELEASE_DATE, followed by the CopyRight text.  The #if
 * lines select platform-specific text on lines not shown here, presumably
 * string literals appended to the format string. */
4369 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4370 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4373 #if defined(MSDOS) && defined(__WIN16__)
4376 #if defined(MSDOS) && defined(__WIN32__)
4382 ,NKF_VERSION,NKF_RELEASE_DATE);
4383 fprintf(stderr,"\n%s\n",CopyRight);
4388 ** Patch authors:
4389 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4390 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4391 ** ohta@src.ricoh.co.jp (Junn Ohta)
4392 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4393 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4394 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4395 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4396 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4397 ** GHG00637@nifty-serve.or.jp (COW)