1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be used as a drop-in replacement for the previous nkf.
32 ** When it is started as, e.g., "nkf -e" and auto-detection judges the input
33 ** to be UTF-8, the input is converted to euc-jp as is.
35 ** It is still quite likely to contain bugs
36 ** (especially in auto-detection, mixed encodings, and error handling).
38 ** If you find any problem, please report it to
39 ** E-Mail: furukawa@tcp-ip.or.jp
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.47 2004/12/01 04:18:04 naruse Exp $ */
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2004-12-01"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
153 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
154 #include <sys/utime.h>
155 #elif defined(__TURBOC__) /* BCC */
157 #elif defined(LSI_C) /* LSI C */
169 /* state of output_mode and input_mode
186 /* Input Assumption */
190 #define LATIN1_INPUT 6
192 #define STRICT_MIME 8
197 #define JAPANESE_EUC 10
201 #define UTF8_INPUT 13
202 #define UTF16LE_INPUT 14
203 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): non-zero when c is an ASCII letter ('a'-'z', 'A'-'Z') or an
 * ASCII digit ('0'-'9'), zero otherwise.
 *
 * Every use of the parameter is parenthesized so that an argument containing
 * a low-precedence operator (for example a conditional expression) is tested
 * as a single value instead of being re-associated by the comparisons.
 * The argument is still evaluated more than once, so it must not have side
 * effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
226 #define HOLD_SIZE 1024
227 #define IOBUF_SIZE 16384
229 #define DEFAULT_J 'B'
230 #define DEFAULT_R 'B'
232 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
233 #define SJ6394 0x0161 /* 63 - 94 ku offset */
235 #define RANGE_NUM_MAX 18
240 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
241 #define sizeof_euc_utf8 94
242 #define sizeof_euc_to_utf8_1byte 94
243 #define sizeof_euc_to_utf8_2bytes 94
244 #define sizeof_utf8_to_euc_C2 64
245 #define sizeof_utf8_to_euc_E5B8 64
246 #define sizeof_utf8_to_euc_2bytes 112
247 #define sizeof_utf8_to_euc_3bytes 112
250 /* MIME preprocessor */
253 #ifdef EASYWIN /*Easy Win */
254 extern POINT _BufferSize;
257 /* function prototype */
259 #ifdef ANSI_C_PROTOTYPE
261 #define STATIC static
273 void (*status_func)PROTO((struct input_code *, int));
274 int (*iconv_func)PROTO((int c2, int c1, int c0));
278 STATIC char *input_codename = "";
280 STATIC int noconvert PROTO((FILE *f));
281 STATIC int kanji_convert PROTO((FILE *f));
282 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
283 STATIC int push_hold_buf PROTO((int c2));
284 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
285 STATIC int s_iconv PROTO((int c2,int c1,int c0));
286 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
287 STATIC int e_iconv PROTO((int c2,int c1,int c0));
288 #ifdef UTF8_INPUT_ENABLE
289 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
290 STATIC int w_iconv PROTO((int c2,int c1,int c0));
291 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
292 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
293 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
295 #ifdef UTF8_OUTPUT_ENABLE
296 STATIC int e2w_conv PROTO((int c2,int c1));
297 STATIC void w_oconv PROTO((int c2,int c1));
298 STATIC void w_oconv16 PROTO((int c2,int c1));
300 STATIC void e_oconv PROTO((int c2,int c1));
301 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
302 STATIC void s_oconv PROTO((int c2,int c1));
303 STATIC void j_oconv PROTO((int c2,int c1));
304 STATIC void fold_conv PROTO((int c2,int c1));
305 STATIC void cr_conv PROTO((int c2,int c1));
306 STATIC void z_conv PROTO((int c2,int c1));
307 STATIC void rot_conv PROTO((int c2,int c1));
308 STATIC void hira_conv PROTO((int c2,int c1));
309 STATIC void base64_conv PROTO((int c2,int c1));
310 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
311 STATIC void no_connection PROTO((int c2,int c1));
312 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
314 STATIC void code_score PROTO((struct input_code *ptr));
315 STATIC void code_status PROTO((int c));
317 STATIC void std_putc PROTO((int c));
318 STATIC int std_getc PROTO((FILE *f));
319 STATIC int std_ungetc PROTO((int c,FILE *f));
321 STATIC int broken_getc PROTO((FILE *f));
322 STATIC int broken_ungetc PROTO((int c,FILE *f));
324 STATIC int mime_begin PROTO((FILE *f));
325 STATIC int mime_getc PROTO((FILE *f));
326 STATIC int mime_ungetc PROTO((int c,FILE *f));
328 STATIC int mime_begin_strict PROTO((FILE *f));
329 STATIC int mime_getc_buf PROTO((FILE *f));
330 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
331 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
333 STATIC int base64decode PROTO((int c));
334 STATIC void mime_putc PROTO((int c));
335 STATIC void open_mime PROTO((int c));
336 STATIC void close_mime PROTO(());
337 STATIC void usage PROTO(());
338 STATIC void version PROTO(());
339 STATIC void options PROTO((unsigned char *c));
341 STATIC void reinit PROTO(());
346 static unsigned char stdibuf[IOBUF_SIZE];
347 static unsigned char stdobuf[IOBUF_SIZE];
348 static unsigned char hold_buf[HOLD_SIZE*2];
349 static int hold_count;
351 /* MIME preprocessor fifo */
353 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
354 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
355 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
356 static unsigned char mime_buf[MIME_BUF_SIZE];
357 static unsigned int mime_top = 0;
358 static unsigned int mime_last = 0; /* decoded */
359 static unsigned int mime_input = 0; /* undecoded */
362 static int unbuf_f = FALSE; /* presumably unbuffered output (usage flag 'u') -- TODO confirm */
363 static int estab_f = FALSE; /* presumably set once the input code is established -- TODO confirm */
364 static int nop_f = FALSE;
365 static int binmode_f = TRUE; /* binary mode */
366 static int rot_f = FALSE; /* rot13/47 mode */
367 static int hira_f = FALSE; /* hira/kata henkan (hiragana/katakana conversion) */
368 static int input_f = FALSE; /* non fixed input code */
369 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
370 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
371 static int mimebuf_f = FALSE; /* MIME buffered input */
372 static int broken_f = FALSE; /* convert ESC-less broken JIS */
373 static int iso8859_f = FALSE; /* ISO8859 through */
374 static int mimeout_f = FALSE; /* base64 mode */
375 #if defined(MSDOS) || defined(__OS2__)
376 static int x0201_f = TRUE; /* Assume JISX0201 kana */
378 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
380 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
381 #ifdef UTF8_OUTPUT_ENABLE
382 static int unicode_bom_f= 0; /* Output Unicode BOM */
383 static int w_oconv16_LE = 0; /* utf-16 little endian */
384 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
388 #ifdef NUMCHAR_OPTION
390 #define CLASS_MASK 0x0f000000
391 #define CLASS_UTF16 0x01000000
395 static int cap_f = FALSE;
396 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
397 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
398 STATIC int cap_getc PROTO((FILE *f));
399 STATIC int cap_ungetc PROTO((int c,FILE *f));
401 static int url_f = FALSE;
402 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
403 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
404 STATIC int url_getc PROTO((FILE *f));
405 STATIC int url_ungetc PROTO((int c,FILE *f));
407 static int numchar_f = FALSE;
408 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
409 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
410 STATIC int numchar_getc PROTO((FILE *f));
411 STATIC int numchar_ungetc PROTO((int c,FILE *f));
415 static int noout_f = FALSE;
416 STATIC void no_putc PROTO((int c));
417 static int debug_f = FALSE;
418 STATIC void debug PROTO((char *str));
421 static int guess_f = FALSE;
422 STATIC void print_guessed_code PROTO((char *filename));
423 STATIC void set_input_codename PROTO((char *codename));
424 static int is_inputcode_mixed = FALSE;
425 static int is_inputcode_set = FALSE;
428 static int exec_f = 0;
431 #ifdef SHIFTJIS_CP932
432 STATIC int cp932_f = TRUE;
433 #define CP932_TABLE_BEGIN (0xfa)
434 #define CP932_TABLE_END (0xfc)
436 STATIC int cp932inv_f = TRUE;
437 #define CP932INV_TABLE_BEGIN (0xed)
438 #define CP932INV_TABLE_END (0xee)
440 #endif /* SHIFTJIS_CP932 */
442 STATIC unsigned char prefix_table[256];
444 STATIC void e_status PROTO((struct input_code *, int));
445 STATIC void s_status PROTO((struct input_code *, int));
447 #ifdef UTF8_INPUT_ENABLE
448 STATIC void w_status PROTO((struct input_code *, int));
449 STATIC void w16_status PROTO((struct input_code *, int));
450 static int utf16_mode = UTF16LE_INPUT;
453 struct input_code input_code_list[] = {
454 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
455 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
456 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
457 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
461 static int mimeout_mode = 0;
462 static int base64_count = 0;
464 /* X0208 -> ASCII converter */
467 static int f_line = 0; /* chars in line */
468 static int f_prev = 0;
469 static int fold_preserve_f = FALSE; /* preserve new lines */
470 static int fold_f = FALSE;
471 static int fold_len = 0;
474 static unsigned char kanji_intro = DEFAULT_J;
475 static unsigned char ascii_intro = DEFAULT_R;
479 #define FOLD_MARGIN 10
480 #define DEFAULT_FOLD 60
482 static int fold_margin = FOLD_MARGIN;
486 #ifdef DEFAULT_CODE_JIS
487 # define DEFAULT_CONV j_oconv
489 #ifdef DEFAULT_CODE_SJIS
490 # define DEFAULT_CONV s_oconv
492 #ifdef DEFAULT_CODE_EUC
493 # define DEFAULT_CONV e_oconv
495 #ifdef DEFAULT_CODE_UTF8
496 # define DEFAULT_CONV w_oconv
499 /* process default */
500 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
502 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
503 /* s_iconv or oconv */
504 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
506 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
507 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
508 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
509 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
510 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
511 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
512 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
514 /* static redirections */
516 static void (*o_putc)PROTO((int c)) = std_putc;
518 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
519 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
521 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
522 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
524 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
526 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
527 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
529 /* for strict mime */
530 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
531 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
534 static int output_mode = ASCII, /* output kanji mode */
535 input_mode = ASCII, /* input kanji mode */
536 shift_mode = FALSE; /* TRUE shift out, or X0201 */
537 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
539 /* X0201 / X0208 conversion tables */
541 /* X0201 kana conversion table */
544 unsigned char cv[]= {
545 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
546 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
547 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
548 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
549 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
550 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
551 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
552 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
553 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
554 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
555 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
556 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
557 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
558 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
559 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
560 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
564 /* X0201 kana conversion table for dakuten */
567 unsigned char dv[]= {
568 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
569 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
570 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
571 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
572 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
573 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
574 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
575 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
576 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
577 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
579 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
583 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 /* X0201 kana conversion table for han-dakuten */
589 unsigned char ev[]= {
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
601 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 /* X0208 kigou conversion table */
610 /* 0x8140 - 0x819e */
612 unsigned char fv[] = {
614 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
615 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
616 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
618 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
619 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
620 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
622 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
625 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
631 static int file_out = FALSE;
633 static int overwrite = FALSE;
636 static int crmode_f = 0; /* CR, NL, CRLF */
637 #ifdef EASYWIN /*Easy Win */
638 static int end_check;
650 char *outfname = NULL;
653 #ifdef EASYWIN /*Easy Win */
654 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
657 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
658 cp = (unsigned char *)*argv;
663 if (pipe(fds) < 0 || (pid = fork()) < 0){
674 execvp(argv[1], &argv[1]);
688 if(x0201_f == WISH_TRUE)
689 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
691 if (binmode_f == TRUE)
693 if (freopen("","wb",stdout) == NULL)
700 setbuf(stdout, (char *) NULL);
702 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
705 if (binmode_f == TRUE)
707 if (freopen("","rb",stdin) == NULL) return (-1);
711 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
715 kanji_convert(stdin);
716 if (guess_f) print_guessed_code(NULL);
721 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
730 /* reopen file for stdout */
731 if (file_out == TRUE) {
734 outfname = malloc(strlen(origfname)
735 + strlen(".nkftmpXXXXXX")
741 strcpy(outfname, origfname);
745 for (i = strlen(outfname); i; --i){
746 if (outfname[i - 1] == '/'
747 || outfname[i - 1] == '\\'){
753 strcat(outfname, "ntXXXXXX");
755 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
758 strcat(outfname, ".nkftmpXXXXXX");
759 fd = mkstemp(outfname);
762 || (fd_backup = dup(fileno(stdout))) < 0
763 || dup2(fd, fileno(stdout)) < 0
774 outfname = "nkf.out";
777 if(freopen(outfname, "w", stdout) == NULL) {
781 if (binmode_f == TRUE) {
783 if (freopen("","wb",stdout) == NULL)
790 if (binmode_f == TRUE)
792 if (freopen("","rb",fin) == NULL)
797 setvbuffer(fin, stdibuf, IOBUF_SIZE);
801 char *filename = NULL;
803 if (nfiles > 1) filename = origfname;
804 if (guess_f) print_guessed_code(filename);
810 #if defined(MSDOS) && !defined(__MINGW32__)
818 if (dup2(fd_backup, fileno(stdout)) < 0){
821 if (stat(origfname, &sb)) {
822 fprintf(stderr, "Can't stat %s\n", origfname);
824 /* restore the permissions */
825 if (chmod(outfname, sb.st_mode)) {
826 fprintf(stderr, "Can't set permission %s\n", outfname);
829 /* restore the timestamps */
830 #if defined(MSDOS) && !defined(__MINGW32__)
831 tb[0] = tb[1] = sb.st_mtime;
832 if (utime(outfname, tb)) {
833 fprintf(stderr, "Can't set timestamp %s\n", outfname);
836 tb.actime = sb.st_atime;
837 tb.modtime = sb.st_mtime;
838 if (utime(outfname, &tb)) {
839 fprintf(stderr, "Can't set timestamp %s\n", outfname);
843 if (unlink(origfname)){
847 if (rename(outfname, origfname)) {
849 fprintf(stderr, "Can't rename %s to %s\n",
850 outfname, origfname);
858 #ifdef EASYWIN /*Easy Win */
859 if (file_out == FALSE)
860 scanf("%d",&end_check);
863 #else /* for Other OS */
864 if (file_out == TRUE)
894 {"katakana-hiragana","h3"},
898 #ifdef UTF8_OUTPUT_ENABLE
903 #ifdef UTF8_INPUT_ENABLE
905 {"utf16-input", "W16"},
914 #ifdef NUMCHAR_OPTION
915 {"numchar-input", ""},
921 #ifdef SHIFTJIS_CP932
931 static int option_mode = 0;
938 unsigned char *p = NULL;
950 case '-': /* literal options */
951 if (!*cp) { /* ignore the rest of arguments */
955 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
957 p = (unsigned char *)long_option[i].name;
958 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
959 if (!*p || *p == cp[j]){
966 cp = (unsigned char *)long_option[i].alias;
969 if (strcmp(long_option[i].name, "overwrite") == 0){
976 if (strcmp(long_option[i].name, "cap-input") == 0){
980 if (strcmp(long_option[i].name, "url-input") == 0){
985 #ifdef NUMCHAR_OPTION
986 if (strcmp(long_option[i].name, "numchar-input") == 0){
992 if (strcmp(long_option[i].name, "no-output") == 0){
996 if (strcmp(long_option[i].name, "debug") == 0){
1001 if (strcmp(long_option[i].name, "cp932") == 0){
1002 #ifdef SHIFTJIS_CP932
1006 #ifdef UTF8_OUTPUT_ENABLE
1007 ms_ucs_map_f = TRUE;
1011 if (strcmp(long_option[i].name, "no-cp932") == 0){
1012 #ifdef SHIFTJIS_CP932
1016 #ifdef UTF8_OUTPUT_ENABLE
1017 ms_ucs_map_f = FALSE;
1021 #ifdef SHIFTJIS_CP932
1022 if (strcmp(long_option[i].name, "cp932inv") == 0){
1028 if (strcmp(long_option[i].name, "exec-in") == 0){
1032 if (strcmp(long_option[i].name, "exec-out") == 0){
1037 #ifdef UTF8_OUTPUT_ENABLE
1038 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1039 ms_ucs_map_f = TRUE;
1043 if (strcmp(long_option[i].name, "prefix=") == 0){
1044 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1045 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1046 prefix_table[p[i]] = p[1];
1053 case 'b': /* buffered mode */
1056 case 'u': /* unbuffered mode */
1059 case 't': /* transparent mode */
1062 case 'j': /* JIS output */
1064 output_conv = j_oconv;
1066 case 'e': /* AT&T EUC output */
1067 output_conv = e_oconv;
1069 case 's': /* SJIS output */
1070 output_conv = s_oconv;
1072 case 'l': /* ISO8859 Latin-1 support, no conversion */
1073 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1074 input_f = LATIN1_INPUT;
1076 case 'i': /* Kanji IN ESC-$-@/B */
1077 if (*cp=='@'||*cp=='B')
1078 kanji_intro = *cp++;
1080 case 'o': /* ASCII IN ESC-(-J/B */
1081 if (*cp=='J'||*cp=='B'||*cp=='H')
1082 ascii_intro = *cp++;
1089 if ('9'>= *cp && *cp>='0')
1090 hira_f |= (*cp++ -'0');
1097 #if defined(MSDOS) || defined(__OS2__)
1112 #ifdef UTF8_OUTPUT_ENABLE
1113 case 'w': /* UTF-8 output */
1114 if ('1'== cp[0] && '6'==cp[1]) {
1115 output_conv = w_oconv16; cp+=2;
1117 unicode_bom_f=2; cp++;
1120 unicode_bom_f=1; cp++;
1122 } else if (cp[0] == 'B') {
1123 unicode_bom_f=2; cp++;
1125 unicode_bom_f=1; cp++;
1128 } else if (cp[0] == '8') {
1129 output_conv = w_oconv; cp++;
1132 unicode_bom_f=1; cp++;
1135 output_conv = w_oconv;
1138 #ifdef UTF8_INPUT_ENABLE
1139 case 'W': /* UTF-8 input */
1140 if ('1'== cp[0] && '6'==cp[1]) {
1141 input_f = UTF16LE_INPUT;
1144 } else if (cp[0] == 'B') {
1146 input_f = UTF16BE_INPUT;
1148 } else if (cp[0] == '8') {
1150 input_f = UTF8_INPUT;
1152 input_f = UTF8_INPUT;
1155 /* Input code assumption */
1156 case 'J': /* JIS input */
1157 case 'E': /* AT&T EUC input */
1158 input_f = JIS_INPUT;
1160 case 'S': /* MS Kanji input */
1161 input_f = SJIS_INPUT;
1162 if (x0201_f==NO_X0201) x0201_f=TRUE;
1164 case 'Z': /* Convert X0208 alphabet to ASCII */
1165 /* bit:0 Convert X0208
1166 bit:1 Convert Kankaku to one space
1167 bit:2 Convert Kankaku to two spaces
1168 bit:3 Convert HTML Entity
1170 if ('9'>= *cp && *cp>='0')
1171 alpha_f |= 1<<(*cp++ -'0');
1175 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1176 x0201_f = FALSE; /* No X0201->X0208 conversion */
1178 ESC-(-I in JIS, EUC, MS Kanji
1179 SI/SO in JIS, EUC, MS Kanji
1180 SSO in EUC, JIS, not in MS Kanji
1181 MS Kanji (0xa0-0xdf)
1183 ESC-(-I in JIS (0x20-0x5f)
1184 SSO in EUC (0xa0-0xdf)
1185 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1188 case 'X': /* Assume X0201 kana */
1189 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1192 case 'F': /* preserve new lines */
1193 fold_preserve_f = TRUE;
1194 case 'f': /* folding -f60 or -f */
1197 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1199 fold_len += *cp++ - '0';
1201 if (!(0<fold_len && fold_len<BUFSIZ))
1202 fold_len = DEFAULT_FOLD;
1206 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1208 fold_margin += *cp++ - '0';
1212 case 'm': /* MIME support */
1213 if (*cp=='B'||*cp=='Q') {
1214 mime_decode_mode = *cp++;
1215 mimebuf_f = FIXED_MIME;
1216 } else if (*cp=='N') {
1217 mime_f = TRUE; cp++;
1218 } else if (*cp=='S') {
1219 mime_f = STRICT_MIME; cp++;
1220 } else if (*cp=='0') {
1221 mime_f = FALSE; cp++;
1224 case 'M': /* MIME output */
1227 mimeout_f = FIXED_MIME; cp++;
1228 } else if (*cp=='Q') {
1230 mimeout_f = FIXED_MIME; cp++;
1235 case 'B': /* Broken JIS support */
1237 bit:1 allow any x on ESC-(-x or ESC-$-x
1238 bit:2 reset to ascii on NL
1240 if ('9'>= *cp && *cp>='0')
1241 broken_f |= 1<<(*cp++ -'0');
1246 case 'O':/* for Output file */
1250 case 'c':/* add cr code */
1253 case 'd':/* delete cr code */
1256 case 'I': /* ISO-2022-JP output */
1259 case 'L': /* line mode */
1260 if (*cp=='u') { /* unix */
1261 crmode_f = NL; cp++;
1262 } else if (*cp=='m') { /* mac */
1263 crmode_f = CR; cp++;
1264 } else if (*cp=='w') { /* windows */
1265 crmode_f = CRLF; cp++;
1266 } else if (*cp=='0') { /* no conversion */
1276 /* multiple options in a string are allowed for the Perl module */
1277 while(*cp && *cp!='-') cp++;
1281 /* bogus option but ignored */
1287 #ifdef ANSI_C_PROTOTYPE
1288 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1290 struct input_code * find_inputcode_byfunc(iconv_func)
1291 int (*iconv_func)();
1295 struct input_code *p = input_code_list;
1297 if (iconv_func == p->iconv_func){
1306 #ifdef ANSI_C_PROTOTYPE
1307 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1309 void set_iconv(f, iconv_func)
1311 int (*iconv_func)();
1315 static int (*iconv_for_check)() = 0;
1317 #ifdef INPUT_CODE_FIX
1325 #ifdef INPUT_CODE_FIX
1326 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1332 if (estab_f && iconv_for_check != iconv){
1333 struct input_code *p = find_inputcode_byfunc(iconv);
1335 set_input_codename(p->name);
1336 debug(input_codename);
1338 iconv_for_check = iconv;
1343 #define SCORE_L2 (1) /* JIS level-2 kanji */
1344 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1345 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1346 #ifdef SHIFTJIS_CP932
1347 #define SCORE_CP932 (SCORE_DEPEND << 1) /* remapped via CP932 */
1348 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1350 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1352 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1353 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1355 #define SCORE_INIT (SCORE_iMIME)
1357 int score_table_A0[] = {
1360 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1361 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1364 int score_table_F0[] = {
1365 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1366 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1367 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1368 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1371 void set_code_score(ptr, score)
1372 struct input_code *ptr;
1376 ptr->score |= score;
1380 void clr_code_score(ptr, score)
1381 struct input_code *ptr;
1385 ptr->score &= ~score;
1389 void code_score(ptr)
1390 struct input_code *ptr;
1392 int c2 = ptr->buf[0];
1393 int c1 = ptr->buf[1];
1395 set_code_score(ptr, SCORE_ERROR);
1396 }else if (c2 == SSO){
1397 set_code_score(ptr, SCORE_KANA);
1398 #ifdef UTF8_OUTPUT_ENABLE
1399 }else if (!e2w_conv(c2, c1)){
1400 set_code_score(ptr, SCORE_NO_EXIST);
1402 }else if ((c2 & 0x70) == 0x20){
1403 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1404 }else if ((c2 & 0x70) == 0x70){
1405 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1406 }else if ((c2 & 0x70) >= 0x50){
1407 set_code_score(ptr, SCORE_L2);
1411 void status_disable(ptr)
1412 struct input_code *ptr;
1417 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1420 void status_push_ch(ptr, c)
1421 struct input_code *ptr;
1424 ptr->buf[ptr->index++] = c;
1427 void status_clear(ptr)
1428 struct input_code *ptr;
1434 void status_reset(ptr)
1435 struct input_code *ptr;
1438 ptr->score = SCORE_INIT;
1441 void status_reinit(ptr)
1442 struct input_code *ptr;
1445 ptr->_file_stat = 0;
1448 void status_check(ptr, c)
1449 struct input_code *ptr;
1452 if (c <= DEL && estab_f){
1457 void s_status(ptr, c)
1458 struct input_code *ptr;
1463 status_check(ptr, c);
1468 #ifdef NUMCHAR_OPTION
1469 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1472 }else if (0xa1 <= c && c <= 0xdf){
1473 status_push_ch(ptr, SSO);
1474 status_push_ch(ptr, c);
1477 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1479 status_push_ch(ptr, c);
1480 #ifdef SHIFTJIS_CP932
1482 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1484 status_push_ch(ptr, c);
1485 #endif /* SHIFTJIS_CP932 */
1487 status_disable(ptr);
1491 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1492 status_push_ch(ptr, c);
1493 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1497 status_disable(ptr);
1500 #ifdef SHIFTJIS_CP932
1502 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1503 status_push_ch(ptr, c);
1504 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1505 set_code_score(ptr, SCORE_CP932);
1510 status_disable(ptr);
1512 #endif /* SHIFTJIS_CP932 */
1516 void e_status(ptr, c)
1517 struct input_code *ptr;
1522 status_check(ptr, c);
1527 #ifdef NUMCHAR_OPTION
1528 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1531 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1533 status_push_ch(ptr, c);
1535 status_disable(ptr);
1539 if (0xa1 <= c && c <= 0xfe){
1540 status_push_ch(ptr, c);
1544 status_disable(ptr);
1550 #ifdef UTF8_INPUT_ENABLE
1551 void w16_status(ptr, c)
1552 struct input_code *ptr;
1559 if (ptr->_file_stat == 0){
1560 if (c == 0xfe || c == 0xff){
1562 status_push_ch(ptr, c);
1563 ptr->_file_stat = 1;
1565 status_disable(ptr);
1566 ptr->_file_stat = -1;
1568 }else if (ptr->_file_stat > 0){
1570 status_push_ch(ptr, c);
1571 }else if (ptr->_file_stat < 0){
1572 status_disable(ptr);
1578 status_disable(ptr);
1579 ptr->_file_stat = -1;
1581 status_push_ch(ptr, c);
1588 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1589 status_push_ch(ptr, c);
1592 status_disable(ptr);
1593 ptr->_file_stat = -1;
1599 void w_status(ptr, c)
1600 struct input_code *ptr;
1605 status_check(ptr, c);
1610 #ifdef NUMCHAR_OPTION
1611 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1614 }else if (0xc0 <= c && c <= 0xdf){
1616 status_push_ch(ptr, c);
1617 }else if (0xe0 <= c && c <= 0xef){
1619 status_push_ch(ptr, c);
1621 status_disable(ptr);
1626 if (0x80 <= c && c <= 0xbf){
1627 status_push_ch(ptr, c);
1628 if (ptr->index > ptr->stat){
1629 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1630 && ptr->buf[2] == 0xbf);
1631 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1632 &ptr->buf[0], &ptr->buf[1]);
1639 status_disable(ptr);
1650 int action_flag = 1;
1651 struct input_code *result = 0;
1652 struct input_code *p = input_code_list;
1654 (p->status_func)(p, c);
1657 }else if(p->stat == 0){
1668 if (result && !estab_f){
1669 set_iconv(TRUE, result->iconv_func);
1670 }else if (c <= DEL){
1671 struct input_code *ptr = input_code_list;
1680 #define STD_GC_BUFSIZE (256)
1681 int std_gc_buf[STD_GC_BUFSIZE];
1689 return std_gc_buf[--std_gc_ndx];
1699 if (std_gc_ndx == STD_GC_BUFSIZE){
1702 std_gc_buf[std_gc_ndx++] = c;
1720 while ((c = (*i_getc)(f)) != EOF)
1729 oconv = output_conv;
1732 /* replace continuation module, from output side */
1734 /* output redirection */
1736 if (noout_f || guess_f){
1743 if (mimeout_f == TRUE) {
1744 o_base64conv = oconv; oconv = base64_conv;
1746 /* base64_count = 0; */
1750 o_crconv = oconv; oconv = cr_conv;
1753 o_rot_conv = oconv; oconv = rot_conv;
1756 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1759 o_hira_conv = oconv; oconv = hira_conv;
1762 o_fconv = oconv; oconv = fold_conv;
1765 if (alpha_f || x0201_f) {
1766 o_zconv = oconv; oconv = z_conv;
1770 i_ungetc = std_ungetc;
1771 /* input redirection */
1774 i_cgetc = i_getc; i_getc = cap_getc;
1775 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1778 i_ugetc = i_getc; i_getc = url_getc;
1779 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1782 #ifdef NUMCHAR_OPTION
1784 i_ngetc = i_getc; i_getc = numchar_getc;
1785 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1788 if (mime_f && mimebuf_f==FIXED_MIME) {
1789 i_mgetc = i_getc; i_getc = mime_getc;
1790 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1793 i_bgetc = i_getc; i_getc = broken_getc;
1794 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1796 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1797 set_iconv(-TRUE, e_iconv);
1798 } else if (input_f == SJIS_INPUT) {
1799 set_iconv(-TRUE, s_iconv);
1800 #ifdef UTF8_INPUT_ENABLE
1801 } else if (input_f == UTF8_INPUT) {
1802 set_iconv(-TRUE, w_iconv);
1803 } else if (input_f == UTF16LE_INPUT) {
1804 set_iconv(-TRUE, w_iconv16);
1807 set_iconv(FALSE, e_iconv);
1811 struct input_code *p = input_code_list;
1819 Conversion main loop. Code detection only.
1829 module_connection();
1834 output_mode = ASCII;
1837 #define NEXT continue /* no output, get next */
1838 #define SEND ; /* output c1 and c2, get next */
1839 #define LAST break /* end of loop, go closing */
1841 while ((c1 = (*i_getc)(f)) != EOF) {
1846 /* in case of 8th bit is on */
1848 /* in case of not established yet */
1849 /* It is still ambiguous */
1850 if (h_conv(f, c2, c1)==EOF)
1856 /* in case of already established */
1858 /* ignore bogus code */
1864 /* second byte, 7 bit code */
1865 /* it might be kanji shifted */
1866 if ((c1 == DEL) || (c1 <= SPACE)) {
1867 /* ignore bogus first code */
1875 #ifdef UTF8_INPUT_ENABLE
1884 #ifdef NUMCHAR_OPTION
1885 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1888 } else if (c1 > DEL) {
1890 if (!estab_f && !iso8859_f) {
1891 /* not established yet */
1894 } else { /* estab_f==TRUE */
1899 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1900 /* SJIS X0201 Case... */
1901 if(iso2022jp_f && x0201_f==NO_X0201) {
1902 (*oconv)(GETA1, GETA2);
1909 } else if (c1==SSO && iconv != s_iconv) {
1910 /* EUC X0201 Case */
1911 c1 = (*i_getc)(f); /* skip SSO */
1913 if (SSP<=c1 && c1<0xe0) {
1914 if(iso2022jp_f && x0201_f==NO_X0201) {
1915 (*oconv)(GETA1, GETA2);
1922 } else { /* bogus code, skip SSO and one byte */
1926 /* already established */
1931 } else if ((c1 > SPACE) && (c1 != DEL)) {
1932 /* in case of Roman characters */
1934 /* output 1 shifted byte */
1938 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1939 /* output 1 shifted byte */
1940 if(iso2022jp_f && x0201_f==NO_X0201) {
1941 (*oconv)(GETA1, GETA2);
1948 /* look like bogus code */
1951 } else if (input_mode == X0208) {
1952 /* in case of Kanji shifted */
1955 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1956 /* Check MIME code */
1957 if ((c1 = (*i_getc)(f)) == EOF) {
1960 } else if (c1 == '?') {
1961 /* =? is mime conversion start sequence */
1962 if(mime_f == STRICT_MIME) {
1963 /* check in real detail */
1964 if (mime_begin_strict(f) == EOF)
1968 } else if (mime_begin(f) == EOF)
1978 /* normal ASCII code */
1981 } else if (c1 == SI) {
1984 } else if (c1 == SO) {
1987 } else if (c1 == ESC ) {
1988 if ((c1 = (*i_getc)(f)) == EOF) {
1989 /* (*oconv)(0, ESC); don't send bogus code */
1991 } else if (c1 == '$') {
1992 if ((c1 = (*i_getc)(f)) == EOF) {
1994 (*oconv)(0, ESC); don't send bogus code
1995 (*oconv)(0, '$'); */
1997 } else if (c1 == '@'|| c1 == 'B') {
1998 /* This is kanji introduction */
2001 set_input_codename("ISO-2022-JP");
2002 debug(input_codename);
2004 } else if (c1 == '(') {
2005 if ((c1 = (*i_getc)(f)) == EOF) {
2006 /* don't send bogus code
2012 } else if (c1 == '@'|| c1 == 'B') {
2013 /* This is kanji introduction */
2018 /* could be some special code */
2025 } else if (broken_f&0x2) {
2026 /* accept any ESC-(-x as broken code ... */
2036 } else if (c1 == '(') {
2037 if ((c1 = (*i_getc)(f)) == EOF) {
2038 /* don't send bogus code
2040 (*oconv)(0, '('); */
2044 /* This is X0201 kana introduction */
2045 input_mode = X0201; shift_mode = X0201;
2047 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2048 /* This is ASCII / JIS Roman introduction (input_mode returns to ASCII) */
2049 input_mode = ASCII; shift_mode = FALSE;
2051 } else if (broken_f&0x2) {
2052 input_mode = ASCII; shift_mode = FALSE;
2057 /* maintain various input_mode here */
2061 } else if ( c1 == 'N' || c1 == 'n' ){
2063 c3 = (*i_getc)(f); /* skip SS2 */
2064 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2079 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2080 input_mode = ASCII; set_iconv(FALSE, 0);
2083 } else if (c1 == NL && mime_f && !mime_decode_mode ) {
2084 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2092 } else if (c1 == CR && mime_f && !mime_decode_mode ) {
2093 if ((c1=(*i_getc)(f))!=EOF) {
2097 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2114 if (input_mode == X0208)
2115 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2116 else if (input_mode)
2117 (*oconv)(input_mode, c1); /* other special case */
2118 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2119 int c0 = (*i_getc)(f);
2122 (*iconv)(c2, c1, c0);
2128 /* goto next_word */
2132 (*iconv)(EOF, 0, 0);
2145 /** it must NOT be in the kanji shifted sequence */
2146 /** it must NOT be written in JIS7 */
2147 /** and it must be after 2 byte 8bit code */
2154 while ((c1 = (*i_getc)(f)) != EOF) {
2160 if (push_hold_buf(c1) == EOF || estab_f){
2166 struct input_code *p = input_code_list;
2167 struct input_code *result = p;
2172 if (p->score < result->score){
2177 set_iconv(FALSE, result->iconv_func);
2182 ** 1) EOF is detected, or
2183 ** 2) Code is established, or
2184 ** 3) Buffer is FULL (but last word is pushed)
2186 ** in 1) and 3) cases, we continue to use
2187 ** Kanji codes by oconv and leave estab_f unchanged.
2192 while (wc < hold_count){
2193 c2 = hold_buf[wc++];
2195 #ifdef NUMCHAR_OPTION
2196 || (c2 & CLASS_MASK) == CLASS_UTF16
2201 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2202 (*iconv)(X0201, c2, 0);
2205 if (wc < hold_count){
2206 c1 = hold_buf[wc++];
2215 if ((*iconv)(c2, c1, 0) < 0){
2217 if (wc < hold_count){
2218 c0 = hold_buf[wc++];
2227 (*iconv)(c2, c1, c0);
2240 if (hold_count >= HOLD_SIZE*2)
2242 hold_buf[hold_count++] = c2;
2243 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2246 int s2e_conv(c2, c1, p2, p1)
2250 #ifdef SHIFTJIS_CP932
2251 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2252 extern unsigned short shiftjis_cp932[3][189];
2253 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2254 if (c1 == 0) return 1;
2258 #endif /* SHIFTJIS_CP932 */
2259 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2261 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2278 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2281 int ret = s2e_conv(c2, c1, &c2, &c1);
2282 if (ret) return ret;
2295 } else if (c2 == SSO){
2298 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2308 #ifdef UTF8_INPUT_ENABLE
2310 w2e_conv(c2, c1, c0, p2, p1)
2314 extern unsigned short * utf8_to_euc_2bytes[];
2315 extern unsigned short ** utf8_to_euc_3bytes[];
2318 if (0xc0 <= c2 && c2 <= 0xef) {
2319 unsigned short **pp;
2322 if (c0 == 0) return -1;
2323 pp = utf8_to_euc_3bytes[c2 - 0x80];
2324 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2326 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2328 #ifdef NUMCHAR_OPTION
2331 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2336 } else if (c2 == X0201) {
2349 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2357 w16w_conv(val, p2, p1, p0)
2365 }else if (val < 0x800){
2366 *p2 = 0xc0 | (val >> 6);
2367 *p1 = 0x80 | (val & 0x3f);
2370 *p2 = 0xe0 | (val >> 12);
2371 *p1 = 0x80 | ((val >> 6) & 0x3f);
2372 *p0 = 0x80 | (val & 0x3f);
2377 ww16_conv(c2, c1, c0)
2382 val = (c2 & 0x0f) << 12;
2383 val |= (c1 & 0x3f) << 6;
2385 }else if (c2 >= 0xc0){
2386 val = (c2 & 0x1f) << 6;
2387 val |= (c1 & 0x3f) << 6;
2395 w16e_conv(val, p2, p1)
2399 extern unsigned short * utf8_to_euc_2bytes[];
2400 extern unsigned short ** utf8_to_euc_3bytes[];
2402 unsigned short **pp;
2406 w16w_conv(val, &c2, &c1, &c0);
2409 pp = utf8_to_euc_3bytes[c2 - 0x80];
2410 psize = sizeof_utf8_to_euc_C2;
2411 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2413 pp = utf8_to_euc_2bytes;
2414 psize = sizeof_utf8_to_euc_2bytes;
2415 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2417 #ifdef NUMCHAR_OPTION
2420 *p1 = CLASS_UTF16 | val;
2429 w_iconv16(c2, c1, c0)
2434 if (c2==0376 && c1==0377){
2435 utf16_mode = UTF16LE_INPUT;
2437 } else if (c2==0377 && c1==0376){
2438 utf16_mode = UTF16BE_INPUT;
2441 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2443 tmp=c1; c1=c2; c2=tmp;
2445 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2449 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2450 if (ret) return ret;
2456 w_iconv_common(c1, c0, pp, psize, p2, p1)
2458 unsigned short **pp;
2466 if (pp == 0) return 1;
2469 if (c1 < 0 || psize <= c1) return 1;
2471 if (p == 0) return 1;
2474 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2476 if (val == 0) return 1;
2479 if (c2 == SO) c2 = X0201;
2488 #ifdef UTF8_OUTPUT_ENABLE
2493 extern unsigned short euc_to_utf8_1byte[];
2494 extern unsigned short * euc_to_utf8_2bytes[];
2495 extern unsigned short * euc_to_utf8_2bytes_ms[];
2499 p = euc_to_utf8_1byte;
2502 c2 = (c2&0x7f) - 0x21;
2503 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2504 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2509 c1 = (c1 & 0x7f) - 0x21;
2510 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2521 #ifdef NUMCHAR_OPTION
2522 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2523 w16w_conv(c1, &c2, &c1, &c0);
2527 if (c0) (*o_putc)(c0);
2536 if (unicode_bom_f==2) {
2544 output_mode = ASCII;
2546 } else if (c2 == ISO8859_1) {
2547 output_mode = ISO8859_1;
2548 (*o_putc)(c1 | 0x080);
2551 w16w_conv((unsigned short)e2w_conv(c2, c1), &c2, &c1, &c0);
2555 if (c0) (*o_putc)(c0);
2570 if (unicode_bom_f==2) {
2572 (*o_putc)((unsigned char)'\377');
2576 (*o_putc)((unsigned char)'\377');
2581 if (c2 == ISO8859_1) {
2584 #ifdef NUMCHAR_OPTION
2585 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2586 c2 = (c1 >> 8) & 0xff;
2590 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2591 c2 = (val >> 8) & 0xff;
2610 #ifdef NUMCHAR_OPTION
2611 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2612 w16e_conv(c1, &c2, &c1);
2618 } else if (c2 == 0) {
2619 output_mode = ASCII;
2621 } else if (c2 == X0201) {
2622 output_mode = JAPANESE_EUC;
2623 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2624 } else if (c2 == ISO8859_1) {
2625 output_mode = ISO8859_1;
2626 (*o_putc)(c1 | 0x080);
2628 if ((c1<0x21 || 0x7e<c1) ||
2629 (c2<0x21 || 0x7e<c2)) {
2630 set_iconv(FALSE, 0);
2631 return; /* too late to rescue this char */
2633 output_mode = JAPANESE_EUC;
2634 (*o_putc)(c2 | 0x080);
2635 (*o_putc)(c1 | 0x080);
2640 e2s_conv(c2, c1, p2, p1)
2641 int c2, c1, *p2, *p1;
2643 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2644 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2652 #ifdef NUMCHAR_OPTION
2653 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2654 w16e_conv(c1, &c2, &c1);
2660 } else if (c2 == 0) {
2661 output_mode = ASCII;
2663 } else if (c2 == X0201) {
2664 output_mode = SHIFT_JIS;
2666 } else if (c2 == ISO8859_1) {
2667 output_mode = ISO8859_1;
2668 (*o_putc)(c1 | 0x080);
2670 if ((c1<0x20 || 0x7e<c1) ||
2671 (c2<0x20 || 0x7e<c2)) {
2672 set_iconv(FALSE, 0);
2673 return; /* too late to rescue this char */
2675 output_mode = SHIFT_JIS;
2676 e2s_conv(c2, c1, &c2, &c1);
2678 #ifdef SHIFTJIS_CP932
2680 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2681 extern unsigned short cp932inv[2][189];
2682 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2688 #endif /* SHIFTJIS_CP932 */
2691 if (prefix_table[(unsigned char)c1]){
2692 (*o_putc)(prefix_table[(unsigned char)c1]);
2703 #ifdef NUMCHAR_OPTION
2704 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2705 w16e_conv(c1, &c2, &c1);
2709 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2712 (*o_putc)(ascii_intro);
2713 output_mode = ASCII;
2716 } else if (c2==X0201) {
2717 if (output_mode!=X0201) {
2718 output_mode = X0201;
2724 } else if (c2==ISO8859_1) {
2725 /* iso8859 introduction, or 8th bit on */
2726 /* Can we convert in 7bit form using ESC-'-'-A ?
2728 output_mode = ISO8859_1;
2730 } else if (c2 == 0) {
2731 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2734 (*o_putc)(ascii_intro);
2735 output_mode = ASCII;
2739 if (output_mode != X0208) {
2740 output_mode = X0208;
2743 (*o_putc)(kanji_intro);
2745 if (c1<0x20 || 0x7e<c1)
2747 if (c2<0x20 || 0x7e<c2)
2759 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2762 } else if (base64_count>66 && mimeout_mode) {
2763 (*o_base64conv)(EOF,0);
2764 (*o_base64conv)(NL,0);
2765 (*o_base64conv)(SPACE,0);
2767 (*o_base64conv)(c2,c1);
2771 static int broken_buf[3];
2772 static int broken_counter = 0;
2773 static int broken_last = 0;
2780 if (broken_counter>0) {
2781 return broken_buf[--broken_counter];
2784 if (c=='$' && broken_last != ESC
2785 && (input_mode==ASCII || input_mode==X0201)) {
2788 if (c1=='@'|| c1=='B') {
2789 broken_buf[0]=c1; broken_buf[1]=c;
2796 } else if (c=='(' && broken_last != ESC
2797 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2800 if (c1=='J'|| c1=='B') {
2801 broken_buf[0]=c1; broken_buf[1]=c;
2819 if (broken_counter<2)
2820 broken_buf[broken_counter++]=c;
2824 static int prev_cr = 0;
2832 if (! (c2==0&&c1==NL) ) {
2838 } else if (c1=='\r') {
2840 } else if (c1=='\n') {
2841 if (crmode_f==CRLF) {
2842 (*o_crconv)(0,'\r');
2843 } else if (crmode_f==CR) {
2844 (*o_crconv)(0,'\r');
2848 } else if (c1!='\032' || crmode_f!=NL){
2854 Return value of fold_conv()
2856 \n add newline and output char
2857 \r add newline and output nothing
2860 1 (or else) normal output
2862 fold state in prev (previous character)
2864 >0x80 Japanese (X0208/X0201)
2869 This fold algorithm does not preserve leading spaces in a line.
2870 This is the main difference from fmt.
2873 #define char_size(c2,c1) (c2?2:1)
2882 if (c1== '\r' && !fold_preserve_f) {
2883 fold_state=0; /* ignore cr */
2884 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2886 fold_state=0; /* ignore cr */
2887 } else if (c1== BS) {
2888 if (f_line>0) f_line--;
2890 } else if (c2==EOF && f_line != 0) { /* close open last line */
2892 } else if ((c1=='\n' && !fold_preserve_f)
2893 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2894 && fold_preserve_f)) {
2896 if (fold_preserve_f) {
2900 } else if ((f_prev == c1 && !fold_preserve_f)
2901 || (f_prev == '\n' && fold_preserve_f)
2902 ) { /* duplicate newline */
2905 fold_state = '\n'; /* output two newline */
2911 if (f_prev&0x80) { /* Japanese? */
2913 fold_state = 0; /* ignore given single newline */
2914 } else if (f_prev==' ') {
2918 if (++f_line<=fold_len)
2922 fold_state = '\r'; /* fold and output nothing */
2926 } else if (c1=='\f') {
2931 fold_state = '\n'; /* output newline and clear */
2932 } else if ( (c2==0 && c1==' ')||
2933 (c2==0 && c1=='\t')||
2934 (c2=='!'&& c1=='!')) {
2935 /* X0208 kankaku or ascii space */
2936 if (f_prev == ' ') {
2937 fold_state = 0; /* remove duplicate spaces */
2940 if (++f_line<=fold_len)
2941 fold_state = ' '; /* output ASCII space only */
2943 f_prev = ' '; f_line = 0;
2944 fold_state = '\r'; /* fold and output nothing */
2948 prev0 = f_prev; /* we still need this one... , but almost done */
2950 if (c2 || c2==X0201)
2951 f_prev |= 0x80; /* this is Japanese */
2952 f_line += char_size(c2,c1);
2953 if (f_line<=fold_len) { /* normal case */
2956 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2957 f_line = char_size(c2,c1);
2958 fold_state = '\n'; /* We can't wait, do fold now */
2959 } else if (c2==X0201) {
2960 /* simple kinsoku rules return 1 means no folding */
2961 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2962 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2963 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2964 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2965 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2966 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2967 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2969 fold_state = '\n';/* add one new f_line before this character */
2972 fold_state = '\n';/* add one new f_line before this character */
2975 /* kinsoku point in ASCII */
2976 if ( c1==')'|| /* { [ ( */
2987 /* just after special */
2988 } else if (!is_alnum(prev0)) {
2989 f_line = char_size(c2,c1);
2991 } else if ((prev0==' ') || /* ignored new f_line */
2992 (prev0=='\n')|| /* ignored new f_line */
2993 (prev0&0x80)) { /* X0208 - ASCII */
2994 f_line = char_size(c2,c1);
2995 fold_state = '\n';/* add one new f_line before this character */
2997 fold_state = 1; /* default no fold in ASCII */
3001 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3002 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3003 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3004 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3005 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3006 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3007 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3008 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3009 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3010 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3011 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3012 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3013 /* default no fold in kinsoku */
3016 f_line = char_size(c2,c1);
3017 /* add one new f_line before this character */
3020 f_line = char_size(c2,c1);
3022 /* add one new f_line before this character */
3027 /* terminator process */
3028 switch(fold_state) {
3047 int z_prev2=0,z_prev1=0;
3054 /* if (c2) c1 &= 0x7f; assertion */
3056 if (x0201_f && z_prev2==X0201) { /* X0201 */
3057 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3059 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3061 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3063 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3067 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3076 if (x0201_f && c2==X0201) {
3077 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3078 /* wait for dakuten (voiced mark) or handakuten (semi-voiced mark) */
3079 z_prev1 = c1; z_prev2 = c2;
3082 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3087 /* JISX0208 Alphabet */
3088 if (alpha_f && c2 == 0x23 ) {
3090 } else if (alpha_f && c2 == 0x21 ) {
3091 /* JISX0208 Kigou */
3096 } else if (alpha_f&0x4) {
3101 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3107 case '>': entity = ">"; break;
3108 case '<': entity = "<"; break;
3109 case '\"': entity = """; break;
3110 case '&': entity = "&"; break;
3113 while (*entity) (*o_zconv)(0, *entity++);
3123 #define rot13(c) ( \
3125 (c <= 'M') ? (c + 13): \
3126 (c <= 'Z') ? (c - 13): \
3128 (c <= 'm') ? (c + 13): \
3129 (c <= 'z') ? (c - 13): \
3133 #define rot47(c) ( \
3135 ( c <= 'O' ) ? (c + 47) : \
3136 ( c <= '~' ) ? (c - 47) : \
3144 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3150 (*o_rot_conv)(c2,c1);
3157 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3159 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3162 (*o_hira_conv)(c2,c1);
3167 iso2022jp_check_conv(c2,c1)
3170 static int range[RANGE_NUM_MAX][2] = {
3193 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3197 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3202 for (i = 0; i < RANGE_NUM_MAX; i++) {
3203 start = range[i][0];
3206 if (c >= start && c <= end) {
3211 (*o_iso2022jp_check_conv)(c2,c1);
3215 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3217 unsigned char *mime_pattern[] = {
3218 (unsigned char *)"\075?EUC-JP?B?",
3219 (unsigned char *)"\075?SHIFT_JIS?B?",
3220 (unsigned char *)"\075?ISO-8859-1?Q?",
3221 (unsigned char *)"\075?ISO-8859-1?B?",
3222 (unsigned char *)"\075?ISO-2022-JP?B?",
3223 (unsigned char *)"\075?ISO-2022-JP?Q?",
3224 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3225 (unsigned char *)"\075?UTF-8?B?",
3226 (unsigned char *)"\075?UTF-8?Q?",
3228 (unsigned char *)"\075?US-ASCII?Q?",
3233 /* Marker used to raise the priority of the matching input code */
3234 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3235 e_iconv, s_iconv, 0, 0, 0, 0,
3236 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3242 int mime_encode[] = {
3243 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3244 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3251 int mime_encode_method[] = {
3252 'B', 'B','Q', 'B', 'B', 'Q',
3253 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3261 #define MAXRECOVER 20
3263 /* I don't trust portability of toupper */
3264 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
3265 #define nkf_isdigit(c) ('0'<=c && c<='9')
3266 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
3271 if (i_getc!=mime_getc) {
3272 i_mgetc = i_getc; i_getc = mime_getc;
3273 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3274 if(mime_f==STRICT_MIME) {
3275 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3276 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3282 unswitch_mime_getc()
3284 if(mime_f==STRICT_MIME) {
3285 i_mgetc = i_mgetc_buf;
3286 i_mungetc = i_mungetc_buf;
3289 i_ungetc = i_mungetc;
3293 mime_begin_strict(f)
3298 unsigned char *p,*q;
3299 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3301 mime_decode_mode = FALSE;
3302 /* =? has been checked */
3304 p = mime_pattern[j];
3307 for(i=2;p[i]>' ';i++) { /* start at =? */
3308 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3309 /* pattern fails, try next one */
3311 while ((p = mime_pattern[++j])) {
3312 for(k=2;k<i;k++) /* assume length(p) > i */
3313 if (p[k]!=q[k]) break;
3314 if (k==i && nkf_toupper(c1)==p[k]) break;
3316 if (p) continue; /* found next one, continue */
3317 /* all fails, output from recovery buffer */
3325 mime_decode_mode = p[i-2];
3327 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3329 if (mime_decode_mode=='B') {
3330 mimebuf_f = unbuf_f;
3332 /* do MIME integrity check */
3333 return mime_integrity(f,mime_pattern[j]);
3345 /* we don't keep eof of Fifo, because it contains ?= as
3346 a terminator. It was checked in mime_integrity. */
3347 return ((mimebuf_f)?
3348 (*i_mgetc_buf)(f):Fifo(mime_input++));
3352 mime_ungetc_buf(c,f)
3357 (*i_mungetc_buf)(c,f);
3359 Fifo(--mime_input)=c;
3370 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3371 /* re-read and convert again from mime_buffer. */
3373 /* =? has been checked */
3375 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3376 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3377 /* We accept any character type even if it is broken by new lines */
3378 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3379 if (c1=='\n'||c1==' '||c1=='\r'||
3380 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3382 /* Failed. But this could be another MIME preamble */
3390 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3391 if (!(++i<MAXRECOVER) || c1==EOF) break;
3392 if (c1=='b'||c1=='B') {
3393 mime_decode_mode = 'B';
3394 } else if (c1=='q'||c1=='Q') {
3395 mime_decode_mode = 'Q';
3399 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3400 if (!(++i<MAXRECOVER) || c1==EOF) break;
3402 mime_decode_mode = FALSE;
3408 if (!mime_decode_mode) {
3409 /* false MIME preamble, restart from mime_buffer */
3410 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3411 /* Since we are in MIME mode until buffer becomes empty, */
3412 /* we never go into mime_begin again for a while. */
3415 /* discard mime preamble, and goto MIME mode */
3417 /* do no MIME integrity check */
3418 return c1; /* used only for checking EOF */
3433 fprintf(stderr, "%s\n", str);
3439 set_input_codename (codename)
3444 strcmp(codename, "") != 0 &&
3445 strcmp(codename, input_codename) != 0)
3447 is_inputcode_mixed = TRUE;
3449 input_codename = codename;
3450 is_inputcode_set = TRUE;
3454 print_guessed_code (filename)
3457 char *codename = "BINARY";
3458 if (!is_inputcode_mixed) {
3459 if (strcmp(input_codename, "") == 0) {
3462 codename = input_codename;
3465 if (filename != NULL) printf("%s:", filename);
3466 printf("%s\n", codename);
3473 if (nkf_isdigit(x)) return x - '0';
3474 return nkf_toupper(x) - 'A' + 10;
3479 #ifdef ANSI_C_PROTOTYPE
3480 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3483 hex_getc(ch, f, g, u)
3496 if (!nkf_isxdigit(c2)){
3501 if (!nkf_isxdigit(c3)){
3506 return (hex2bin(c2) << 4) | hex2bin(c3);
3513 return hex_getc(':', f, i_cgetc, i_cungetc);
3521 return (*i_cungetc)(c, f);
3528 return hex_getc('%', f, i_ugetc, i_uungetc);
3536 return (*i_uungetc)(c, f);
3540 #ifdef NUMCHAR_OPTION
3545 int (*g)() = i_ngetc;
3546 int (*u)() = i_nungetc;
3557 if (buf[i] == 'x' || buf[i] == 'X'){
3558 for (j = 0; j < 5; j++){
3560 if (!nkf_isxdigit(buf[i])){
3567 c |= hex2bin(buf[i]);
3570 for (j = 0; j < 6; j++){
3574 if (!nkf_isdigit(buf[i])){
3581 c += hex2bin(buf[i]);
3587 return CLASS_UTF16 | c;
3597 numchar_ungetc(c, f)
3601 return (*i_nungetc)(c, f);
3610 int c1, c2, c3, c4, cc;
3611 int t1, t2, t3, t4, mode, exit_mode;
3615 int lwsp_size = 128;
3617 if (mime_top != mime_last) { /* Something is in FIFO */
3618 return Fifo(mime_top++);
3620 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3621 mime_decode_mode=FALSE;
3622 unswitch_mime_getc();
3623 return (*i_getc)(f);
3626 if (mimebuf_f == FIXED_MIME)
3627 exit_mode = mime_decode_mode;
3630 if (mime_decode_mode == 'Q') {
3631 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3633 if (c1=='_') return ' ';
3634 if (c1!='=' && c1!='?') {
3638 mime_decode_mode = exit_mode; /* prepare for quit */
3639 if (c1<=' ') return c1;
3640 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3641 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3642 /* end Q encoding */
3643 input_mode = exit_mode;
3645 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3646 if (lwsp_buf==NULL) {
3647 perror("can't malloc");
3650 while ((c1=(*i_getc)(f))!=EOF) {
3655 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3663 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3664 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3679 lwsp_buf[lwsp_count] = c1;
3680 if (lwsp_count++>lwsp_size){
3682 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3683 if (lwsp_buf_new==NULL) {
3686 perror("can't realloc");
3689 lwsp_buf = lwsp_buf_new;
3695 if (lwsp_count > 0) {
3696 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3700 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3701 i_ungetc(lwsp_buf[lwsp_count],f);
3709 if (c1=='='&&c2<' ') { /* this is soft wrap */
3710 while((c1 = (*i_mgetc)(f)) <=' ') {
3711 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3713 mime_decode_mode = 'Q'; /* still in MIME */
3714 goto restart_mime_q;
3717 mime_decode_mode = 'Q'; /* still in MIME */
3721 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3722 if (c2<=' ') return c2;
3723 mime_decode_mode = 'Q'; /* still in MIME */
3724 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3725 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3726 return ((hex(c2)<<4) + hex(c3));
3729 if (mime_decode_mode != 'B') {
3730 mime_decode_mode = FALSE;
3731 return (*i_mgetc)(f);
3735 /* Base64 encoding */
3737 MIME allows line breaks in the middle of
3738 Base64, but we are very pessimistic in decoding
3739 in unbuf mode because MIME encoded code may be broken by
3740 less or an editor's control sequence (such as ESC-[-K) in unbuffered
3741 mode. Ignore incomplete MIME.
3743 mode = mime_decode_mode;
3744 mime_decode_mode = exit_mode; /* prepare for quit */
3746 while ((c1 = (*i_mgetc)(f))<=' ') {
3751 if ((c2 = (*i_mgetc)(f))<=' ') {
3754 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3755 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3758 if ((c1 == '?') && (c2 == '=')) {
3761 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3762 if (lwsp_buf==NULL) {
3763 perror("can't malloc");
3766 while ((c1=(*i_getc)(f))!=EOF) {
3771 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3779 if ((c1=(*i_getc)(f))!=EOF) {
3783 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3798 lwsp_buf[lwsp_count] = c1;
3799 if (lwsp_count++>lwsp_size){
3801 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3802 if (lwsp_buf_new==NULL) {
3805 perror("can't realloc");
3808 lwsp_buf = lwsp_buf_new;
3814 if (lwsp_count > 0) {
3815 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3819 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3820 i_ungetc(lwsp_buf[lwsp_count],f);
3829 if ((c3 = (*i_mgetc)(f))<=' ') {
3832 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3833 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3837 if ((c4 = (*i_mgetc)(f))<=' ') {
3840 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3841 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3845 mime_decode_mode = mode; /* still in MIME sigh... */
3847 /* BASE 64 decoding */
3849 t1 = 0x3f & base64decode(c1);
3850 t2 = 0x3f & base64decode(c2);
3851 t3 = 0x3f & base64decode(c3);
3852 t4 = 0x3f & base64decode(c4);
3853 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3855 Fifo(mime_last++) = cc;
3856 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3858 Fifo(mime_last++) = cc;
3859 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3861 Fifo(mime_last++) = cc;
3866 return Fifo(mime_top++);
3874 Fifo(--mime_top) = c;
3885 /* In buffered mode, read until =? or NL or buffer full
3887 mime_input = mime_top;
3888 mime_last = mime_top;
3889 while(*p) Fifo(mime_input++) = *p++;
3892 while((c=(*i_getc)(f))!=EOF) {
3893 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3894 break; /* buffer full */
3896 if (c=='=' && d=='?') {
3897 /* checked. skip header, start decode */
3898 Fifo(mime_input++) = c;
3899 /* mime_last_input = mime_input; */
3904 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3906 /* Should we check length mod 4? */
3907 Fifo(mime_input++) = c;
3910 /* In case of Incomplete MIME, no MIME decode */
3911 Fifo(mime_input++) = c;
3912 mime_last = mime_input; /* point undecoded buffer */
3913 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3914 switch_mime_getc(); /* anyway we need buffered getc */
3925 i = c - 'A'; /* A..Z 0-25 */
3927 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3929 } else if (c > '/') {
3930 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3931 } else if (c == '+') {
3932 i = '>' /* 62 */ ; /* + 62 */
3934 i = '?' /* 63 */ ; /* / 63 */
3939 static char basis_64[] =
3940 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Threshold used while a MIME encoded word is open: once more than this
 * many bytes are pending in mimeout_buf the encoder flushes them.
 */
#define MIMEOUT_BUF_LENGTH (60)
/*
 * Pending output bytes for the MIME encoder.
 *
 * Outside an encoded word the encoder stores a byte first and only then
 * tests mimeout_buf_count>75, so indices 0..75 can be written.  With only
 * MIMEOUT_BUF_LENGTH+1 (61) bytes that store ran past the end of the array.
 * The array is therefore sized for whichever of the two thresholds is
 * larger, plus the one byte stored before the threshold test fires.
 */
char mimeout_buf[(MIMEOUT_BUF_LENGTH>75 ? MIMEOUT_BUF_LENGTH : 75)+1];
/* Number of valid bytes currently held in mimeout_buf. */
int mimeout_buf_count = 0;
/* Boolean flag tested before buffered white space is written out. */
int mimeout_preserve_space = 0;
/*
 * Map a 4-bit value (0..15) to its upper-case hexadecimal digit character.
 * Every use of the parameter and the whole expansion are parenthesized so
 * that arguments such as `v & 0xf` or `v >> 4` keep their intended meaning.
 * The argument is still evaluated more than once: do not pass expressions
 * with side effects.
 */
#define itoh4(c) ((c)>=10?(c)+'A'-10:(c)+'0')
3956 p = mime_pattern[0];
3957 for(i=0;mime_encode[i];i++) {
3958 if (mode == mime_encode[i]) {
3959 p = mime_pattern[i];
3963 mimeout_mode = mime_encode_method[i];
3966 if (base64_count>45) {
3970 if (!mimeout_preserve_space && mimeout_buf_count>0
3971 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
3972 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
3976 if (!mimeout_preserve_space) {
3977 for (;i<mimeout_buf_count;i++) {
3978 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
3979 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
3980 (*o_mputc)(mimeout_buf[i]);
3987 mimeout_preserve_space = FALSE;
3993 j = mimeout_buf_count;
3994 mimeout_buf_count = 0;
3996 mime_putc(mimeout_buf[i]);
4012 switch(mimeout_mode) {
4017 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4023 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4029 if (mimeout_f!=FIXED_MIME) {
4031 } else if (mimeout_mode != 'Q')
4040 switch(mimeout_mode) {
4044 (*o_mputc)(itoh4(((c>>4)&0xf)));
4045 (*o_mputc)(itoh4((c&0xf)));
4054 (*o_mputc)(basis_64[c>>2]);
4059 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4065 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4066 (*o_mputc)(basis_64[c & 0x3F]);
4080 if (mimeout_f==FIXED_MIME && base64_count>50) {
4084 } else if (c==CR||c==NL) {
4087 if (c!=EOF && mimeout_f!=FIXED_MIME) {
4088 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4089 if (mimeout_mode=='Q') {
4098 } else if (mimeout_mode) {
4099 if (base64_count>63) {
4104 mimeout_preserve_space = TRUE;
4106 if (c==SPACE || c==TAB || c==CR || c==NL) {
4107 for (i=0;i<mimeout_buf_count;i++) {
4108 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4110 for (i=0;i<mimeout_buf_count;i++) {
4111 (*o_mputc)(mimeout_buf[i]);
4114 mimeout_buf_count = 0;
4117 mimeout_buf[mimeout_buf_count++] = c;
4118 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4121 for (i=0;i<mimeout_buf_count;i++) {
4122 (*o_mputc)(mimeout_buf[i]);
4128 if (mimeout_buf_count>0 && SPACE<c) {
4129 mimeout_buf[mimeout_buf_count++] = c;
4130 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4135 } else if (!mimeout_mode) {
4136 if (c==SPACE || c==TAB || c==CR || c==NL) {
4137 if ((c==CR || c==NL)
4138 &&(mimeout_buf[mimeout_buf_count-1]==SPACE
4139 || mimeout_buf[mimeout_buf_count-1]==TAB)) {
4140 mimeout_buf_count--;
4142 for (i=0;i<mimeout_buf_count;i++) {
4143 (*o_mputc)(mimeout_buf[i]);
4146 mimeout_buf_count = 0;
4148 mimeout_buf[mimeout_buf_count++] = c;
4149 if (mimeout_buf_count>75) {
4150 open_mime(output_mode);
4154 } else if (!mimeout_mode) {
4155 if (mimeout_buf_count>0 && mimeout_buf[mimeout_buf_count-1]==SPACE) {
4156 for (i=0;i<mimeout_buf_count-1;i++) {
4157 (*o_mputc)(mimeout_buf[i]);
4160 mimeout_buf[0] = SPACE;
4161 mimeout_buf_count = 1;
4163 open_mime(output_mode);
4165 } else { /* c==EOF */
4166 j = mimeout_buf_count;
4169 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4170 || mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4172 (*mime_putc)(mimeout_buf[i]);
4176 (*o_mputc)(mimeout_buf[i]);
4182 if (mimeout_buf_count>0) {
4183 j = mimeout_buf_count;
4184 mimeout_buf_count = 0;
4186 mimeout_addchar(mimeout_buf[i]);
4198 struct input_code *p = input_code_list;
4211 mime_f = STRICT_MIME;
4216 #if defined(MSDOS) || defined(__OS2__)
4221 iso2022jp_f = FALSE;
4222 #ifdef UTF8_OUTPUT_ENABLE
4225 ms_ucs_map_f = FALSE;
4237 is_inputcode_mixed = FALSE;
4238 is_inputcode_set = FALSE;
4242 #ifdef SHIFTJIS_CP932
4248 for (i = 0; i < 256; i++){
4249 prefix_table[i] = 0;
4252 #ifdef UTF8_INPUT_ENABLE
4253 utf16_mode = UTF16LE_INPUT;
4255 mimeout_buf_count = 0;
4260 fold_preserve_f = FALSE;
4263 kanji_intro = DEFAULT_J;
4264 ascii_intro = DEFAULT_R;
4265 fold_margin = FOLD_MARGIN;
4266 output_conv = DEFAULT_CONV;
4267 oconv = DEFAULT_CONV;
4268 o_zconv = no_connection;
4269 o_fconv = no_connection;
4270 o_crconv = no_connection;
4271 o_rot_conv = no_connection;
4272 o_hira_conv = no_connection;
4273 o_base64conv = no_connection;
4274 o_iso2022jp_check_conv = no_connection;
4277 i_ungetc = std_ungetc;
4279 i_bungetc = std_ungetc;
4282 i_mungetc = std_ungetc;
4283 i_mgetc_buf = std_getc;
4284 i_mungetc_buf = std_ungetc;
4285 output_mode = ASCII;
4288 mime_decode_mode = FALSE;
4294 z_prev2=0,z_prev1=0;
4300 no_connection(c2,c1)
4303 no_connection2(c2,c1,0);
4307 no_connection2(c2,c1,c0)
4310 fprintf(stderr,"nkf internal module connection failure.\n");
4318 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4319 fprintf(stderr,"Flags:\n");
4320 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4321 #ifdef DEFAULT_CODE_SJIS
4322 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4324 #ifdef DEFAULT_CODE_JIS
4325 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4327 #ifdef DEFAULT_CODE_EUC
4328 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4330 #ifdef DEFAULT_CODE_UTF8
4331 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4333 #ifdef UTF8_OUTPUT_ENABLE
4334 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4336 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4337 #ifdef UTF8_INPUT_ENABLE
4338 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4340 fprintf(stderr,"t no conversion\n");
4341 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4342 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4343 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
4344 fprintf(stderr,"v Show this usage. V: show version\n");
4345 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4346 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4347 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4348 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4349 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4350 fprintf(stderr," 3: Convert HTML Entity\n");
4351 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4352 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4354 fprintf(stderr,"T Text mode output\n");
4356 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4357 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4358 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
4359 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4360 fprintf(stderr,"long name options\n");
4361 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4362 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4363 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4364 fprintf(stderr," --cp932, --no-cp932 CP932 compatible\n");
4366 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4368 #ifdef NUMCHAR_OPTION
4369 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4371 #ifdef UTF8_OUTPUT_ENABLE
4372 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4375 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4377 fprintf(stderr," -g, --guess Guess the input code\n");
4378 fprintf(stderr," --help,--version\n");
4385 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4386 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4389 #if defined(MSDOS) && defined(__WIN16__)
4392 #if defined(MSDOS) && defined(__WIN32__)
4398 ,NKF_VERSION,NKF_RELEASE_DATE);
4399 fprintf(stderr,"\n%s\n",CopyRight);
4404 ** Patch authors:
4405 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4406 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4407 ** ohta@src.ricoh.co.jp (Junn Ohta)
4408 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4409 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4410 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4411 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4412 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4413 ** GHG00637@nifty-serve.or.jp (COW)