1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version works as a drop-in replacement for the previous nkf.
32 ** When invoked as, e.g., nkf -e, input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp directly.
35 ** There is a high possibility that bugs still remain
36 ** (especially in auto-detection, mixed encodings and error handling).
38 ** If you find any problems, please report them to
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.66 2005/03/07 16:16:50 naruse Exp $ */
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2005-03-05"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16LE_INPUT 14
212 #define UTF16BE_INPUT 15
/* is_alnum(c): true when c is within 'a'-'z', 'A'-'Z' or '0'-'9'.
   NOTE: c is expanded unparenthesized and evaluated up to six times, so pass
   only a simple, side-effect-free operand. */
232 #define is_alnum(c) \
233 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
235 #define HOLD_SIZE 1024
236 #define IOBUF_SIZE 16384
238 #define DEFAULT_J 'B'
239 #define DEFAULT_R 'B'
241 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
242 #define SJ6394 0x0161 /* 63 - 94 ku offset */
244 #define RANGE_NUM_MAX 18
249 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
250 #define sizeof_euc_utf8 94
251 #define sizeof_euc_to_utf8_1byte 94
252 #define sizeof_euc_to_utf8_2bytes 94
253 #define sizeof_utf8_to_euc_C2 64
254 #define sizeof_utf8_to_euc_E5B8 64
255 #define sizeof_utf8_to_euc_2bytes 112
256 #define sizeof_utf8_to_euc_3bytes 112
259 /* MIME preprocessor */
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
266 /* function prototype */
268 #ifdef ANSI_C_PROTOTYPE
270 #define STATIC static
282 void (*status_func)PROTO((struct input_code *, int));
283 int (*iconv_func)PROTO((int c2, int c1, int c0));
287 STATIC char *input_codename = "";
289 STATIC int noconvert PROTO((FILE *f));
290 STATIC int kanji_convert PROTO((FILE *f));
291 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
292 STATIC int push_hold_buf PROTO((int c2));
293 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
294 STATIC int s_iconv PROTO((int c2,int c1,int c0));
295 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
296 STATIC int e_iconv PROTO((int c2,int c1,int c0));
297 #ifdef UTF8_INPUT_ENABLE
298 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
299 STATIC int w_iconv PROTO((int c2,int c1,int c0));
300 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
301 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
302 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
304 #ifdef UTF8_OUTPUT_ENABLE
305 STATIC int e2w_conv PROTO((int c2,int c1));
306 STATIC void w_oconv PROTO((int c2,int c1));
307 STATIC void w_oconv16 PROTO((int c2,int c1));
309 STATIC void e_oconv PROTO((int c2,int c1));
310 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
311 STATIC void s_oconv PROTO((int c2,int c1));
312 STATIC void j_oconv PROTO((int c2,int c1));
313 STATIC void fold_conv PROTO((int c2,int c1));
314 STATIC void cr_conv PROTO((int c2,int c1));
315 STATIC void z_conv PROTO((int c2,int c1));
316 STATIC void rot_conv PROTO((int c2,int c1));
317 STATIC void hira_conv PROTO((int c2,int c1));
318 STATIC void base64_conv PROTO((int c2,int c1));
319 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
320 STATIC void no_connection PROTO((int c2,int c1));
321 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
323 STATIC void code_score PROTO((struct input_code *ptr));
324 STATIC void code_status PROTO((int c));
326 STATIC void std_putc PROTO((int c));
327 STATIC int std_getc PROTO((FILE *f));
328 STATIC int std_ungetc PROTO((int c,FILE *f));
330 STATIC int broken_getc PROTO((FILE *f));
331 STATIC int broken_ungetc PROTO((int c,FILE *f));
333 STATIC int mime_begin PROTO((FILE *f));
334 STATIC int mime_getc PROTO((FILE *f));
335 STATIC int mime_ungetc PROTO((int c,FILE *f));
337 STATIC int mime_begin_strict PROTO((FILE *f));
338 STATIC int mime_getc_buf PROTO((FILE *f));
339 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
340 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
342 STATIC int base64decode PROTO((int c));
343 STATIC void mime_prechar PROTO((int c2, int c1));
344 STATIC void mime_putc PROTO((int c));
345 STATIC void open_mime PROTO((int c));
346 STATIC void close_mime PROTO(());
347 STATIC void usage PROTO(());
348 STATIC void version PROTO(());
349 STATIC void options PROTO((unsigned char *c));
350 #if defined(PERL_XS) || defined(WIN32DLL)
351 STATIC void reinit PROTO(());
356 static unsigned char stdibuf[IOBUF_SIZE];
357 static unsigned char stdobuf[IOBUF_SIZE];
358 static unsigned char hold_buf[HOLD_SIZE*2];
359 static int hold_count;
361 /* MIME preprocessor fifo */
363 #define MIME_BUF_SIZE (1024) /* ring buffer size; must stay a power of two (2^n) for MIME_BUF_MASK */
364 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) /* index mask: all low bits set because the size is 2^n */
365 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] /* slot for position n, wrapped into mime_buf by the mask */
366 static unsigned char mime_buf[MIME_BUF_SIZE];
367 static unsigned int mime_top = 0;
368 static unsigned int mime_last = 0; /* decoded */
369 static unsigned int mime_input = 0; /* undecoded */
372 static int unbuf_f = FALSE;
373 static int estab_f = FALSE;
374 static int nop_f = FALSE;
375 static int binmode_f = TRUE; /* binary mode */
376 static int rot_f = FALSE; /* rot13/47 mode */
377 static int hira_f = FALSE; /* hira/kata henkan */
378 static int input_f = FALSE; /* non fixed input code */
379 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
380 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
381 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
382 static int mimebuf_f = FALSE; /* MIME buffered input */
383 static int broken_f = FALSE; /* convert ESC-less broken JIS */
384 static int iso8859_f = FALSE; /* ISO8859 through */
385 static int mimeout_f = FALSE; /* base64 mode */
386 #if defined(MSDOS) || defined(__OS2__)
387 static int x0201_f = TRUE; /* Assume JISX0201 kana */
389 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
391 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
392 #ifdef UTF8_OUTPUT_ENABLE
393 static int unicode_bom_f= 0; /* Output Unicode BOM */
394 static int w_oconv16_LE = 0; /* utf-16 little endian */
395 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
399 #ifdef NUMCHAR_OPTION
401 #define CLASS_MASK 0x0f000000
402 #define CLASS_UTF16 0x01000000
406 static int cap_f = FALSE;
407 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
408 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
409 STATIC int cap_getc PROTO((FILE *f));
410 STATIC int cap_ungetc PROTO((int c,FILE *f));
412 static int url_f = FALSE;
413 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
414 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
415 STATIC int url_getc PROTO((FILE *f));
416 STATIC int url_ungetc PROTO((int c,FILE *f));
418 static int numchar_f = FALSE;
419 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
420 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
421 STATIC int numchar_getc PROTO((FILE *f));
422 STATIC int numchar_ungetc PROTO((int c,FILE *f));
426 static int noout_f = FALSE;
427 STATIC void no_putc PROTO((int c));
428 static int debug_f = FALSE;
429 STATIC void debug PROTO((char *str));
432 static int guess_f = FALSE;
433 STATIC void print_guessed_code PROTO((char *filename));
434 STATIC void set_input_codename PROTO((char *codename));
435 static int is_inputcode_mixed = FALSE;
436 static int is_inputcode_set = FALSE;
439 static int exec_f = 0;
442 #ifdef SHIFTJIS_CP932
443 STATIC int cp932_f = TRUE;
444 #define CP932_TABLE_BEGIN (0xfa)
445 #define CP932_TABLE_END (0xfc)
447 STATIC int cp932inv_f = TRUE;
448 #define CP932INV_TABLE_BEGIN (0xed)
449 #define CP932INV_TABLE_END (0xee)
451 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
452 #endif /* SHIFTJIS_CP932 */
455 STATIC int x0212_f = FALSE;
456 static int x0212_shift PROTO((int c));
457 static int x0212_unshift PROTO((int c));
460 STATIC unsigned char prefix_table[256];
462 STATIC void e_status PROTO((struct input_code *, int));
463 STATIC void s_status PROTO((struct input_code *, int));
465 #ifdef UTF8_INPUT_ENABLE
466 STATIC void w_status PROTO((struct input_code *, int));
467 STATIC void w16_status PROTO((struct input_code *, int));
468 static int utf16_mode = UTF16LE_INPUT;
471 struct input_code input_code_list[] = {
472 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
473 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
474 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
475 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
479 static int mimeout_mode = 0;
480 static int base64_count = 0;
482 /* X0208 -> ASCII converter */
485 static int f_line = 0; /* chars in line */
486 static int f_prev = 0;
487 static int fold_preserve_f = FALSE; /* preserve new lines */
488 static int fold_f = FALSE;
489 static int fold_len = 0;
492 static unsigned char kanji_intro = DEFAULT_J;
493 static unsigned char ascii_intro = DEFAULT_R;
497 #define FOLD_MARGIN 10
498 #define DEFAULT_FOLD 60
500 static int fold_margin = FOLD_MARGIN;
504 #ifdef DEFAULT_CODE_JIS
505 # define DEFAULT_CONV j_oconv
507 #ifdef DEFAULT_CODE_SJIS
508 # define DEFAULT_CONV s_oconv
510 #ifdef DEFAULT_CODE_EUC
511 # define DEFAULT_CONV e_oconv
513 #ifdef DEFAULT_CODE_UTF8
514 # define DEFAULT_CONV w_oconv
517 /* process default */
518 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
520 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
521 /* s_iconv or oconv */
522 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
524 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
525 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
526 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
527 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
528 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
529 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
530 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
532 /* static redirections */
534 static void (*o_putc)PROTO((int c)) = std_putc;
536 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
537 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
539 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
540 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
542 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
544 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
545 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
547 /* for strict mime */
548 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
549 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
552 static int output_mode = ASCII, /* output kanji mode */
553 input_mode = ASCII, /* input kanji mode */
554 shift_mode = FALSE; /* TRUE shift out, or X0201 */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
557 /* X0201 / X0208 conversion tables */
559 /* X0201 kana conversion table */
562 unsigned char cv[]= {
563 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
564 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
565 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
566 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
567 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
568 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
569 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
570 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
571 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
572 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
573 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
574 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
575 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
576 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
577 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
578 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
582 /* X0201 kana conversion table for dakuten */
585 unsigned char dv[]= {
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
591 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
592 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
593 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
594 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
595 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
597 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 /* X0201 kana conversion table for han-dakuten */
607 unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
630 unsigned char fv[] = {
632 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
633 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
634 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
636 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
637 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
638 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
640 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
649 static int file_out = FALSE;
651 static int overwrite = FALSE;
654 static int crmode_f = 0; /* CR, NL, CRLF */
655 #ifdef EASYWIN /*Easy Win */
656 static int end_check;
659 #define STD_GC_BUFSIZE (256)
660 int std_gc_buf[STD_GC_BUFSIZE];
664 #include "nkf32dll.c"
665 #elif defined(PERL_XS)
675 char *outfname = NULL;
678 #ifdef EASYWIN /*Easy Win */
679 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
682 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
683 cp = (unsigned char *)*argv;
688 if (pipe(fds) < 0 || (pid = fork()) < 0){
699 execvp(argv[1], &argv[1]);
713 if(x0201_f == WISH_TRUE)
714 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
716 if (binmode_f == TRUE)
718 if (freopen("","wb",stdout) == NULL)
725 setbuf(stdout, (char *) NULL);
727 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
730 if (binmode_f == TRUE)
732 if (freopen("","rb",stdin) == NULL) return (-1);
736 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
740 kanji_convert(stdin);
741 if (guess_f) print_guessed_code(NULL);
746 is_inputcode_mixed = FALSE;
747 is_inputcode_set = FALSE;
749 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
758 /* reopen file for stdout */
759 if (file_out == TRUE) {
762 outfname = malloc(strlen(origfname)
763 + strlen(".nkftmpXXXXXX")
769 strcpy(outfname, origfname);
773 for (i = strlen(outfname); i; --i){
774 if (outfname[i - 1] == '/'
775 || outfname[i - 1] == '\\'){
781 strcat(outfname, "ntXXXXXX");
783 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
786 strcat(outfname, ".nkftmpXXXXXX");
787 fd = mkstemp(outfname);
790 || (fd_backup = dup(fileno(stdout))) < 0
791 || dup2(fd, fileno(stdout)) < 0
802 outfname = "nkf.out";
805 if(freopen(outfname, "w", stdout) == NULL) {
809 if (binmode_f == TRUE) {
811 if (freopen("","wb",stdout) == NULL)
818 if (binmode_f == TRUE)
820 if (freopen("","rb",fin) == NULL)
825 setvbuffer(fin, stdibuf, IOBUF_SIZE);
829 char *filename = NULL;
831 if (nfiles > 1) filename = origfname;
832 if (guess_f) print_guessed_code(filename);
838 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
846 if (dup2(fd_backup, fileno(stdout)) < 0){
849 if (stat(origfname, &sb)) {
850 fprintf(stderr, "Can't stat %s\n", origfname);
852 /* restore permissions */
853 if (chmod(outfname, sb.st_mode)) {
854 fprintf(stderr, "Can't set permission %s\n", outfname);
857 /* restore timestamps */
858 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
859 tb[0] = tb[1] = sb.st_mtime;
860 if (utime(outfname, tb)) {
861 fprintf(stderr, "Can't set timestamp %s\n", outfname);
864 tb.actime = sb.st_atime;
865 tb.modtime = sb.st_mtime;
866 if (utime(outfname, &tb)) {
867 fprintf(stderr, "Can't set timestamp %s\n", outfname);
871 if (unlink(origfname)){
875 if (rename(outfname, origfname)) {
877 fprintf(stderr, "Can't rename %s to %s\n",
878 outfname, origfname);
886 #ifdef EASYWIN /*Easy Win */
887 if (file_out == FALSE)
888 scanf("%d",&end_check);
891 #else /* for Other OS */
892 if (file_out == TRUE)
897 #endif /* WIN32DLL */
922 {"katakana-hiragana","h3"},
929 #ifdef UTF8_OUTPUT_ENABLE
934 #ifdef UTF8_INPUT_ENABLE
936 {"utf16-input", "W16"},
945 #ifdef NUMCHAR_OPTION
946 {"numchar-input", ""},
952 #ifdef SHIFTJIS_CP932
962 static int option_mode = 0;
969 unsigned char *p = NULL;
981 case '-': /* literal options */
982 if (!*cp) { /* ignore the rest of arguments */
986 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
988 p = (unsigned char *)long_option[i].name;
989 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
997 cp = (unsigned char *)long_option[i].alias;
1000 if (strcmp(long_option[i].name, "overwrite") == 0){
1007 if (strcmp(long_option[i].name, "cap-input") == 0){
1011 if (strcmp(long_option[i].name, "url-input") == 0){
1016 #ifdef NUMCHAR_OPTION
1017 if (strcmp(long_option[i].name, "numchar-input") == 0){
1023 if (strcmp(long_option[i].name, "no-output") == 0){
1027 if (strcmp(long_option[i].name, "debug") == 0){
1032 if (strcmp(long_option[i].name, "cp932") == 0){
1033 #ifdef SHIFTJIS_CP932
1037 #ifdef UTF8_OUTPUT_ENABLE
1038 ms_ucs_map_f = TRUE;
1042 if (strcmp(long_option[i].name, "no-cp932") == 0){
1043 #ifdef SHIFTJIS_CP932
1047 #ifdef UTF8_OUTPUT_ENABLE
1048 ms_ucs_map_f = FALSE;
1052 #ifdef SHIFTJIS_CP932
1053 if (strcmp(long_option[i].name, "cp932inv") == 0){
1060 if (strcmp(long_option[i].name, "x0212") == 0){
1067 if (strcmp(long_option[i].name, "exec-in") == 0){
1071 if (strcmp(long_option[i].name, "exec-out") == 0){
1076 #ifdef UTF8_OUTPUT_ENABLE
1077 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1078 ms_ucs_map_f = TRUE;
1082 if (strcmp(long_option[i].name, "prefix=") == 0){
1083 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1084 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1085 prefix_table[p[i]] = p[1];
1092 case 'b': /* buffered mode */
1095 case 'u': /* unbuffered mode */
1098 case 't': /* transparent mode */
1101 case 'j': /* JIS output */
1103 output_conv = j_oconv;
1105 case 'e': /* AT&T EUC output */
1106 output_conv = e_oconv;
1108 case 's': /* SJIS output */
1109 output_conv = s_oconv;
1111 case 'l': /* ISO8859 Latin-1 support, no conversion */
1112 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1113 input_f = LATIN1_INPUT;
1115 case 'i': /* Kanji IN ESC-$-@/B */
1116 if (*cp=='@'||*cp=='B')
1117 kanji_intro = *cp++;
1119 case 'o': /* ASCII IN ESC-(-J/B/H */
1120 if (*cp=='J'||*cp=='B'||*cp=='H')
1121 ascii_intro = *cp++;
1128 if ('9'>= *cp && *cp>='0')
1129 hira_f |= (*cp++ -'0');
1136 #if defined(MSDOS) || defined(__OS2__)
1151 #ifdef UTF8_OUTPUT_ENABLE
1152 case 'w': /* UTF-8 output */
1153 if ('1'== cp[0] && '6'==cp[1]) {
1154 output_conv = w_oconv16; cp+=2;
1156 unicode_bom_f=2; cp++;
1159 unicode_bom_f=1; cp++;
1161 } else if (cp[0] == 'B') {
1162 unicode_bom_f=2; cp++;
1164 unicode_bom_f=1; cp++;
1167 } else if (cp[0] == '8') {
1168 output_conv = w_oconv; cp++;
1171 unicode_bom_f=1; cp++;
1174 output_conv = w_oconv;
1177 #ifdef UTF8_INPUT_ENABLE
1178 case 'W': /* UTF-8 input */
1179 if ('1'== cp[0] && '6'==cp[1]) {
1180 input_f = UTF16LE_INPUT;
1183 } else if (cp[0] == 'B') {
1185 input_f = UTF16BE_INPUT;
1187 } else if (cp[0] == '8') {
1189 input_f = UTF8_INPUT;
1191 input_f = UTF8_INPUT;
1194 /* Input code assumption */
1195 case 'J': /* JIS input */
1196 case 'E': /* AT&T EUC input */
1197 input_f = JIS_INPUT;
1199 case 'S': /* MS Kanji input */
1200 input_f = SJIS_INPUT;
1201 if (x0201_f==NO_X0201) x0201_f=TRUE;
1203 case 'Z': /* Convert X0208 alphabet to ASCII */
1204 /* bit:0 Convert X0208
1205 bit:1 Convert Kankaku to one space
1206 bit:2 Convert Kankaku to two spaces
1207 bit:3 Convert HTML Entity
1209 if ('9'>= *cp && *cp>='0')
1210 alpha_f |= 1<<(*cp++ -'0');
1214 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1215 x0201_f = FALSE; /* No X0201->X0208 conversion */
1217 ESC-(-I in JIS, EUC, MS Kanji
1218 SI/SO in JIS, EUC, MS Kanji
1219 SSO in EUC, JIS, not in MS Kanji
1220 MS Kanji (0xa0-0xdf)
1222 ESC-(-I in JIS (0x20-0x5f)
1223 SSO in EUC (0xa0-0xdf)
1224 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1227 case 'X': /* Assume X0201 kana */
1228 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1231 case 'F': /* preserve new lines */
1232 fold_preserve_f = TRUE;
1233 case 'f': /* folding -f60 or -f */
1236 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1238 fold_len += *cp++ - '0';
1240 if (!(0<fold_len && fold_len<BUFSIZ))
1241 fold_len = DEFAULT_FOLD;
1245 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1247 fold_margin += *cp++ - '0';
1251 case 'm': /* MIME support */
1252 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1253 if (*cp=='B'||*cp=='Q') {
1254 mime_decode_mode = *cp++;
1255 mimebuf_f = FIXED_MIME;
1256 } else if (*cp=='N') {
1257 mime_f = TRUE; cp++;
1258 } else if (*cp=='S') {
1259 mime_f = STRICT_MIME; cp++;
1260 } else if (*cp=='0') {
1261 mime_decode_f = FALSE;
1262 mime_f = FALSE; cp++;
1265 case 'M': /* MIME output */
1268 mimeout_f = FIXED_MIME; cp++;
1269 } else if (*cp=='Q') {
1271 mimeout_f = FIXED_MIME; cp++;
1276 case 'B': /* Broken JIS support */
1278 bit:1 allow any x on ESC-(-x or ESC-$-x
1279 bit:2 reset to ascii on NL
1281 if ('9'>= *cp && *cp>='0')
1282 broken_f |= 1<<(*cp++ -'0');
1287 case 'O':/* for Output file */
1291 case 'c':/* add cr code */
1294 case 'd':/* delete cr code */
1297 case 'I': /* ISO-2022-JP output */
1300 case 'L': /* line mode */
1301 if (*cp=='u') { /* unix */
1302 crmode_f = NL; cp++;
1303 } else if (*cp=='m') { /* mac */
1304 crmode_f = CR; cp++;
1305 } else if (*cp=='w') { /* windows */
1306 crmode_f = CRLF; cp++;
1307 } else if (*cp=='0') { /* no conversion */
1317 /* multiple options in one string are allowed for the Perl module */
1318 while(*cp && *cp!='-') cp++;
1322 /* bogus option but ignored */
1328 #ifdef ANSI_C_PROTOTYPE
1329 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1331 struct input_code * find_inputcode_byfunc(iconv_func)
1332 int (*iconv_func)();
1336 struct input_code *p = input_code_list;
1338 if (iconv_func == p->iconv_func){
1348 static int (*iconv_for_check)() = 0;
1351 #ifdef ANSI_C_PROTOTYPE
1352 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1354 void set_iconv(f, iconv_func)
1356 int (*iconv_func)();
1359 #ifdef INPUT_CODE_FIX
1367 #ifdef INPUT_CODE_FIX
1368 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1374 if (estab_f && iconv_for_check != iconv){
1375 struct input_code *p = find_inputcode_byfunc(iconv);
1377 set_input_codename(p->name);
1378 debug(input_codename);
1380 iconv_for_check = iconv;
/* Score bits OR-ed into an input_code's score by set_code_score(); each flag is the previous one shifted left by one. */
1385 #define SCORE_L2 (1) /* JIS level-2 kanji */
1386 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1387 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1388 #ifdef SHIFTJIS_CP932
1389 #define SCORE_CP932 (SCORE_DEPEND << 1) /* remapped via CP932 */
1390 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1392 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1394 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1395 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1397 #define SCORE_INIT (SCORE_iMIME)
1399 int score_table_A0[] = {
1402 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1403 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1406 int score_table_F0[] = {
1407 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1408 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1409 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1410 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1413 void set_code_score(ptr, score)
1414 struct input_code *ptr;
1418 ptr->score |= score;
1422 void clr_code_score(ptr, score)
1423 struct input_code *ptr;
1427 ptr->score &= ~score;
1431 void code_score(ptr)
1432 struct input_code *ptr;
1434 int c2 = ptr->buf[0];
1435 int c1 = ptr->buf[1];
1437 set_code_score(ptr, SCORE_ERROR);
1438 }else if (c2 == SSO){
1439 set_code_score(ptr, SCORE_KANA);
1440 #ifdef UTF8_OUTPUT_ENABLE
1441 }else if (!e2w_conv(c2, c1)){
1442 set_code_score(ptr, SCORE_NO_EXIST);
1444 }else if ((c2 & 0x70) == 0x20){
1445 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1446 }else if ((c2 & 0x70) == 0x70){
1447 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1448 }else if ((c2 & 0x70) >= 0x50){
1449 set_code_score(ptr, SCORE_L2);
1453 void status_disable(ptr)
1454 struct input_code *ptr;
1459 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1462 void status_push_ch(ptr, c)
1463 struct input_code *ptr;
1466 ptr->buf[ptr->index++] = c;
1469 void status_clear(ptr)
1470 struct input_code *ptr;
1476 void status_reset(ptr)
1477 struct input_code *ptr;
1480 ptr->score = SCORE_INIT;
1483 void status_reinit(ptr)
1484 struct input_code *ptr;
1487 ptr->_file_stat = 0;
1490 void status_check(ptr, c)
1491 struct input_code *ptr;
1494 if (c <= DEL && estab_f){
1499 void s_status(ptr, c)
1500 struct input_code *ptr;
1505 status_check(ptr, c);
1510 #ifdef NUMCHAR_OPTION
1511 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1514 }else if (0xa1 <= c && c <= 0xdf){
1515 status_push_ch(ptr, SSO);
1516 status_push_ch(ptr, c);
1519 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1521 status_push_ch(ptr, c);
1522 #ifdef SHIFTJIS_CP932
1524 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1526 status_push_ch(ptr, c);
1527 #endif /* SHIFTJIS_CP932 */
1529 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1531 status_push_ch(ptr, c);
1532 #endif /* X0212_ENABLE */
1534 status_disable(ptr);
1538 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1539 status_push_ch(ptr, c);
1540 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1544 status_disable(ptr);
1548 #ifdef SHIFTJIS_CP932
1549 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1550 status_push_ch(ptr, c);
1551 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1552 set_code_score(ptr, SCORE_CP932);
1557 #endif /* SHIFTJIS_CP932 */
1558 #ifndef X0212_ENABLE
1559 status_disable(ptr);
1565 void e_status(ptr, c)
1566 struct input_code *ptr;
1571 status_check(ptr, c);
1576 #ifdef NUMCHAR_OPTION
1577 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1580 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1582 status_push_ch(ptr, c);
1584 }else if (0x8f == c){
1586 status_push_ch(ptr, c);
1587 #endif /* X0212_ENABLE */
1589 status_disable(ptr);
1593 if (0xa1 <= c && c <= 0xfe){
1594 status_push_ch(ptr, c);
1598 status_disable(ptr);
1603 if (0xa1 <= c && c <= 0xfe){
1605 status_push_ch(ptr, c);
1607 status_disable(ptr);
1609 #endif /* X0212_ENABLE */
1613 #ifdef UTF8_INPUT_ENABLE
1614 void w16_status(ptr, c)
1615 struct input_code *ptr;
1622 if (ptr->_file_stat == 0){
1623 if (c == 0xfe || c == 0xff){
1625 status_push_ch(ptr, c);
1626 ptr->_file_stat = 1;
1628 status_disable(ptr);
1629 ptr->_file_stat = -1;
1631 }else if (ptr->_file_stat > 0){
1633 status_push_ch(ptr, c);
1634 }else if (ptr->_file_stat < 0){
1635 status_disable(ptr);
1641 status_disable(ptr);
1642 ptr->_file_stat = -1;
1644 status_push_ch(ptr, c);
1651 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1652 status_push_ch(ptr, c);
1655 status_disable(ptr);
1656 ptr->_file_stat = -1;
1662 void w_status(ptr, c)
1663 struct input_code *ptr;
1668 status_check(ptr, c);
1673 #ifdef NUMCHAR_OPTION
1674 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1677 }else if (0xc0 <= c && c <= 0xdf){
1679 status_push_ch(ptr, c);
1680 }else if (0xe0 <= c && c <= 0xef){
1682 status_push_ch(ptr, c);
1684 status_disable(ptr);
1689 if (0x80 <= c && c <= 0xbf){
1690 status_push_ch(ptr, c);
1691 if (ptr->index > ptr->stat){
1692 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1693 && ptr->buf[2] == 0xbf);
1694 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1695 &ptr->buf[0], &ptr->buf[1]);
1702 status_disable(ptr);
1713 int action_flag = 1;
1714 struct input_code *result = 0;
1715 struct input_code *p = input_code_list;
1717 (p->status_func)(p, c);
1720 }else if(p->stat == 0){
1731 if (result && !estab_f){
1732 set_iconv(TRUE, result->iconv_func);
1733 }else if (c <= DEL){
1734 struct input_code *ptr = input_code_list;
1749 return std_gc_buf[--std_gc_ndx];
1760 if (std_gc_ndx == STD_GC_BUFSIZE){
1763 std_gc_buf[std_gc_ndx++] = c;
1783 while ((c = (*i_getc)(f)) != EOF)
1792 oconv = output_conv;
1795 /* replace continuation module, from output side */
1797 /* output redirection */
1799 if (noout_f || guess_f){
1806 if (mimeout_f == TRUE) {
1807 o_base64conv = oconv; oconv = base64_conv;
1809 /* base64_count = 0; */
1813 o_crconv = oconv; oconv = cr_conv;
1816 o_rot_conv = oconv; oconv = rot_conv;
1819 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1822 o_hira_conv = oconv; oconv = hira_conv;
1825 o_fconv = oconv; oconv = fold_conv;
1828 if (alpha_f || x0201_f) {
1829 o_zconv = oconv; oconv = z_conv;
1833 i_ungetc = std_ungetc;
1834 /* input redirection */
1837 i_cgetc = i_getc; i_getc = cap_getc;
1838 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1841 i_ugetc = i_getc; i_getc = url_getc;
1842 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1845 #ifdef NUMCHAR_OPTION
1847 i_ngetc = i_getc; i_getc = numchar_getc;
1848 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1851 if (mime_f && mimebuf_f==FIXED_MIME) {
1852 i_mgetc = i_getc; i_getc = mime_getc;
1853 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1856 i_bgetc = i_getc; i_getc = broken_getc;
1857 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1859 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1860 set_iconv(-TRUE, e_iconv);
1861 } else if (input_f == SJIS_INPUT) {
1862 set_iconv(-TRUE, s_iconv);
1863 #ifdef UTF8_INPUT_ENABLE
1864 } else if (input_f == UTF8_INPUT) {
1865 set_iconv(-TRUE, w_iconv);
1866 } else if (input_f == UTF16LE_INPUT) {
1867 set_iconv(-TRUE, w_iconv16);
1870 set_iconv(FALSE, e_iconv);
1874 struct input_code *p = input_code_list;
1882 Conversion main loop. Code detection only.
1891 int is_8bit = FALSE;
1893 module_connection();
1898 output_mode = ASCII;
1901 #define NEXT continue /* no output, get next */
1902 #define SEND ; /* output c1 and c2, get next */
1903 #define LAST break /* end of loop, go closing */
1905 while ((c1 = (*i_getc)(f)) != EOF) {
1910 /* in case of 8th bit is on */
1911 if (!estab_f&&!mime_decode_mode) {
1912 /* in case of not established yet */
1913 /* It is still ambiguous */
1914 if (h_conv(f, c2, c1)==EOF)
1920 /* in case of already established */
1922 /* ignore bogus code */
1928 /* second byte, 7 bit code */
1929 /* it might be kanji shifted */
1930 if ((c1 == DEL) || (c1 <= SPACE)) {
1931 /* ignore bogus first code */
1939 #ifdef UTF8_INPUT_ENABLE
1948 #ifdef NUMCHAR_OPTION
1949 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1952 } else if (c1 > DEL) {
1954 if (!estab_f && !iso8859_f) {
1955 /* not established yet */
1956 if (!is_8bit) is_8bit = TRUE;
1959 } else { /* estab_f==TRUE */
1964 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1965 /* SJIS X0201 Case... */
1966 if(iso2022jp_f && x0201_f==NO_X0201) {
1967 (*oconv)(GETA1, GETA2);
1974 } else if (c1==SSO && iconv != s_iconv) {
1975 /* EUC X0201 Case */
1976 c1 = (*i_getc)(f); /* skip SSO */
1978 if (SSP<=c1 && c1<0xe0) {
1979 if(iso2022jp_f && x0201_f==NO_X0201) {
1980 (*oconv)(GETA1, GETA2);
1987 } else { /* bogus code, skip SSO and one byte */
1991 /* already established */
1996 } else if ((c1 > SPACE) && (c1 != DEL)) {
1997 /* in case of Roman characters */
1999 /* output 1 shifted byte */
2003 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2004 /* output 1 shifted byte */
2005 if(iso2022jp_f && x0201_f==NO_X0201) {
2006 (*oconv)(GETA1, GETA2);
2013 /* look like bogus code */
2016 } else if (input_mode == X0208) {
2017 /* in case of Kanji shifted */
2020 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2021 /* Check MIME code */
2022 if ((c1 = (*i_getc)(f)) == EOF) {
2025 } else if (c1 == '?') {
2026 /* =? is mime conversion start sequence */
2027 if(mime_f == STRICT_MIME) {
2028 /* check in real detail */
2029 if (mime_begin_strict(f) == EOF)
2033 } else if (mime_begin(f) == EOF)
2043 /* normal ASCII code */
2046 } else if (c1 == SI) {
2049 } else if (c1 == SO) {
2052 } else if (c1 == ESC ) {
2053 if ((c1 = (*i_getc)(f)) == EOF) {
2054 /* (*oconv)(0, ESC); don't send bogus code */
2056 } else if (c1 == '$') {
2057 if ((c1 = (*i_getc)(f)) == EOF) {
2059 (*oconv)(0, ESC); don't send bogus code
2060 (*oconv)(0, '$'); */
2062 } else if (c1 == '@'|| c1 == 'B') {
2063 /* This is kanji introduction */
2066 set_input_codename("ISO-2022-JP");
2067 debug(input_codename);
2069 } else if (c1 == '(') {
2070 if ((c1 = (*i_getc)(f)) == EOF) {
2071 /* don't send bogus code
2077 } else if (c1 == '@'|| c1 == 'B') {
2078 /* This is kanji introduction */
2083 } else if (c1 == 'D'){
2087 #endif /* X0212_ENABLE */
2089 /* could be some special code */
2096 } else if (broken_f&0x2) {
2097 /* accept any ESC-(-x as broken code ... */
2107 } else if (c1 == '(') {
2108 if ((c1 = (*i_getc)(f)) == EOF) {
2109 /* don't send bogus code
2111 (*oconv)(0, '('); */
2115 /* This is X0201 kana introduction */
2116 input_mode = X0201; shift_mode = X0201;
2118 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2119 /* This is ASCII (or JIS-Roman) introduction, not kanji */
2120 input_mode = ASCII; shift_mode = FALSE;
2122 } else if (broken_f&0x2) {
2123 input_mode = ASCII; shift_mode = FALSE;
2128 /* maintain various input_mode here */
2132 } else if ( c1 == 'N' || c1 == 'n' ){
2134 c3 = (*i_getc)(f); /* skip SS2 */
2135 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2150 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2151 input_mode = ASCII; set_iconv(FALSE, 0);
2153 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2154 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2162 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2163 if ((c1=(*i_getc)(f))!=EOF) {
2167 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2183 if (input_mode == X0208)
2184 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2186 else if (input_mode == X0212)
2187 (*oconv)((0x8f << 8) | c2, c1);
2188 #endif /* X0212_ENABLE */
2189 else if (input_mode)
2190 (*oconv)(input_mode, c1); /* other special case */
2191 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2192 int c0 = (*i_getc)(f);
2195 (*iconv)(c2, c1, c0);
2201 /* goto next_word */
2205 (*iconv)(EOF, 0, 0);
2206 if (!is_inputcode_set)
2209 struct input_code *p = input_code_list;
2210 struct input_code *result = p;
2212 if (p->score < result->score) result = p;
2215 set_input_codename(result->name);
2230 /** it must NOT be in the kanji shifted sequence */
2231 /** it must NOT be written in JIS7 */
2232 /** and it must be after 2 byte 8bit code */
2239 while ((c1 = (*i_getc)(f)) != EOF) {
2245 if (push_hold_buf(c1) == EOF || estab_f){
2251 struct input_code *p = input_code_list;
2252 struct input_code *result = p;
2257 if (p->score < result->score){
2262 set_iconv(FALSE, result->iconv_func);
2267 ** 1) EOF is detected, or
2268 ** 2) Code is established, or
2269 ** 3) Buffer is FULL (but last word is pushed)
2271 ** in 1) and 3) cases, we continue to use
2272 ** Kanji codes by oconv and leave estab_f unchanged.
2277 while (wc < hold_count){
2278 c2 = hold_buf[wc++];
2280 #ifdef NUMCHAR_OPTION
2281 || (c2 & CLASS_MASK) == CLASS_UTF16
2286 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2287 (*iconv)(X0201, c2, 0);
2290 if (wc < hold_count){
2291 c1 = hold_buf[wc++];
2300 if ((*iconv)(c2, c1, 0) < 0){
2302 if (wc < hold_count){
2303 c0 = hold_buf[wc++];
2312 (*iconv)(c2, c1, c0);
2325 if (hold_count >= HOLD_SIZE*2)
2327 hold_buf[hold_count++] = c2;
2328 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2331 int s2e_conv(c2, c1, p2, p1)
2336 #ifdef SHIFTJIS_CP932
2337 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2338 extern unsigned short shiftjis_cp932[3][189];
2339 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2345 #endif /* SHIFTJIS_CP932 */
2347 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2348 extern unsigned short shiftjis_x0212[3][189];
2349 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2352 c2 = (0x8f << 8) | (val >> 8);
2364 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2366 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2374 c2 = x0212_unshift(c2);
2389 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2392 int ret = s2e_conv(c2, c1, &c2, &c1);
2393 if (ret) return ret;
2407 }else if (c2 == 0x8f){
2411 c2 = (c2 << 8) | (c1 & 0x7f);
2413 #ifdef SHIFTJIS_CP932
2416 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2417 s2e_conv(s2, s1, &c2, &c1);
2418 if ((c2 & 0xff00) == 0){
2424 #endif /* SHIFTJIS_CP932 */
2425 #endif /* X0212_ENABLE */
2426 } else if (c2 == SSO){
2429 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2439 #ifdef UTF8_INPUT_ENABLE
2441 w2e_conv(c2, c1, c0, p2, p1)
2445 extern unsigned short * utf8_to_euc_2bytes[];
2446 extern unsigned short ** utf8_to_euc_3bytes[];
2449 if (0xc0 <= c2 && c2 <= 0xef) {
2450 unsigned short **pp;
2453 if (c0 == 0) return -1;
2454 pp = utf8_to_euc_3bytes[c2 - 0x80];
2455 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2457 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2459 #ifdef NUMCHAR_OPTION
2462 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2467 } else if (c2 == X0201) {
2480 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2488 w16w_conv(val, p2, p1, p0)
2496 }else if (val < 0x800){
2497 *p2 = 0xc0 | (val >> 6);
2498 *p1 = 0x80 | (val & 0x3f);
2501 *p2 = 0xe0 | (val >> 12);
2502 *p1 = 0x80 | ((val >> 6) & 0x3f);
2503 *p0 = 0x80 | (val & 0x3f);
2508 ww16_conv(c2, c1, c0)
2513 val = (c2 & 0x0f) << 12;
2514 val |= (c1 & 0x3f) << 6;
2516 }else if (c2 >= 0xc0){
2517 val = (c2 & 0x1f) << 6;
2526 w16e_conv(val, p2, p1)
2530 extern unsigned short * utf8_to_euc_2bytes[];
2531 extern unsigned short ** utf8_to_euc_3bytes[];
2533 unsigned short **pp;
2537 w16w_conv(val, &c2, &c1, &c0);
2540 pp = utf8_to_euc_3bytes[c2 - 0x80];
2541 psize = sizeof_utf8_to_euc_C2;
2542 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2544 pp = utf8_to_euc_2bytes;
2545 psize = sizeof_utf8_to_euc_2bytes;
2546 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2548 #ifdef NUMCHAR_OPTION
2551 *p1 = CLASS_UTF16 | val;
2563 w_iconv16(c2, c1, c0)
2568 if (c2==0376 && c1==0377){
2569 utf16_mode = UTF16LE_INPUT;
2571 } else if (c2==0377 && c1==0376){
2572 utf16_mode = UTF16BE_INPUT;
2575 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2577 tmp=c1; c1=c2; c2=tmp;
2579 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2583 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2584 if (ret) return ret;
2590 w_iconv_common(c1, c0, pp, psize, p2, p1)
2592 unsigned short **pp;
2600 if (pp == 0) return 1;
2603 if (c1 < 0 || psize <= c1) return 1;
2605 if (p == 0) return 1;
2608 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2610 if (val == 0) return 1;
2617 if (c2 == SO) c2 = X0201;
2626 #ifdef UTF8_OUTPUT_ENABLE
2631 extern unsigned short euc_to_utf8_1byte[];
2632 extern unsigned short * euc_to_utf8_2bytes[];
2633 extern unsigned short * euc_to_utf8_2bytes_ms[];
2637 p = euc_to_utf8_1byte;
2639 } else if (c2 >> 8 == 0x8f){
2640 extern unsigned short * x0212_to_utf8_2bytes[];
2641 c2 = (c2&0x7f) - 0x21;
2642 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2643 p = x0212_to_utf8_2bytes[c2];
2649 c2 = (c2&0x7f) - 0x21;
2650 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2651 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2656 c1 = (c1 & 0x7f) - 0x21;
2657 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2673 if (unicode_bom_f==2) {
2680 #ifdef NUMCHAR_OPTION
2681 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2682 w16w_conv(c1, &c2, &c1, &c0);
2686 if (c0) (*o_putc)(c0);
2693 output_mode = ASCII;
2695 } else if (c2 == ISO8859_1) {
2696 output_mode = ISO8859_1;
2697 (*o_putc)(c1 | 0x080);
2701 val = e2w_conv(c2, c1);
2703 w16w_conv(val, &c2, &c1, &c0);
2707 if (c0) (*o_putc)(c0);
2723 if (unicode_bom_f==2) {
2725 (*o_putc)((unsigned char)'\377');
2729 (*o_putc)((unsigned char)'\377');
2734 if (c2 == ISO8859_1) {
2737 #ifdef NUMCHAR_OPTION
2738 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2739 c2 = (c1 >> 8) & 0xff;
2743 unsigned short val = e2w_conv(c2, c1);
2744 c2 = (val >> 8) & 0xff;
2763 #ifdef NUMCHAR_OPTION
2764 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2765 w16e_conv(c1, &c2, &c1);
2766 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2774 } else if (c2 == 0) {
2775 output_mode = ASCII;
2777 } else if (c2 == X0201) {
2778 output_mode = JAPANESE_EUC;
2779 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2780 } else if (c2 == ISO8859_1) {
2781 output_mode = ISO8859_1;
2782 (*o_putc)(c1 | 0x080);
2784 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2785 output_mode = JAPANESE_EUC;
2786 #ifdef SHIFTJIS_CP932
2789 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2790 s2e_conv(s2, s1, &c2, &c1);
2794 if ((c2 & 0xff00) >> 8 == 0x8f){
2797 (*o_putc)((c2 & 0x7f) | 0x080);
2798 (*o_putc)(c1 | 0x080);
2801 (*o_putc)((c2 & 0x7f) | 0x080);
2802 (*o_putc)(c1 | 0x080);
2806 if ((c1<0x21 || 0x7e<c1) ||
2807 (c2<0x21 || 0x7e<c2)) {
2808 set_iconv(FALSE, 0);
2809 return; /* too late to rescue this char */
2811 output_mode = JAPANESE_EUC;
2812 (*o_putc)(c2 | 0x080);
2813 (*o_putc)(c1 | 0x080);
2823 if ((ret & 0xff00) == 0x8f00){
2824 if (0x75 <= c && c <= 0x7f){
2825 ret = c + (0x109 - 0x75);
2828 if (0x75 <= c && c <= 0x7f){
2829 ret = c + (0x113 - 0x75);
2836 int x0212_unshift(c)
2840 if (0x7f <= c && c <= 0x88){
2841 ret = c + (0x75 - 0x7f);
2842 }else if (0x89 <= c && c <= 0x92){
2843 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2847 #endif /* X0212_ENABLE */
2850 e2s_conv(c2, c1, p2, p1)
2851 int c2, c1, *p2, *p1;
2854 unsigned short *ptr;
2856 extern unsigned short *x0212_shiftjis[];
2858 if ((c2 & 0xff00) == 0x8f00){
2860 if (0x21 <= ndx && ndx <= 0x7e){
2861 ptr = x0212_shiftjis[ndx - 0x21];
2863 val = ptr[(c1 & 0x7f) - 0x21];
2873 c2 = x0212_shift(c2);
2875 #endif /* X0212_ENABLE */
2876 if ((c2 & 0xff00) == 0x8f00){
2879 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2880 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2889 #ifdef NUMCHAR_OPTION
2890 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2891 w16e_conv(c1, &c2, &c1);
2897 } else if (c2 == 0) {
2898 output_mode = ASCII;
2900 } else if (c2 == X0201) {
2901 output_mode = SHIFT_JIS;
2903 } else if (c2 == ISO8859_1) {
2904 output_mode = ISO8859_1;
2905 (*o_putc)(c1 | 0x080);
2907 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2908 output_mode = SHIFT_JIS;
2909 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2915 if ((c1<0x20 || 0x7e<c1) ||
2916 (c2<0x20 || 0x7e<c2)) {
2917 set_iconv(FALSE, 0);
2918 return; /* too late to rescue this char */
2920 output_mode = SHIFT_JIS;
2921 e2s_conv(c2, c1, &c2, &c1);
2923 #ifdef SHIFTJIS_CP932
2925 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2926 extern unsigned short cp932inv[2][189];
2927 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2933 #endif /* SHIFTJIS_CP932 */
2936 if (prefix_table[(unsigned char)c1]){
2937 (*o_putc)(prefix_table[(unsigned char)c1]);
2948 #ifdef NUMCHAR_OPTION
2949 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2950 w16e_conv(c1, &c2, &c1);
2954 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2957 (*o_putc)(ascii_intro);
2958 output_mode = ASCII;
2962 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2963 if (output_mode!=X0212) {
2964 output_mode = X0212;
2970 (*o_putc)(c2 & 0x7f);
2973 } else if (c2==X0201) {
2974 if (output_mode!=X0201) {
2975 output_mode = X0201;
2981 } else if (c2==ISO8859_1) {
2982 /* iso8859 introduction, or 8th bit on */
2983 /* Can we convert in 7bit form using ESC-'-'-A ?
2985 output_mode = ISO8859_1;
2987 } else if (c2 == 0) {
2988 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2991 (*o_putc)(ascii_intro);
2992 output_mode = ASCII;
2996 if (output_mode != X0208) {
2997 output_mode = X0208;
3000 (*o_putc)(kanji_intro);
3002 if (c1<0x20 || 0x7e<c1)
3004 if (c2<0x20 || 0x7e<c2)
3016 mime_prechar(c2, c1);
3017 (*o_base64conv)(c2,c1);
3021 static int broken_buf[3];
3022 static int broken_counter = 0;
3023 static int broken_last = 0;
3030 if (broken_counter>0) {
3031 return broken_buf[--broken_counter];
3034 if (c=='$' && broken_last != ESC
3035 && (input_mode==ASCII || input_mode==X0201)) {
3038 if (c1=='@'|| c1=='B') {
3039 broken_buf[0]=c1; broken_buf[1]=c;
3046 } else if (c=='(' && broken_last != ESC
3047 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3050 if (c1=='J'|| c1=='B') {
3051 broken_buf[0]=c1; broken_buf[1]=c;
3069 if (broken_counter<2)
3070 broken_buf[broken_counter++]=c;
3074 static int prev_cr = 0;
3082 if (! (c2==0&&c1==NL) ) {
3088 } else if (c1=='\r') {
3090 } else if (c1=='\n') {
3091 if (crmode_f==CRLF) {
3092 (*o_crconv)(0,'\r');
3093 } else if (crmode_f==CR) {
3094 (*o_crconv)(0,'\r');
3098 } else if (c1!='\032' || crmode_f!=NL){
3104 Return value of fold_conv()
3106 \n add newline and output char
3107 \r add newline and output nothing
3110 1 (or else) normal output
3112 fold state in prev (previous character)
3114 >0x80 Japanese (X0208/X0201)
3119 This fold algorithm does not preserve leading space in a line.
3120 This is the main difference from fmt.
/*
 * Amount a character adds to the fold line counter (f_line):
 * 2 when c2 is non-zero, otherwise 1.  c1 is accepted but not used.
 * The argument is parenthesized so that a compound expression such as
 * (a & b) is tested as a whole instead of being split by operator
 * precedence.
 */
#define char_size(c2,c1) ((c2)?2:1)
3132 if (c1== '\r' && !fold_preserve_f) {
3133 fold_state=0; /* ignore cr */
3134 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3136 fold_state=0; /* ignore cr */
3137 } else if (c1== BS) {
3138 if (f_line>0) f_line--;
3140 } else if (c2==EOF && f_line != 0) { /* close open last line */
3142 } else if ((c1=='\n' && !fold_preserve_f)
3143 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3144 && fold_preserve_f)) {
3146 if (fold_preserve_f) {
3150 } else if ((f_prev == c1 && !fold_preserve_f)
3151 || (f_prev == '\n' && fold_preserve_f)
3152 ) { /* duplicate newline */
3155 fold_state = '\n'; /* output two newline */
3161 if (f_prev&0x80) { /* Japanese? */
3163 fold_state = 0; /* ignore given single newline */
3164 } else if (f_prev==' ') {
3168 if (++f_line<=fold_len)
3172 fold_state = '\r'; /* fold and output nothing */
3176 } else if (c1=='\f') {
3181 fold_state = '\n'; /* output newline and clear */
3182 } else if ( (c2==0 && c1==' ')||
3183 (c2==0 && c1=='\t')||
3184 (c2=='!'&& c1=='!')) {
3185 /* X0208 kankaku or ascii space */
3186 if (f_prev == ' ') {
3187 fold_state = 0; /* remove duplicate spaces */
3190 if (++f_line<=fold_len)
3191 fold_state = ' '; /* output ASCII space only */
3193 f_prev = ' '; f_line = 0;
3194 fold_state = '\r'; /* fold and output nothing */
3198 prev0 = f_prev; /* we still need this one... , but almost done */
3200 if (c2 || c2==X0201)
3201 f_prev |= 0x80; /* this is Japanese */
3202 f_line += char_size(c2,c1);
3203 if (f_line<=fold_len) { /* normal case */
3206 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3207 f_line = char_size(c2,c1);
3208 fold_state = '\n'; /* We can't wait, do fold now */
3209 } else if (c2==X0201) {
3210 /* simple kinsoku rules return 1 means no folding */
3211 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3212 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3213 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3214 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3215 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3216 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3217 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3219 fold_state = '\n';/* add one new f_line before this character */
3222 fold_state = '\n';/* add one new f_line before this character */
3225 /* kinsoku point in ASCII */
3226 if ( c1==')'|| /* { [ ( */
3237 /* just after special */
3238 } else if (!is_alnum(prev0)) {
3239 f_line = char_size(c2,c1);
3241 } else if ((prev0==' ') || /* ignored new f_line */
3242 (prev0=='\n')|| /* ignored new f_line */
3243 (prev0&0x80)) { /* X0208 - ASCII */
3244 f_line = char_size(c2,c1);
3245 fold_state = '\n';/* add one new f_line before this character */
3247 fold_state = 1; /* default no fold in ASCII */
3251 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3252 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3253 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3254 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3255 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3256 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3257 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3258 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3259 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3260 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3261 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3262 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3263 /* default no fold in kinsoku */
3266 f_line = char_size(c2,c1);
3267 /* add one new f_line before this character */
3270 f_line = char_size(c2,c1);
3272 /* add one new f_line before this character */
3277 /* terminator process */
3278 switch(fold_state) {
3297 int z_prev2=0,z_prev1=0;
3304 /* if (c2) c1 &= 0x7f; assertion */
3306 if (x0201_f && z_prev2==X0201) { /* X0201 */
3307 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3309 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3311 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3313 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3317 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3326 if (x0201_f && c2==X0201) {
3327 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3328 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3329 z_prev1 = c1; z_prev2 = c2;
3332 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3337 /* JISX0208 Alphabet */
3338 if (alpha_f && c2 == 0x23 ) {
3340 } else if (alpha_f && c2 == 0x21 ) {
3341 /* JISX0208 Kigou */
3346 } else if (alpha_f&0x4) {
3351 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3357 case '>': entity = ">"; break;
3358 case '<': entity = "<"; break;
3359 case '\"': entity = """; break;
3360 case '&': entity = "&"; break;
3363 while (*entity) (*o_zconv)(0, *entity++);
3373 #define rot13(c) ( \
3375 (c <= 'M') ? (c + 13): \
3376 (c <= 'Z') ? (c - 13): \
3378 (c <= 'm') ? (c + 13): \
3379 (c <= 'z') ? (c - 13): \
3383 #define rot47(c) ( \
3385 ( c <= 'O' ) ? (c + 47) : \
3386 ( c <= '~' ) ? (c - 47) : \
3394 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3400 (*o_rot_conv)(c2,c1);
3407 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3409 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3412 (*o_hira_conv)(c2,c1);
3417 iso2022jp_check_conv(c2,c1)
3420 static int range[RANGE_NUM_MAX][2] = {
3443 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3447 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3452 for (i = 0; i < RANGE_NUM_MAX; i++) {
3453 start = range[i][0];
3456 if (c >= start && c <= end) {
3461 (*o_iso2022jp_check_conv)(c2,c1);
3465 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3467 unsigned char *mime_pattern[] = {
3468 (unsigned char *)"\075?EUC-JP?B?",
3469 (unsigned char *)"\075?SHIFT_JIS?B?",
3470 (unsigned char *)"\075?ISO-8859-1?Q?",
3471 (unsigned char *)"\075?ISO-8859-1?B?",
3472 (unsigned char *)"\075?ISO-2022-JP?B?",
3473 (unsigned char *)"\075?ISO-2022-JP?Q?",
3474 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3475 (unsigned char *)"\075?UTF-8?B?",
3476 (unsigned char *)"\075?UTF-8?Q?",
3478 (unsigned char *)"\075?US-ASCII?Q?",
3483 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3484 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3485 e_iconv, s_iconv, 0, 0, 0, 0,
3486 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3492 int mime_encode[] = {
3493 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3494 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3501 int mime_encode_method[] = {
3502 'B', 'B','Q', 'B', 'B', 'Q',
3503 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3511 #define MAXRECOVER 20
/*
 * Hand-rolled replacements for the <ctype.h> classifiers: the portability
 * of toupper() and friends is not trusted here, so only plain ASCII
 * comparisons are used.
 *
 * These are macros, so the argument may be evaluated more than once:
 * never pass an expression with side effects (a getc call, *p++, ...).
 * Every use of the argument is parenthesized so that compound expressions
 * (a ? b : c, a | b, ...) expand with the intended precedence.
 *
 * nkf_isblank and nkf_isspace rely on SPACE, TAB, CR and NL, which are
 * defined elsewhere in this file.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c)  ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
3525 if (i_getc!=mime_getc) {
3526 i_mgetc = i_getc; i_getc = mime_getc;
3527 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3528 if(mime_f==STRICT_MIME) {
3529 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3530 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3536 unswitch_mime_getc()
3538 if(mime_f==STRICT_MIME) {
3539 i_mgetc = i_mgetc_buf;
3540 i_mungetc = i_mungetc_buf;
3543 i_ungetc = i_mungetc;
3547 mime_begin_strict(f)
3552 unsigned char *p,*q;
3553 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3555 mime_decode_mode = FALSE;
3556 /* =? has been checked */
3558 p = mime_pattern[j];
3561 for(i=2;p[i]>' ';i++) { /* start at =? */
3562 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3563 /* pattern fails, try next one */
3565 while ((p = mime_pattern[++j])) {
3566 for(k=2;k<i;k++) /* assume length(p) > i */
3567 if (p[k]!=q[k]) break;
3568 if (k==i && nkf_toupper(c1)==p[k]) break;
3570 if (p) continue; /* found next one, continue */
3571 /* all fails, output from recovery buffer */
3579 mime_decode_mode = p[i-2];
3581 set_iconv(FALSE, mime_priority_func[j]);
3582 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3584 if (mime_decode_mode=='B') {
3585 mimebuf_f = unbuf_f;
3587 /* do MIME integrity check */
3588 return mime_integrity(f,mime_pattern[j]);
3600 /* we don't keep eof of Fifo, because it contains ?= as
3601 a terminator. It was checked in mime_integrity. */
3602 return ((mimebuf_f)?
3603 (*i_mgetc_buf)(f):Fifo(mime_input++));
3607 mime_ungetc_buf(c,f)
3612 (*i_mungetc_buf)(c,f);
3614 Fifo(--mime_input)=c;
3625 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3626 /* re-read and convert again from mime_buffer. */
3628 /* =? has been checked */
3630 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3631 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3632 /* We accept any character type even if it is broken by new lines */
3633 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3634 if (c1=='\n'||c1==' '||c1=='\r'||
3635 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3637 /* Failed. But this could be another MIME preamble */
3645 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3646 if (!(++i<MAXRECOVER) || c1==EOF) break;
3647 if (c1=='b'||c1=='B') {
3648 mime_decode_mode = 'B';
3649 } else if (c1=='q'||c1=='Q') {
3650 mime_decode_mode = 'Q';
3654 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3655 if (!(++i<MAXRECOVER) || c1==EOF) break;
3657 mime_decode_mode = FALSE;
3663 if (!mime_decode_mode) {
3664 /* false MIME preamble, restart from mime_buffer */
3665 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3666 /* Since we are in MIME mode until buffer becomes empty, */
3667 /* we never go into mime_begin again for a while. */
3670 /* discard mime preamble, and goto MIME mode */
3672 /* do no MIME integrity check */
3673 return c1; /* used only for checking EOF */
3688 fprintf(stderr, "%s\n", str);
3694 set_input_codename (codename)
3699 strcmp(codename, "") != 0 &&
3700 strcmp(codename, input_codename) != 0)
3702 is_inputcode_mixed = TRUE;
3704 input_codename = codename;
3705 is_inputcode_set = TRUE;
3710 print_guessed_code (filename)
3713 char *codename = "BINARY";
3714 if (!is_inputcode_mixed) {
3715 if (strcmp(input_codename, "") == 0) {
3718 codename = input_codename;
3721 if (filename != NULL) printf("%s:", filename);
3722 printf("%s\n", codename);
3730 if (nkf_isdigit(x)) return x - '0';
3731 return nkf_toupper(x) - 'A' + 10;
3736 #ifdef ANSI_C_PROTOTYPE
3737 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3740 hex_getc(ch, f, g, u)
3753 if (!nkf_isxdigit(c2)){
3758 if (!nkf_isxdigit(c3)){
3763 return (hex2bin(c2) << 4) | hex2bin(c3);
3770 return hex_getc(':', f, i_cgetc, i_cungetc);
3778 return (*i_cungetc)(c, f);
3785 return hex_getc('%', f, i_ugetc, i_uungetc);
3793 return (*i_uungetc)(c, f);
3797 #ifdef NUMCHAR_OPTION
3802 int (*g)() = i_ngetc;
3803 int (*u)() = i_nungetc;
3814 if (buf[i] == 'x' || buf[i] == 'X'){
3815 for (j = 0; j < 5; j++){
3817 if (!nkf_isxdigit(buf[i])){
3824 c |= hex2bin(buf[i]);
3827 for (j = 0; j < 6; j++){
3831 if (!nkf_isdigit(buf[i])){
3838 c += hex2bin(buf[i]);
3844 return CLASS_UTF16 | c;
3854 numchar_ungetc(c, f)
3858 return (*i_nungetc)(c, f);
/*
 * MIME decoding reader (other comments in this file call it mime_getc).
 * Steps visible in this part:
 *   1. If the Fifo still holds bytes (mime_top != mime_last), return the
 *      oldest one.
 *   2. If mime_decode_mode is 1 or FALSE, switch decoding off, call
 *      unswitch_mime_getc() and return the next byte from i_getc.
 *   3. In Q mode: an underscore yields a space, an equals sign followed by
 *      two hex digits yields one byte, and a question mark followed by an
 *      equals sign ends the encoded word.
 *   4. Any mode other than Q or B drops out of MIME decoding.
 * Returns EOF whenever the underlying reader reports EOF.
 */
3867 int c1, c2, c3, c4, cc;
3868 int t1, t2, t3, t4, mode, exit_mode;
3872 int lwsp_size = 128;
3874 if (mime_top != mime_last) { /* Something is in FIFO */
3875 return Fifo(mime_top++);
3877 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3878 mime_decode_mode=FALSE;
3879 unswitch_mime_getc();
3880 return (*i_getc)(f);
/* With a fixed MIME buffer the current mode is also the mode to fall back to. */
3883 if (mimebuf_f == FIXED_MIME)
3884 exit_mode = mime_decode_mode;
3887 if (mime_decode_mode == 'Q') {
3888 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3890 if (c1=='_') return ' ';
3891 if (c1!='=' && c1!='?') {
3895 mime_decode_mode = exit_mode; /* prepare for quit */
/* Control characters and space are passed through as they are. */
3896 if (c1<=' ') return c1;
3897 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3898 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3899 /* end Q encoding */
3900 input_mode = exit_mode;
/*
 * Scratch buffer for the characters read after the end of the encoded
 * word. It is allocated with 5 bytes of slack beyond lwsp_size.
 */
3902 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3903 if (lwsp_buf==NULL) {
3904 perror("can't malloc");
3907 while ((c1=(*i_getc)(f))!=EOF) {
3912 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3920 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3921 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3936 lwsp_buf[lwsp_count] = c1;
/* The count is compared before it is incremented; the slack bytes absorb the overshoot. */
3937 if (lwsp_count++>lwsp_size){
/* The result goes to a temporary first so the old buffer is not lost if realloc fails. */
3939 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3940 if (lwsp_buf_new==NULL) {
3943 perror("can't realloc");
3946 lwsp_buf = lwsp_buf_new;
3952 if (lwsp_count > 0) {
3953 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
/*
 * Push the buffered characters back in reverse order. The loop stops
 * before index 0, so that element is not pushed back by this loop.
 */
3957 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3958 i_ungetc(lwsp_buf[lwsp_count],f);
3966 if (c1=='='&&c2<' ') { /* this is soft wrap */
/*
 * NOTE(review): the loop condition and the loop body each read one
 * character, and only the second read is tested for EOF. A negative EOF
 * from the first read also satisfies the comparison with space, so every
 * other character is skipped unexamined. Confirm this is intended.
 */
3967 while((c1 = (*i_mgetc)(f)) <=' ') {
3968 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3970 mime_decode_mode = 'Q'; /* still in MIME */
3971 goto restart_mime_q;
3974 mime_decode_mode = 'Q'; /* still in MIME */
3978 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3979 if (c2<=' ') return c2;
3980 mime_decode_mode = 'Q'; /* still in MIME */
/*
 * hex(c): numeric value of one hexadecimal digit, in either case; any other
 * character maps to 0. The argument is not parenthesised in the expansion,
 * so it must be a plain identifier, as it is in the use below.
 */
3981 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3982 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3983 return ((hex(c2)<<4) + hex(c3));
/* Neither Q nor B: leave MIME decoding and return the next raw character. */
3986 if (mime_decode_mode != 'B') {
3987 mime_decode_mode = FALSE;
3988 return (*i_mgetc)(f);
3992 /* Base64 encoding */
3994 MIME allows line breaks in the middle of
3995 Base64, but we are very pessimistic about decoding
3996 in unbuffered mode, because MIME-encoded text may be broken by
3997 less or an editor's control sequences (such as ESC-[-K) in unbuffered
3998 mode. Incomplete MIME is ignored.
/*
 * Base64 branch of the MIME decoding reader. It reads four characters
 * c1..c4 through i_mgetc, maps each one with base64decode(), repacks the
 * four 6-bit values into bytes, appends them to the Fifo and returns the
 * oldest queued byte. The current mode is saved in mode and restored once
 * all four characters have been read.
 */
4000 mode = mime_decode_mode;
4001 mime_decode_mode = exit_mode; /* prepare for quit */
/* A character at or below space (a negative EOF included) takes this path. */
4003 while ((c1 = (*i_mgetc)(f))<=' ') {
4008 if ((c2 = (*i_mgetc)(f))<=' ') {
/* Outside strict mode the read is retried; otherwise decoding is abandoned. */
4011 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4012 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
/* A question mark followed by an equals sign closes the encoded word. */
4015 if ((c1 == '?') && (c2 == '=')) {
/* Scratch buffer for the characters read after the encoded word, with 5 bytes of slack. */
4018 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4019 if (lwsp_buf==NULL) {
4020 perror("can't malloc");
4023 while ((c1=(*i_getc)(f))!=EOF) {
4028 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4036 if ((c1=(*i_getc)(f))!=EOF) {
4040 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4055 lwsp_buf[lwsp_count] = c1;
4056 if (lwsp_count++>lwsp_size){
/* The result goes to a temporary first so the old buffer is not lost if realloc fails. */
4058 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4059 if (lwsp_buf_new==NULL) {
4062 perror("can't realloc");
4065 lwsp_buf = lwsp_buf_new;
4071 if (lwsp_count > 0) {
4072 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
/* Push the buffered characters back in reverse order; index 0 is not pushed by this loop. */
4076 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4077 i_ungetc(lwsp_buf[lwsp_count],f);
4086 if ((c3 = (*i_mgetc)(f))<=' ') {
4089 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4090 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4094 if ((c4 = (*i_mgetc)(f))<=' ') {
4097 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4098 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4102 mime_decode_mode = mode; /* still in MIME sigh... */
4104 /* BASE 64 decoding */
/* Each input character contributes six bits. */
4106 t1 = 0x3f & base64decode(c1);
4107 t2 = 0x3f & base64decode(c2);
4108 t3 = 0x3f & base64decode(c3);
4109 t4 = 0x3f & base64decode(c4);
/* Byte 1: all six bits of t1 and the top two bits of t2. */
4110 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4112 Fifo(mime_last++) = cc;
/* Byte 2: the low four bits of t2 and the top four bits of t3. */
4113 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4115 Fifo(mime_last++) = cc;
/* Byte 3: the low two bits of t3 and all six bits of t4. */
4116 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4118 Fifo(mime_last++) = cc;
/* Return the oldest byte in the Fifo. */
4123 return Fifo(mime_top++);
4131 Fifo(--mime_top) = c;
4142 /* In buffered mode, read until =? or NL or buffer full
4144 mime_input = mime_top;
4145 mime_last = mime_top;
4146 while(*p) Fifo(mime_input++) = *p++;
4149 while((c=(*i_getc)(f))!=EOF) {
4150 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4151 break; /* buffer full */
4153 if (c=='=' && d=='?') {
4154 /* checked. skip header, start decode */
4155 Fifo(mime_input++) = c;
4156 /* mime_last_input = mime_input; */
4161 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4163 /* Should we check length mod 4? */
4164 Fifo(mime_input++) = c;
4167 /* In case of Incomplete MIME, no MIME decode */
4168 Fifo(mime_input++) = c;
4169 mime_last = mime_input; /* point undecoded buffer */
4170 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4171 switch_mime_getc(); /* anyway we need buffered getc */
/*
 * Maps a Base64 alphabet character c to its 6-bit index i (presumably the
 * body of base64decode(); the header is not visible here - TODO confirm).
 * The character constants are used as plain numbers and rely on ASCII:
 * 'a' - 'G' is 26, '4' is 52, '>' is 62 and '?' is 63, as the inline
 * comments spell out.
 */
4182 i = c - 'A'; /* A..Z 0-25 */
4184 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4186 } else if (c > '/') {
4187 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4188 } else if (c == '+') {
4189 i = '>' /* 62 */ ; /* + 62 */
4191 i = '?' /* 63 */ ; /* / 63 */
/* Base64 alphabet: the character at index v encodes the 6-bit value v. */
4196 static char basis_64[] =
4197 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Capacity of the MIME output staging buffer; the array has one extra byte. */
4200 #define MIMEOUT_BUF_LENGTH (60)
4201 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
/* Number of characters currently held in mimeout_buf. */
4202 int mimeout_buf_count = 0;
/* Flag consulted by the MIME output code before it handles buffered white space. */
4203 int mimeout_preserve_space = 0;
/*
 * itoh4(c): converts a value 0..15 to its upper-case hexadecimal digit.
 * The argument is not parenthesised in the expansion, so callers must pass
 * an already parenthesised expression.
 */
4204 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
4213 p = mime_pattern[0];
4214 for(i=0;mime_encode[i];i++) {
4215 if (mode == mime_encode[i]) {
4216 p = mime_pattern[i];
4220 mimeout_mode = mime_encode_method[i];
4223 if (base64_count>45) {
4224 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
4225 (*o_mputc)(mimeout_buf[i]);
4231 if (!mimeout_preserve_space && mimeout_buf_count>0
4232 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4233 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4237 if (!mimeout_preserve_space) {
4238 for (;i<mimeout_buf_count;i++) {
4239 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4240 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4241 (*o_mputc)(mimeout_buf[i]);
4248 mimeout_preserve_space = FALSE;
4254 j = mimeout_buf_count;
4255 mimeout_buf_count = 0;
4257 mime_putc(mimeout_buf[i]);
4273 switch(mimeout_mode) {
4278 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4284 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4290 if (mimeout_f!=FIXED_MIME) {
4292 } else if (mimeout_mode != 'Q')
4301 switch(mimeout_mode) {
4306 } else if (c==CR||c==NL) {
4309 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4311 (*o_mputc)(itoh4(((c>>4)&0xf)));
4312 (*o_mputc)(itoh4((c&0xf)));
4321 (*o_mputc)(basis_64[c>>2]);
4326 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4332 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4333 (*o_mputc)(basis_64[c & 0x3F]);
/* The c2 and c1 values passed to the most recent mime_prechar() call. */
4344 int mime_lastchar2, mime_lastchar1;
/*
 * mime_prechar(c2, c1): bookkeeping done for a character pair on the MIME
 * output path. Among the visible lines:
 *   - when base64_count + mimeout_buf_count/3*4 exceeds 66, the pairs
 *     (EOF,0), (0,NL) and (0,SPACE) are sent through o_base64conv;
 *   - a (0,SPACE) is sent when c2 is non-zero, the previous c2 was 0 and
 *     the previous c1 was a non-zero, non-space character;
 *   - c2 and c1 are then remembered in mime_lastchar2 and mime_lastchar1.
 * The else-if that follows the first test is commented out in the source.
 * NOTE(review): some enclosing conditions are not visible in this view.
 */
4346 void mime_prechar(c2, c1)
4351 if (base64_count + mimeout_buf_count/3*4> 66){
4352 (*o_base64conv)(EOF,0);
4353 (*o_base64conv)(0,NL);
4354 (*o_base64conv)(0,SPACE);
4356 }/*else if (mime_lastchar2){
4357 if (c1 <=DEL && !nkf_isspace(c1)){
4358 (*o_base64conv)(0,SPACE);
4362 if (c2 && mime_lastchar2 == 0
4363 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
4364 (*o_base64conv)(0,SPACE);
4367 mime_lastchar2 = c2;
4368 mime_lastchar1 = c1;
4379 if (mimeout_f == FIXED_MIME){
4380 if (mimeout_mode == 'Q'){
4381 if (base64_count > 71){
4382 if (c!=CR && c!=NL) {
4389 if (base64_count > 71){
4394 if (c == EOF) { /* c==EOF */
4398 if (c != EOF) { /* c==EOF */
4404 /* mimeout_f != FIXED_MIME */
4406 if (c == EOF) { /* c==EOF */
4407 j = mimeout_buf_count;
4408 mimeout_buf_count = 0;
4411 /*if (nkf_isspace(mimeout_buf[i])){
4414 mimeout_addchar(mimeout_buf[i]);
4418 (*o_mputc)(mimeout_buf[i]);
4424 if (mimeout_mode=='Q') {
4425 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4437 if (mimeout_buf_count > 0){
4438 lastchar = mimeout_buf[mimeout_buf_count - 1];
4443 if (!mimeout_mode) {
4444 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
4445 if (nkf_isspace(c)) {
4446 if (c==CR || c==NL) {
4449 for (i=0;i<mimeout_buf_count;i++) {
4450 (*o_mputc)(mimeout_buf[i]);
4451 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
4458 mimeout_buf_count = 1;
4460 if (base64_count > 1
4461 && base64_count + mimeout_buf_count > 76){
4464 if (!nkf_isspace(mimeout_buf[0])){
4469 mimeout_buf[mimeout_buf_count++] = c;
4470 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4471 open_mime(output_mode);
4476 if (lastchar==CR || lastchar == NL){
4477 for (i=0;i<mimeout_buf_count;i++) {
4478 (*o_mputc)(mimeout_buf[i]);
4481 mimeout_buf_count = 0;
4483 if (lastchar==SPACE) {
4484 for (i=0;i<mimeout_buf_count-1;i++) {
4485 (*o_mputc)(mimeout_buf[i]);
4488 mimeout_buf[0] = SPACE;
4489 mimeout_buf_count = 1;
4491 open_mime(output_mode);
4494 /* mimeout_mode == 'B', 1, 2 */
4495 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4496 if (lastchar == CR || lastchar == NL){
4497 if (nkf_isblank(c)) {
4498 for (i=0;i<mimeout_buf_count;i++) {
4499 mimeout_addchar(mimeout_buf[i]);
4501 mimeout_buf_count = 0;
4502 } else if (SPACE<c && c<DEL) {
4504 for (i=0;i<mimeout_buf_count;i++) {
4505 (*o_mputc)(mimeout_buf[i]);
4508 mimeout_buf_count = 0;
4511 if (c==SPACE || c==TAB || c==CR || c==NL) {
4512 for (i=0;i<mimeout_buf_count;i++) {
4513 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4515 for (i=0;i<mimeout_buf_count;i++) {
4516 (*o_mputc)(mimeout_buf[i]);
4519 mimeout_buf_count = 0;
4522 mimeout_buf[mimeout_buf_count++] = c;
4523 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4525 for (i=0;i<mimeout_buf_count;i++) {
4526 (*o_mputc)(mimeout_buf[i]);
4529 mimeout_buf_count = 0;
4533 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
4534 mimeout_buf[mimeout_buf_count++] = c;
4535 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4536 j = mimeout_buf_count;
4537 mimeout_buf_count = 0;
4539 mimeout_addchar(mimeout_buf[i]);
4546 if (mimeout_buf_count>0) {
4547 j = mimeout_buf_count;
4548 mimeout_buf_count = 0;
4550 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4552 mimeout_addchar(mimeout_buf[i]);
4558 (*o_mputc)(mimeout_buf[i]);
4560 open_mime(output_mode);
/*
 * State-reset routine, compiled only when PERL_XS or WIN32DLL is defined.
 * It puts option flags, converter hooks, input hooks and decoder state back
 * to their defaults. The function header and a number of assignments are
 * not visible in this view.
 */
4567 #if defined(PERL_XS) || defined(WIN32DLL)
4572 struct input_code *p = input_code_list;
4585 mime_f = STRICT_MIME;
4586 mime_decode_f = FALSE;
4591 #if defined(MSDOS) || defined(__OS2__)
4596 iso2022jp_f = FALSE;
4597 #ifdef UTF8_OUTPUT_ENABLE
4600 ms_ucs_map_f = FALSE;
4612 is_inputcode_mixed = FALSE;
4613 is_inputcode_set = FALSE;
4617 #ifdef SHIFTJIS_CP932
/* Clear all 256 entries of prefix_table. */
4623 for (i = 0; i < 256; i++){
4624 prefix_table[i] = 0;
4627 #ifdef UTF8_INPUT_ENABLE
4628 utf16_mode = UTF16LE_INPUT;
/* Empty the MIME output staging buffer. */
4630 mimeout_buf_count = 0;
4635 fold_preserve_f = FALSE;
4638 kanji_intro = DEFAULT_J;
4639 ascii_intro = DEFAULT_R;
4640 fold_margin = FOLD_MARGIN;
4641 output_conv = DEFAULT_CONV;
4642 oconv = DEFAULT_CONV;
/* Every optional output-stage hook is pointed at the no_connection placeholder. */
4643 o_zconv = no_connection;
4644 o_fconv = no_connection;
4645 o_crconv = no_connection;
4646 o_rot_conv = no_connection;
4647 o_hira_conv = no_connection;
4648 o_base64conv = no_connection;
4649 o_iso2022jp_check_conv = no_connection;
/* The input hooks fall back to the plain std_getc and std_ungetc readers. */
4652 i_ungetc = std_ungetc;
4654 i_bungetc = std_ungetc;
4657 i_mungetc = std_ungetc;
4658 i_mgetc_buf = std_getc;
4659 i_mungetc_buf = std_ungetc;
4660 output_mode = ASCII;
4663 mime_decode_mode = FALSE;
4669 z_prev2=0,z_prev1=0;
4671 iconv_for_check = 0;
4673 input_codename = "";
/*
 * no_connection(c2, c1): two-argument placeholder converter. It forwards to
 * no_connection2() with 0 as the third argument.
 */
4681 no_connection(c2,c1)
4684 no_connection2(c2,c1,0);
/*
 * no_connection2(c2, c1, c0): three-argument placeholder converter. It
 * reports an internal module connection failure on stderr. The return of 0
 * is tagged LINT in the source.
 * NOTE(review): a line between the message and the return is not visible.
 */
4688 no_connection2(c2,c1,c0)
4691 fprintf(stderr,"nkf internal module connection failure.\n");
4693 return 0; /* LINT */
/*
 * Command-line help text, written to stderr with one fprintf per line.
 * The define directly below replaces fprintf with dllprintf for the code
 * that follows it (its enclosing conditional, if any, is not visible).
 * The four DEFAULT_CODE_* conditionals each carry one variant of the
 * j,s,e,w line, differing only in which code is marked DEFAULT. Other
 * lines appear only when the matching feature macro is defined.
 * NOTE(review): the emitted text spells Outout (Output), charactor
 * (character) and hirakana (hiragana). These are runtime strings and are
 * left untouched here.
 */
4698 #define fprintf dllprintf
4703 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4704 fprintf(stderr,"Flags:\n");
4705 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4706 #ifdef DEFAULT_CODE_SJIS
4707 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4709 #ifdef DEFAULT_CODE_JIS
4710 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4712 #ifdef DEFAULT_CODE_EUC
4713 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4715 #ifdef DEFAULT_CODE_UTF8
4716 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4718 #ifdef UTF8_OUTPUT_ENABLE
4719 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4721 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4722 #ifdef UTF8_INPUT_ENABLE
4723 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4725 fprintf(stderr,"t no conversion\n");
4726 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4727 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4728 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
4729 fprintf(stderr,"v Show this usage. V: show version\n");
4730 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4731 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4732 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4733 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4734 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4735 fprintf(stderr," 3: Convert HTML Entity\n");
4736 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4737 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4739 fprintf(stderr,"T Text mode output\n");
4741 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4742 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4743 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
4744 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4745 fprintf(stderr,"long name options\n");
4746 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4747 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4748 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4749 fprintf(stderr," --x0212 Convert JISX0212\n");
4750 fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
4752 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4754 #ifdef NUMCHAR_OPTION
4755 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4757 #ifdef UTF8_OUTPUT_ENABLE
4758 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4761 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4763 fprintf(stderr," -g, --guess Guess the input code\n");
4764 fprintf(stderr," --help,--version\n");
/*
 * Version banner, written to stderr: the program name with NKF_VERSION and
 * NKF_RELEASE_DATE substituted for the two %s conversions, followed by the
 * CopyRight string on a line of its own. The three MSDOS conditionals sit
 * inside the first call, between the format string and its arguments; what
 * each one contributes is not visible in this view.
 */
4771 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4772 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4775 #if defined(MSDOS) && defined(__WIN16__)
4778 #if defined(MSDOS) && defined(__WIN32__)
4784 ,NKF_VERSION,NKF_RELEASE_DATE);
4785 fprintf(stderr,"\n%s\n",CopyRight);
4790 ** Patch authors:
4791 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4792 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4793 ** ohta@src.ricoh.co.jp (Junn Ohta)
4794 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4795 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4796 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4797 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4798 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4799 ** GHG00637@nifty-serve.or.jp (COW)