1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.58 2005/02/17 16:48:48 rei_furukawa Exp $ */
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2005-02-02"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
153 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
154 #include <sys/utime.h>
155 #elif defined(__TURBOC__) /* BCC */
157 #elif defined(LSI_C) /* LSI C */
169 /* state of output_mode and input_mode
187 /* Input Assumption */
191 #define LATIN1_INPUT 6
193 #define STRICT_MIME 8
198 #define JAPANESE_EUC 10
202 #define UTF8_INPUT 13
203 #define UTF16LE_INPUT 14
204 #define UTF16BE_INPUT 15
/* True when c is an ASCII letter or digit ('a'-'z', 'A'-'Z', '0'-'9').
 * NOTE: the argument is expanded six times and is not parenthesized, so
 * pass only a simple, side-effect-free expression. */
224 #define is_alnum(c) \
225 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
227 #define HOLD_SIZE 1024
228 #define IOBUF_SIZE 16384
230 #define DEFAULT_J 'B'
231 #define DEFAULT_R 'B'
233 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
234 #define SJ6394 0x0161 /* 63 - 94 ku offset */
236 #define RANGE_NUM_MAX 18
241 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
242 #define sizeof_euc_utf8 94
243 #define sizeof_euc_to_utf8_1byte 94
244 #define sizeof_euc_to_utf8_2bytes 94
245 #define sizeof_utf8_to_euc_C2 64
246 #define sizeof_utf8_to_euc_E5B8 64
247 #define sizeof_utf8_to_euc_2bytes 112
248 #define sizeof_utf8_to_euc_3bytes 112
251 /* MIME preprocessor */
254 #ifdef EASYWIN /*Easy Win */
255 extern POINT _BufferSize;
258 /* function prototype */
260 #ifdef ANSI_C_PROTOTYPE
262 #define STATIC static
274 void (*status_func)PROTO((struct input_code *, int));
275 int (*iconv_func)PROTO((int c2, int c1, int c0));
279 STATIC char *input_codename = "";
281 STATIC int noconvert PROTO((FILE *f));
282 STATIC int kanji_convert PROTO((FILE *f));
283 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
284 STATIC int push_hold_buf PROTO((int c2));
285 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
286 STATIC int s_iconv PROTO((int c2,int c1,int c0));
287 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
288 STATIC int e_iconv PROTO((int c2,int c1,int c0));
289 #ifdef UTF8_INPUT_ENABLE
290 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
291 STATIC int w_iconv PROTO((int c2,int c1,int c0));
292 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
293 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
294 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
296 #ifdef UTF8_OUTPUT_ENABLE
297 STATIC int e2w_conv PROTO((int c2,int c1));
298 STATIC void w_oconv PROTO((int c2,int c1));
299 STATIC void w_oconv16 PROTO((int c2,int c1));
301 STATIC void e_oconv PROTO((int c2,int c1));
302 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
303 STATIC void s_oconv PROTO((int c2,int c1));
304 STATIC void j_oconv PROTO((int c2,int c1));
305 STATIC void fold_conv PROTO((int c2,int c1));
306 STATIC void cr_conv PROTO((int c2,int c1));
307 STATIC void z_conv PROTO((int c2,int c1));
308 STATIC void rot_conv PROTO((int c2,int c1));
309 STATIC void hira_conv PROTO((int c2,int c1));
310 STATIC void base64_conv PROTO((int c2,int c1));
311 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
312 STATIC void no_connection PROTO((int c2,int c1));
313 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
315 STATIC void code_score PROTO((struct input_code *ptr));
316 STATIC void code_status PROTO((int c));
318 STATIC void std_putc PROTO((int c));
319 STATIC int std_getc PROTO((FILE *f));
320 STATIC int std_ungetc PROTO((int c,FILE *f));
322 STATIC int broken_getc PROTO((FILE *f));
323 STATIC int broken_ungetc PROTO((int c,FILE *f));
325 STATIC int mime_begin PROTO((FILE *f));
326 STATIC int mime_getc PROTO((FILE *f));
327 STATIC int mime_ungetc PROTO((int c,FILE *f));
329 STATIC int mime_begin_strict PROTO((FILE *f));
330 STATIC int mime_getc_buf PROTO((FILE *f));
331 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
332 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
334 STATIC int base64decode PROTO((int c));
335 STATIC void mime_prechar PROTO((int c2, int c1));
336 STATIC void mime_putc PROTO((int c));
337 STATIC void open_mime PROTO((int c));
338 STATIC void close_mime PROTO(());
339 STATIC void usage PROTO(());
340 STATIC void version PROTO(());
341 STATIC void options PROTO((unsigned char *c));
343 STATIC void reinit PROTO(());
348 static unsigned char stdibuf[IOBUF_SIZE];
349 static unsigned char stdobuf[IOBUF_SIZE];
350 static unsigned char hold_buf[HOLD_SIZE*2];
351 static int hold_count;
353 /* MIME preprocessor fifo */
355 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
356 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) /* index mask; works because the size is a power of two */
357 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] /* slot for position n, wrapped into mime_buf by the mask */
358 static unsigned char mime_buf[MIME_BUF_SIZE];
359 static unsigned int mime_top = 0;
360 static unsigned int mime_last = 0; /* decoded */
361 static unsigned int mime_input = 0; /* undecoded */
364 static int unbuf_f = FALSE;
365 static int estab_f = FALSE;
366 static int nop_f = FALSE;
367 static int binmode_f = TRUE; /* binary mode */
368 static int rot_f = FALSE; /* ROT13/47 mode */
369 static int hira_f = FALSE; /* hiragana/katakana conversion */
370 static int input_f = FALSE; /* non fixed input code */
371 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
372 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
373 static int mimebuf_f = FALSE; /* MIME buffered input */
374 static int broken_f = FALSE; /* convert ESC-less broken JIS */
375 static int iso8859_f = FALSE; /* ISO8859 through */
376 static int mimeout_f = FALSE; /* base64 mode */
377 #if defined(MSDOS) || defined(__OS2__)
378 static int x0201_f = TRUE; /* Assume JISX0201 kana */
380 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
382 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
383 #ifdef UTF8_OUTPUT_ENABLE
384 static int unicode_bom_f= 0; /* Output Unicode BOM */
385 static int w_oconv16_LE = 0; /* utf-16 little endian */
386 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
390 #ifdef NUMCHAR_OPTION
392 #define CLASS_MASK 0x0f000000
393 #define CLASS_UTF16 0x01000000
397 static int cap_f = FALSE;
398 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
399 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
400 STATIC int cap_getc PROTO((FILE *f));
401 STATIC int cap_ungetc PROTO((int c,FILE *f));
403 static int url_f = FALSE;
404 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
405 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
406 STATIC int url_getc PROTO((FILE *f));
407 STATIC int url_ungetc PROTO((int c,FILE *f));
409 static int numchar_f = FALSE;
410 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc: receives the previous i_getc when numchar_getc is installed */
411 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc; /* receives the previous i_ungetc when numchar_ungetc is installed */
412 STATIC int numchar_getc PROTO((FILE *f));
413 STATIC int numchar_ungetc PROTO((int c,FILE *f));
417 static int noout_f = FALSE;
418 STATIC void no_putc PROTO((int c));
419 static int debug_f = FALSE;
420 STATIC void debug PROTO((char *str));
423 static int guess_f = FALSE;
424 STATIC void print_guessed_code PROTO((char *filename));
425 STATIC void set_input_codename PROTO((char *codename));
426 static int is_inputcode_mixed = FALSE;
427 static int is_inputcode_set = FALSE;
430 static int exec_f = 0;
433 #ifdef SHIFTJIS_CP932
434 STATIC int cp932_f = TRUE;
435 #define CP932_TABLE_BEGIN (0xfa)
436 #define CP932_TABLE_END (0xfc)
438 STATIC int cp932inv_f = TRUE;
439 #define CP932INV_TABLE_BEGIN (0xed)
440 #define CP932INV_TABLE_END (0xee)
442 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
443 #endif /* SHIFTJIS_CP932 */
446 STATIC int x0212_f = FALSE;
447 static int x0212_shift PROTO((int c));
448 static int x0212_unshift PROTO((int c));
451 STATIC unsigned char prefix_table[256];
453 STATIC void e_status PROTO((struct input_code *, int));
454 STATIC void s_status PROTO((struct input_code *, int));
456 #ifdef UTF8_INPUT_ENABLE
457 STATIC void w_status PROTO((struct input_code *, int));
458 STATIC void w16_status PROTO((struct input_code *, int));
459 static int utf16_mode = UTF16LE_INPUT;
462 struct input_code input_code_list[] = {
463 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
464 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
465 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
466 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
470 static int mimeout_mode = 0;
471 static int base64_count = 0;
473 /* X0208 -> ASCII converter */
476 static int f_line = 0; /* chars in line */
477 static int f_prev = 0;
478 static int fold_preserve_f = FALSE; /* preserve new lines */
479 static int fold_f = FALSE;
480 static int fold_len = 0;
483 static unsigned char kanji_intro = DEFAULT_J;
484 static unsigned char ascii_intro = DEFAULT_R;
488 #define FOLD_MARGIN 10
489 #define DEFAULT_FOLD 60
491 static int fold_margin = FOLD_MARGIN;
495 #ifdef DEFAULT_CODE_JIS
496 # define DEFAULT_CONV j_oconv
498 #ifdef DEFAULT_CODE_SJIS
499 # define DEFAULT_CONV s_oconv
501 #ifdef DEFAULT_CODE_EUC
502 # define DEFAULT_CONV e_oconv
504 #ifdef DEFAULT_CODE_UTF8
505 # define DEFAULT_CONV w_oconv
508 /* process default */
509 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
511 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
512 /* s_iconv or oconv */
513 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
515 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
516 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
517 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
518 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
519 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
520 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
521 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
523 /* static redirections */
525 static void (*o_putc)PROTO((int c)) = std_putc;
527 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
528 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
530 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
531 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
533 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
535 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
536 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
538 /* for strict mime */
539 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
540 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
543 static int output_mode = ASCII, /* output kanji mode */
544 input_mode = ASCII, /* input kanji mode */
545 shift_mode = FALSE; /* TRUE shift out, or X0201 */
546 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
548 /* X0201 / X0208 conversion tables */
550 /* X0201 kana conversion table */
553 unsigned char cv[]= {
554 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
555 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
556 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
557 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
558 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
559 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
560 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
561 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
562 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
563 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
564 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
565 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
566 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
567 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
568 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
569 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
573 /* X0201 kana conversion table for dakuten (voiced sound mark) */
576 unsigned char dv[]= {
577 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
579 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
582 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
583 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
584 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
585 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
586 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
588 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
595 /* X0201 kana conversion table for handakuten (semi-voiced sound mark) */
598 unsigned char ev[]= {
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
610 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 /* X0208 kigou conversion table */
619 /* 0x8140 - 0x819e */
621 unsigned char fv[] = {
623 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
624 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
625 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
626 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
627 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
628 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
629 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
630 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
631 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
634 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
640 static int file_out = FALSE;
642 static int overwrite = FALSE;
645 static int crmode_f = 0; /* CR, NL, CRLF */
646 #ifdef EASYWIN /*Easy Win */
647 static int end_check;
659 char *outfname = NULL;
662 #ifdef EASYWIN /*Easy Win */
663 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
666 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
667 cp = (unsigned char *)*argv;
672 if (pipe(fds) < 0 || (pid = fork()) < 0){
683 execvp(argv[1], &argv[1]);
697 if(x0201_f == WISH_TRUE)
698 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
700 if (binmode_f == TRUE)
702 if (freopen("","wb",stdout) == NULL)
709 setbuf(stdout, (char *) NULL);
711 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
714 if (binmode_f == TRUE)
716 if (freopen("","rb",stdin) == NULL) return (-1);
720 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
724 kanji_convert(stdin);
725 if (guess_f) print_guessed_code(NULL);
730 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
739 /* reopen file for stdout */
740 if (file_out == TRUE) {
743 outfname = malloc(strlen(origfname)
744 + strlen(".nkftmpXXXXXX")
750 strcpy(outfname, origfname);
754 for (i = strlen(outfname); i; --i){
755 if (outfname[i - 1] == '/'
756 || outfname[i - 1] == '\\'){
762 strcat(outfname, "ntXXXXXX");
764 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
767 strcat(outfname, ".nkftmpXXXXXX");
768 fd = mkstemp(outfname);
771 || (fd_backup = dup(fileno(stdout))) < 0
772 || dup2(fd, fileno(stdout)) < 0
783 outfname = "nkf.out";
786 if(freopen(outfname, "w", stdout) == NULL) {
790 if (binmode_f == TRUE) {
792 if (freopen("","wb",stdout) == NULL)
799 if (binmode_f == TRUE)
801 if (freopen("","rb",fin) == NULL)
806 setvbuffer(fin, stdibuf, IOBUF_SIZE);
810 char *filename = NULL;
812 if (nfiles > 1) filename = origfname;
813 if (guess_f) print_guessed_code(filename);
819 #if defined(MSDOS) && !defined(__MINGW32__)
827 if (dup2(fd_backup, fileno(stdout)) < 0){
830 if (stat(origfname, &sb)) {
831 fprintf(stderr, "Can't stat %s\n", origfname);
833 /* restore the original file's permissions */
834 if (chmod(outfname, sb.st_mode)) {
835 fprintf(stderr, "Can't set permission %s\n", outfname);
838 /* restore the original file's timestamps */
839 #if defined(MSDOS) && !defined(__MINGW32__)
840 tb[0] = tb[1] = sb.st_mtime;
841 if (utime(outfname, tb)) {
842 fprintf(stderr, "Can't set timestamp %s\n", outfname);
845 tb.actime = sb.st_atime;
846 tb.modtime = sb.st_mtime;
847 if (utime(outfname, &tb)) {
848 fprintf(stderr, "Can't set timestamp %s\n", outfname);
852 if (unlink(origfname)){
856 if (rename(outfname, origfname)) {
858 fprintf(stderr, "Can't rename %s to %s\n",
859 outfname, origfname);
867 #ifdef EASYWIN /*Easy Win */
868 if (file_out == FALSE)
869 scanf("%d",&end_check);
872 #else /* for Other OS */
873 if (file_out == TRUE)
903 {"katakana-hiragana","h3"},
910 #ifdef UTF8_OUTPUT_ENABLE
915 #ifdef UTF8_INPUT_ENABLE
917 {"utf16-input", "W16"},
926 #ifdef NUMCHAR_OPTION
927 {"numchar-input", ""},
933 #ifdef SHIFTJIS_CP932
943 static int option_mode = 0;
950 unsigned char *p = NULL;
962 case '-': /* literal options */
963 if (!*cp) { /* ignore the rest of arguments */
967 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
969 p = (unsigned char *)long_option[i].name;
970 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
978 cp = (unsigned char *)long_option[i].alias;
981 if (strcmp(long_option[i].name, "overwrite") == 0){
988 if (strcmp(long_option[i].name, "cap-input") == 0){
992 if (strcmp(long_option[i].name, "url-input") == 0){
997 #ifdef NUMCHAR_OPTION
998 if (strcmp(long_option[i].name, "numchar-input") == 0){
1004 if (strcmp(long_option[i].name, "no-output") == 0){
1008 if (strcmp(long_option[i].name, "debug") == 0){
1013 if (strcmp(long_option[i].name, "cp932") == 0){
1014 #ifdef SHIFTJIS_CP932
1018 #ifdef UTF8_OUTPUT_ENABLE
1019 ms_ucs_map_f = TRUE;
1023 if (strcmp(long_option[i].name, "no-cp932") == 0){
1024 #ifdef SHIFTJIS_CP932
1028 #ifdef UTF8_OUTPUT_ENABLE
1029 ms_ucs_map_f = FALSE;
1033 #ifdef SHIFTJIS_CP932
1034 if (strcmp(long_option[i].name, "cp932inv") == 0){
1041 if (strcmp(long_option[i].name, "x0212") == 0){
1048 if (strcmp(long_option[i].name, "exec-in") == 0){
1052 if (strcmp(long_option[i].name, "exec-out") == 0){
1057 #ifdef UTF8_OUTPUT_ENABLE
1058 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1059 ms_ucs_map_f = TRUE;
1063 if (strcmp(long_option[i].name, "prefix=") == 0){
1064 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1065 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1066 prefix_table[p[i]] = p[1];
1073 case 'b': /* buffered mode */
1076 case 'u': /* non bufferd mode */
1079 case 't': /* transparent mode */
1082 case 'j': /* JIS output */
1084 output_conv = j_oconv;
1086 case 'e': /* AT&T EUC output */
1087 output_conv = e_oconv;
1089 case 's': /* SJIS output */
1090 output_conv = s_oconv;
1092 case 'l': /* ISO8859 Latin-1 support, no conversion */
1093 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1094 input_f = LATIN1_INPUT;
1096 case 'i': /* Kanji IN ESC-$-@/B */
1097 if (*cp=='@'||*cp=='B')
1098 kanji_intro = *cp++;
1100 case 'o': /* ASCII IN ESC-(-J/B */
1101 if (*cp=='J'||*cp=='B'||*cp=='H')
1102 ascii_intro = *cp++;
1109 if ('9'>= *cp && *cp>='0')
1110 hira_f |= (*cp++ -'0');
1117 #if defined(MSDOS) || defined(__OS2__)
1132 #ifdef UTF8_OUTPUT_ENABLE
1133 case 'w': /* UTF-8 output */
1134 if ('1'== cp[0] && '6'==cp[1]) {
1135 output_conv = w_oconv16; cp+=2;
1137 unicode_bom_f=2; cp++;
1140 unicode_bom_f=1; cp++;
1142 } else if (cp[0] == 'B') {
1143 unicode_bom_f=2; cp++;
1145 unicode_bom_f=1; cp++;
1148 } else if (cp[0] == '8') {
1149 output_conv = w_oconv; cp++;
1152 unicode_bom_f=1; cp++;
1155 output_conv = w_oconv;
1158 #ifdef UTF8_INPUT_ENABLE
1159 case 'W': /* UTF-8 input */
1160 if ('1'== cp[0] && '6'==cp[1]) {
1161 input_f = UTF16LE_INPUT;
1164 } else if (cp[0] == 'B') {
1166 input_f = UTF16BE_INPUT;
1168 } else if (cp[0] == '8') {
1170 input_f = UTF8_INPUT;
1172 input_f = UTF8_INPUT;
1175 /* Input code assumption */
1176 case 'J': /* JIS input */
1177 case 'E': /* AT&T EUC input */
1178 input_f = JIS_INPUT;
1180 case 'S': /* MS Kanji input */
1181 input_f = SJIS_INPUT;
1182 if (x0201_f==NO_X0201) x0201_f=TRUE;
1184 case 'Z': /* Convert X0208 alphabet to asii */
1185 /* bit:0 Convert X0208
1186 bit:1 Convert Kankaku to one space
1187 bit:2 Convert Kankaku to two spaces
1188 bit:3 Convert HTML Entity
1190 if ('9'>= *cp && *cp>='0')
1191 alpha_f |= 1<<(*cp++ -'0');
1195 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1196 x0201_f = FALSE; /* No X0201->X0208 conversion */
1198 ESC-(-I in JIS, EUC, MS Kanji
1199 SI/SO in JIS, EUC, MS Kanji
1200 SSO in EUC, JIS, not in MS Kanji
1201 MS Kanji (0xa0-0xdf)
1203 ESC-(-I in JIS (0x20-0x5f)
1204 SSO in EUC (0xa0-0xdf)
1205 0xa0-0xd in MS Kanji (0xa0-0xdf)
1208 case 'X': /* Assume X0201 kana */
1209 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1212 case 'F': /* prserve new lines */
1213 fold_preserve_f = TRUE;
1214 case 'f': /* folding -f60 or -f */
1217 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1219 fold_len += *cp++ - '0';
1221 if (!(0<fold_len && fold_len<BUFSIZ))
1222 fold_len = DEFAULT_FOLD;
1226 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1228 fold_margin += *cp++ - '0';
1232 case 'm': /* MIME support */
1233 if (*cp=='B'||*cp=='Q') {
1234 mime_decode_mode = *cp++;
1235 mimebuf_f = FIXED_MIME;
1236 } else if (*cp=='N') {
1237 mime_f = TRUE; cp++;
1238 } else if (*cp=='S') {
1239 mime_f = STRICT_MIME; cp++;
1240 } else if (*cp=='0') {
1241 mime_f = FALSE; cp++;
1244 case 'M': /* MIME output */
1247 mimeout_f = FIXED_MIME; cp++;
1248 } else if (*cp=='Q') {
1250 mimeout_f = FIXED_MIME; cp++;
1255 case 'B': /* Broken JIS support */
1257 bit:1 allow any x on ESC-(-x or ESC-$-x
1258 bit:2 reset to ascii on NL
1260 if ('9'>= *cp && *cp>='0')
1261 broken_f |= 1<<(*cp++ -'0');
1266 case 'O':/* for Output file */
1270 case 'c':/* add cr code */
1273 case 'd':/* delete cr code */
1276 case 'I': /* ISO-2022-JP output */
1279 case 'L': /* line mode */
1280 if (*cp=='u') { /* unix */
1281 crmode_f = NL; cp++;
1282 } else if (*cp=='m') { /* mac */
1283 crmode_f = CR; cp++;
1284 } else if (*cp=='w') { /* windows */
1285 crmode_f = CRLF; cp++;
1286 } else if (*cp=='0') { /* no conversion */
1296 /* multiple options in one string are allowed, for the Perl module */
1297 while(*cp && *cp!='-') cp++;
1301 /* bogus option but ignored */
1307 #ifdef ANSI_C_PROTOTYPE
1308 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1310 struct input_code * find_inputcode_byfunc(iconv_func)
1311 int (*iconv_func)();
1315 struct input_code *p = input_code_list;
1317 if (iconv_func == p->iconv_func){
1327 static int (*iconv_for_check)() = 0;
1330 #ifdef ANSI_C_PROTOTYPE
1331 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1333 void set_iconv(f, iconv_func)
1335 int (*iconv_func)();
1338 #ifdef INPUT_CODE_FIX
1346 #ifdef INPUT_CODE_FIX
1347 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1353 if (estab_f && iconv_for_check != iconv){
1354 struct input_code *p = find_inputcode_byfunc(iconv);
1356 set_input_codename(p->name);
1357 debug(input_codename);
1359 iconv_for_check = iconv;
/* Score bits that set_code_score() ORs into input_code.score.
 * Each constant is the previous one shifted left by one bit. */
1364 #define SCORE_L2 (1) /* JIS level-2 kanji */
1365 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1366 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1367 #ifdef SHIFTJIS_CP932
1368 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1369 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1371 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1373 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1374 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1376 #define SCORE_INIT (SCORE_iMIME) /* initial score assigned by status_reset() */
1378 int score_table_A0[] = {
1381 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1382 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1385 int score_table_F0[] = {
1386 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1387 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1388 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1389 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1392 void set_code_score(ptr, score)
1393 struct input_code *ptr;
1397 ptr->score |= score;
1401 void clr_code_score(ptr, score)
1402 struct input_code *ptr;
1406 ptr->score &= ~score;
1410 void code_score(ptr)
1411 struct input_code *ptr;
1413 int c2 = ptr->buf[0];
1414 int c1 = ptr->buf[1];
1416 set_code_score(ptr, SCORE_ERROR);
1417 }else if (c2 == SSO){
1418 set_code_score(ptr, SCORE_KANA);
1419 #ifdef UTF8_OUTPUT_ENABLE
1420 }else if (!e2w_conv(c2, c1)){
1421 set_code_score(ptr, SCORE_NO_EXIST);
1423 }else if ((c2 & 0x70) == 0x20){
1424 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1425 }else if ((c2 & 0x70) == 0x70){
1426 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1427 }else if ((c2 & 0x70) >= 0x50){
1428 set_code_score(ptr, SCORE_L2);
1432 void status_disable(ptr)
1433 struct input_code *ptr;
1438 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1441 void status_push_ch(ptr, c)
1442 struct input_code *ptr;
1445 ptr->buf[ptr->index++] = c;
1448 void status_clear(ptr)
1449 struct input_code *ptr;
1455 void status_reset(ptr)
1456 struct input_code *ptr;
1459 ptr->score = SCORE_INIT;
1462 void status_reinit(ptr)
1463 struct input_code *ptr;
1466 ptr->_file_stat = 0;
1469 void status_check(ptr, c)
1470 struct input_code *ptr;
1473 if (c <= DEL && estab_f){
1478 void s_status(ptr, c)
1479 struct input_code *ptr;
1484 status_check(ptr, c);
1489 #ifdef NUMCHAR_OPTION
1490 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1493 }else if (0xa1 <= c && c <= 0xdf){
1494 status_push_ch(ptr, SSO);
1495 status_push_ch(ptr, c);
1498 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1500 status_push_ch(ptr, c);
1501 #ifdef SHIFTJIS_CP932
1503 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1505 status_push_ch(ptr, c);
1506 #endif /* SHIFTJIS_CP932 */
1508 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1510 status_push_ch(ptr, c);
1511 #endif /* X0212_ENABLE */
1513 status_disable(ptr);
1517 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1518 status_push_ch(ptr, c);
1519 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1523 status_disable(ptr);
1527 #ifdef SHIFTJIS_CP932
1528 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1529 status_push_ch(ptr, c);
1530 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1531 set_code_score(ptr, SCORE_CP932);
1536 #endif /* SHIFTJIS_CP932 */
1537 #ifndef X0212_ENABLE
1538 status_disable(ptr);
1544 void e_status(ptr, c)
1545 struct input_code *ptr;
1550 status_check(ptr, c);
1555 #ifdef NUMCHAR_OPTION
1556 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1559 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1561 status_push_ch(ptr, c);
1563 }else if (0x8f == c){
1565 status_push_ch(ptr, c);
1566 #endif /* X0212_ENABLE */
1568 status_disable(ptr);
1572 if (0xa1 <= c && c <= 0xfe){
1573 status_push_ch(ptr, c);
1577 status_disable(ptr);
1582 if (0xa1 <= c && c <= 0xfe){
1584 status_push_ch(ptr, c);
1586 status_disable(ptr);
1588 #endif /* X0212_ENABLE */
1592 #ifdef UTF8_INPUT_ENABLE
1593 void w16_status(ptr, c)
1594 struct input_code *ptr;
1601 if (ptr->_file_stat == 0){
1602 if (c == 0xfe || c == 0xff){
1604 status_push_ch(ptr, c);
1605 ptr->_file_stat = 1;
1607 status_disable(ptr);
1608 ptr->_file_stat = -1;
1610 }else if (ptr->_file_stat > 0){
1612 status_push_ch(ptr, c);
1613 }else if (ptr->_file_stat < 0){
1614 status_disable(ptr);
1620 status_disable(ptr);
1621 ptr->_file_stat = -1;
1623 status_push_ch(ptr, c);
1630 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1631 status_push_ch(ptr, c);
1634 status_disable(ptr);
1635 ptr->_file_stat = -1;
1641 void w_status(ptr, c)
1642 struct input_code *ptr;
1647 status_check(ptr, c);
1652 #ifdef NUMCHAR_OPTION
1653 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1656 }else if (0xc0 <= c && c <= 0xdf){
1658 status_push_ch(ptr, c);
1659 }else if (0xe0 <= c && c <= 0xef){
1661 status_push_ch(ptr, c);
1663 status_disable(ptr);
1668 if (0x80 <= c && c <= 0xbf){
1669 status_push_ch(ptr, c);
1670 if (ptr->index > ptr->stat){
1671 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1672 && ptr->buf[2] == 0xbf);
1673 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1674 &ptr->buf[0], &ptr->buf[1]);
1681 status_disable(ptr);
1692 int action_flag = 1;
1693 struct input_code *result = 0;
1694 struct input_code *p = input_code_list;
1696 (p->status_func)(p, c);
1699 }else if(p->stat == 0){
1710 if (result && !estab_f){
1711 set_iconv(TRUE, result->iconv_func);
1712 }else if (c <= DEL){
1713 struct input_code *ptr = input_code_list;
1722 #define STD_GC_BUFSIZE (256)
1723 int std_gc_buf[STD_GC_BUFSIZE];
1731 return std_gc_buf[--std_gc_ndx];
1741 if (std_gc_ndx == STD_GC_BUFSIZE){
1744 std_gc_buf[std_gc_ndx++] = c;
1762 while ((c = (*i_getc)(f)) != EOF)
1771 oconv = output_conv;
1774 /* replace continuation module, from output side */
1776 /* output redirection */
1778 if (noout_f || guess_f){
1785 if (mimeout_f == TRUE) {
1786 o_base64conv = oconv; oconv = base64_conv;
1788 /* base64_count = 0; */
1792 o_crconv = oconv; oconv = cr_conv;
1795 o_rot_conv = oconv; oconv = rot_conv;
1798 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1801 o_hira_conv = oconv; oconv = hira_conv;
1804 o_fconv = oconv; oconv = fold_conv;
1807 if (alpha_f || x0201_f) {
1808 o_zconv = oconv; oconv = z_conv;
1812 i_ungetc = std_ungetc;
1813 /* input redirection */
1816 i_cgetc = i_getc; i_getc = cap_getc;
1817 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1820 i_ugetc = i_getc; i_getc = url_getc;
1821 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1824 #ifdef NUMCHAR_OPTION
1826 i_ngetc = i_getc; i_getc = numchar_getc;
1827 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1830 if (mime_f && mimebuf_f==FIXED_MIME) {
1831 i_mgetc = i_getc; i_getc = mime_getc;
1832 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1835 i_bgetc = i_getc; i_getc = broken_getc;
1836 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1838 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1839 set_iconv(-TRUE, e_iconv);
1840 } else if (input_f == SJIS_INPUT) {
1841 set_iconv(-TRUE, s_iconv);
1842 #ifdef UTF8_INPUT_ENABLE
1843 } else if (input_f == UTF8_INPUT) {
1844 set_iconv(-TRUE, w_iconv);
1845 } else if (input_f == UTF16LE_INPUT) {
1846 set_iconv(-TRUE, w_iconv16);
1849 set_iconv(FALSE, e_iconv);
1853 struct input_code *p = input_code_list;
1861 Conversion main loop. Code detection only.
1871 module_connection();
1876 output_mode = ASCII;
1879 #define NEXT continue /* no output, get next */
1880 #define SEND ; /* output c1 and c2, get next */
1881 #define LAST break /* end of loop, go closing */
1883 while ((c1 = (*i_getc)(f)) != EOF) {
1888 /* in case of 8th bit is on */
1889 if (!estab_f&&!mime_decode_mode) {
1890 /* in case of not established yet */
1891 /* It is still ambiguous */
1892 if (h_conv(f, c2, c1)==EOF)
1898 /* in case of already established */
1900 /* ignore bogus code */
1906 /* second byte, 7 bit code */
1907 /* it might be kanji shifted */
1908 if ((c1 == DEL) || (c1 <= SPACE)) {
1909 /* ignore bogus first code */
1917 #ifdef UTF8_INPUT_ENABLE
1926 #ifdef NUMCHAR_OPTION
1927 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1930 } else if (c1 > DEL) {
1932 if (!estab_f && !iso8859_f) {
1933 /* not established yet */
1936 } else { /* estab_f==TRUE */
1941 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1942 /* SJIS X0201 Case... */
1943 if(iso2022jp_f && x0201_f==NO_X0201) {
1944 (*oconv)(GETA1, GETA2);
1951 } else if (c1==SSO && iconv != s_iconv) {
1952 /* EUC X0201 Case */
1953 c1 = (*i_getc)(f); /* skip SSO */
1955 if (SSP<=c1 && c1<0xe0) {
1956 if(iso2022jp_f && x0201_f==NO_X0201) {
1957 (*oconv)(GETA1, GETA2);
1964 } else { /* bogus code, skip SSO and one byte */
1968 /* already established */
1973 } else if ((c1 > SPACE) && (c1 != DEL)) {
1974 /* in case of Roman characters */
1976 /* output 1 shifted byte */
1980 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1981 /* output 1 shifted byte */
1982 if(iso2022jp_f && x0201_f==NO_X0201) {
1983 (*oconv)(GETA1, GETA2);
1990 /* look like bogus code */
1993 } else if (input_mode == X0208) {
1994 /* in case of Kanji shifted */
1997 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1998 /* Check MIME code */
1999 if ((c1 = (*i_getc)(f)) == EOF) {
2002 } else if (c1 == '?') {
2003 /* =? is mime conversion start sequence */
2004 if(mime_f == STRICT_MIME) {
2005 /* check in real detail */
2006 if (mime_begin_strict(f) == EOF)
2010 } else if (mime_begin(f) == EOF)
2020 /* normal ASCII code */
2023 } else if (c1 == SI) {
2026 } else if (c1 == SO) {
2029 } else if (c1 == ESC ) {
2030 if ((c1 = (*i_getc)(f)) == EOF) {
2031 /* (*oconv)(0, ESC); don't send bogus code */
2033 } else if (c1 == '$') {
2034 if ((c1 = (*i_getc)(f)) == EOF) {
2036 (*oconv)(0, ESC); don't send bogus code
2037 (*oconv)(0, '$'); */
2039 } else if (c1 == '@'|| c1 == 'B') {
2040 /* This is kanji introduction */
2043 set_input_codename("ISO-2022-JP");
2044 debug(input_codename);
2046 } else if (c1 == '(') {
2047 if ((c1 = (*i_getc)(f)) == EOF) {
2048 /* don't send bogus code
2054 } else if (c1 == '@'|| c1 == 'B') {
2055 /* This is kanji introduction */
2060 } else if (c1 == 'D'){
2064 #endif /* X0212_ENABLE */
2066 /* could be some special code */
2073 } else if (broken_f&0x2) {
2074 /* accept any ESC-(-x as broken code ... */
2084 } else if (c1 == '(') {
2085 if ((c1 = (*i_getc)(f)) == EOF) {
2086 /* don't send bogus code
2088 (*oconv)(0, '('); */
2092 /* This is X0201 kana introduction */
2093 input_mode = X0201; shift_mode = X0201;
2095 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2096 /* This is X0208 kanji introduction */
2097 input_mode = ASCII; shift_mode = FALSE;
2099 } else if (broken_f&0x2) {
2100 input_mode = ASCII; shift_mode = FALSE;
2105 /* maintain various input_mode here */
2109 } else if ( c1 == 'N' || c1 == 'n' ){
2111 c3 = (*i_getc)(f); /* skip SS2 */
2112 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2127 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2128 input_mode = ASCII; set_iconv(FALSE, 0);
2131 } else if (c1 == NL && mime_f && !mime_decode_mode ) {
2132 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2140 } else if (c1 == CR && mime_f && !mime_decode_mode ) {
2141 if ((c1=(*i_getc)(f))!=EOF) {
2145 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2162 if (input_mode == X0208)
2163 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2165 else if (input_mode == X0212)
2166 (*oconv)((0x8f << 8) | c2, c1);
2167 #endif /* X0212_ENABLE */
2168 else if (input_mode)
2169 (*oconv)(input_mode, c1); /* other special case */
2170 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2171 int c0 = (*i_getc)(f);
2174 (*iconv)(c2, c1, c0);
2180 /* goto next_word */
2184 (*iconv)(EOF, 0, 0);
2197 /** it must NOT be in the kanji shifted sequence */
2198 /** it must NOT be written in JIS7 */
2199 /** and it must be after 2 byte 8bit code */
2206 while ((c1 = (*i_getc)(f)) != EOF) {
2212 if (push_hold_buf(c1) == EOF || estab_f){
2218 struct input_code *p = input_code_list;
2219 struct input_code *result = p;
2224 if (p->score < result->score){
2229 set_iconv(FALSE, result->iconv_func);
2234 ** 1) EOF is detected, or
2235 ** 2) Code is established, or
2236 ** 3) Buffer is FULL (but last word is pushed)
2238 ** in 1) and 3) cases, we continue to use
2239 ** Kanji codes by oconv and leave estab_f unchanged.
2244 while (wc < hold_count){
2245 c2 = hold_buf[wc++];
2247 #ifdef NUMCHAR_OPTION
2248 || (c2 & CLASS_MASK) == CLASS_UTF16
2253 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2254 (*iconv)(X0201, c2, 0);
2257 if (wc < hold_count){
2258 c1 = hold_buf[wc++];
2267 if ((*iconv)(c2, c1, 0) < 0){
2269 if (wc < hold_count){
2270 c0 = hold_buf[wc++];
2279 (*iconv)(c2, c1, c0);
2292 if (hold_count >= HOLD_SIZE*2)
2294 hold_buf[hold_count++] = c2;
2295 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2298 int s2e_conv(c2, c1, p2, p1)
2303 #ifdef SHIFTJIS_CP932
2304 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2305 extern unsigned short shiftjis_cp932[3][189];
2306 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2312 #endif /* SHIFTJIS_CP932 */
2314 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2315 extern unsigned short shiftjis_x0212[3][189];
2316 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2319 c2 = (0x8f << 8) | (val >> 8);
2331 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2333 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2341 c2 = x0212_unshift(c2);
2356 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2359 int ret = s2e_conv(c2, c1, &c2, &c1);
2360 if (ret) return ret;
2374 }else if (c2 == 0x8f){
2378 c2 = (c2 << 8) | (c1 & 0x7f);
2380 #ifdef SHIFTJIS_CP932
2383 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2384 s2e_conv(s2, s1, &c2, &c1);
2385 if ((c2 & 0xff00) == 0){
2391 #endif /* SHIFTJIS_CP932 */
2392 #endif /* X0212_ENABLE */
2393 } else if (c2 == SSO){
2396 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2406 #ifdef UTF8_INPUT_ENABLE
2408 w2e_conv(c2, c1, c0, p2, p1)
2412 extern unsigned short * utf8_to_euc_2bytes[];
2413 extern unsigned short ** utf8_to_euc_3bytes[];
2416 if (0xc0 <= c2 && c2 <= 0xef) {
2417 unsigned short **pp;
2420 if (c0 == 0) return -1;
2421 pp = utf8_to_euc_3bytes[c2 - 0x80];
2422 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2424 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2426 #ifdef NUMCHAR_OPTION
2429 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2434 } else if (c2 == X0201) {
2447 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2455 w16w_conv(val, p2, p1, p0)
2463 }else if (val < 0x800){
2464 *p2 = 0xc0 | (val >> 6);
2465 *p1 = 0x80 | (val & 0x3f);
2468 *p2 = 0xe0 | (val >> 12);
2469 *p1 = 0x80 | ((val >> 6) & 0x3f);
2470 *p0 = 0x80 | (val & 0x3f);
2475 ww16_conv(c2, c1, c0)
2480 val = (c2 & 0x0f) << 12;
2481 val |= (c1 & 0x3f) << 6;
2483 }else if (c2 >= 0xc0){
2484 val = (c2 & 0x1f) << 6;
2493 w16e_conv(val, p2, p1)
2497 extern unsigned short * utf8_to_euc_2bytes[];
2498 extern unsigned short ** utf8_to_euc_3bytes[];
2500 unsigned short **pp;
2504 w16w_conv(val, &c2, &c1, &c0);
2507 pp = utf8_to_euc_3bytes[c2 - 0x80];
2508 psize = sizeof_utf8_to_euc_C2;
2509 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2511 pp = utf8_to_euc_2bytes;
2512 psize = sizeof_utf8_to_euc_2bytes;
2513 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2515 #ifdef NUMCHAR_OPTION
2518 *p1 = CLASS_UTF16 | val;
2530 w_iconv16(c2, c1, c0)
2535 if (c2==0376 && c1==0377){
2536 utf16_mode = UTF16LE_INPUT;
2538 } else if (c2==0377 && c1==0376){
2539 utf16_mode = UTF16BE_INPUT;
2542 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2544 tmp=c1; c1=c2; c2=tmp;
2546 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2550 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2551 if (ret) return ret;
2557 w_iconv_common(c1, c0, pp, psize, p2, p1)
2559 unsigned short **pp;
2567 if (pp == 0) return 1;
2570 if (c1 < 0 || psize <= c1) return 1;
2572 if (p == 0) return 1;
2575 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2577 if (val == 0) return 1;
2584 if (c2 == SO) c2 = X0201;
2593 #ifdef UTF8_OUTPUT_ENABLE
2598 extern unsigned short euc_to_utf8_1byte[];
2599 extern unsigned short * euc_to_utf8_2bytes[];
2600 extern unsigned short * euc_to_utf8_2bytes_ms[];
2604 p = euc_to_utf8_1byte;
2606 } else if (c2 >> 8 == 0x8f){
2607 extern unsigned short * x0212_to_utf8_2bytes[];
2608 c2 = (c2&0x7f) - 0x21;
2609 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2610 p = x0212_to_utf8_2bytes[c2];
2616 c2 = (c2&0x7f) - 0x21;
2617 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2618 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2623 c1 = (c1 & 0x7f) - 0x21;
2624 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2640 if (unicode_bom_f==2) {
2647 #ifdef NUMCHAR_OPTION
2648 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2649 w16w_conv(c1, &c2, &c1, &c0);
2653 if (c0) (*o_putc)(c0);
2660 output_mode = ASCII;
2662 } else if (c2 == ISO8859_1) {
2663 output_mode = ISO8859_1;
2664 (*o_putc)(c1 | 0x080);
2668 val = e2w_conv(c2, c1);
2670 w16w_conv(val, &c2, &c1, &c0);
2674 if (c0) (*o_putc)(c0);
2690 if (unicode_bom_f==2) {
2692 (*o_putc)((unsigned char)'\377');
2696 (*o_putc)((unsigned char)'\377');
2701 if (c2 == ISO8859_1) {
2704 #ifdef NUMCHAR_OPTION
2705 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2706 c2 = (c1 >> 8) & 0xff;
2710 unsigned short val = e2w_conv(c2, c1);
2711 c2 = (val >> 8) & 0xff;
2730 #ifdef NUMCHAR_OPTION
2731 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2732 w16e_conv(c1, &c2, &c1);
2733 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2741 } else if (c2 == 0) {
2742 output_mode = ASCII;
2744 } else if (c2 == X0201) {
2745 output_mode = JAPANESE_EUC;
2746 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2747 } else if (c2 == ISO8859_1) {
2748 output_mode = ISO8859_1;
2749 (*o_putc)(c1 | 0x080);
2751 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2752 output_mode = JAPANESE_EUC;
2753 #ifdef SHIFTJIS_CP932
2756 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2757 s2e_conv(s2, s1, &c2, &c1);
2761 if ((c2 & 0xff00) >> 8 == 0x8f){
2764 (*o_putc)((c2 & 0x7f) | 0x080);
2765 (*o_putc)(c1 | 0x080);
2768 (*o_putc)((c2 & 0x7f) | 0x080);
2769 (*o_putc)(c1 | 0x080);
2773 if ((c1<0x21 || 0x7e<c1) ||
2774 (c2<0x21 || 0x7e<c2)) {
2775 set_iconv(FALSE, 0);
2776 return; /* too late to rescue this char */
2778 output_mode = JAPANESE_EUC;
2779 (*o_putc)(c2 | 0x080);
2780 (*o_putc)(c1 | 0x080);
2790 if ((ret & 0xff00) == 0x8f00){
2791 if (0x75 <= c && c <= 0x7f){
2792 ret = c + (0x109 - 0x75);
2795 if (0x75 <= c && c <= 0x7f){
2796 ret = c + (0x113 - 0x75);
2803 int x0212_unshift(c)
2807 if (0x7f <= c && c <= 0x88){
2808 ret = c + (0x75 - 0x7f);
2809 }else if (0x89 <= c && c <= 0x92){
2810 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2814 #endif /* X0212_ENABLE */
2817 e2s_conv(c2, c1, p2, p1)
2818 int c2, c1, *p2, *p1;
2821 unsigned short *ptr;
2823 extern unsigned short *x0212_shiftjis[];
2825 if ((c2 & 0xff00) == 0x8f00){
2827 if (0x21 <= ndx && ndx <= 0x7e){
2828 ptr = x0212_shiftjis[ndx - 0x21];
2830 val = ptr[(c1 & 0x7f) - 0x21];
2840 c2 = x0212_shift(c2);
2842 #endif /* X0212_ENABLE */
2843 if ((c2 & 0xff00) == 0x8f00){
2846 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2847 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2856 #ifdef NUMCHAR_OPTION
2857 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2858 w16e_conv(c1, &c2, &c1);
2864 } else if (c2 == 0) {
2865 output_mode = ASCII;
2867 } else if (c2 == X0201) {
2868 output_mode = SHIFT_JIS;
2870 } else if (c2 == ISO8859_1) {
2871 output_mode = ISO8859_1;
2872 (*o_putc)(c1 | 0x080);
2874 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2875 output_mode = SHIFT_JIS;
2876 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2882 if ((c1<0x20 || 0x7e<c1) ||
2883 (c2<0x20 || 0x7e<c2)) {
2884 set_iconv(FALSE, 0);
2885 return; /* too late to rescue this char */
2887 output_mode = SHIFT_JIS;
2888 e2s_conv(c2, c1, &c2, &c1);
2890 #ifdef SHIFTJIS_CP932
2892 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2893 extern unsigned short cp932inv[2][189];
2894 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2900 #endif /* SHIFTJIS_CP932 */
2903 if (prefix_table[(unsigned char)c1]){
2904 (*o_putc)(prefix_table[(unsigned char)c1]);
2915 #ifdef NUMCHAR_OPTION
2916 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2917 w16e_conv(c1, &c2, &c1);
2921 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2924 (*o_putc)(ascii_intro);
2925 output_mode = ASCII;
2929 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2930 if (output_mode!=X0212) {
2931 output_mode = X0212;
2937 (*o_putc)(c2 & 0x7f);
2940 } else if (c2==X0201) {
2941 if (output_mode!=X0201) {
2942 output_mode = X0201;
2948 } else if (c2==ISO8859_1) {
2949 /* iso8859 introduction, or 8th bit on */
2950 /* Can we convert in 7bit form using ESC-'-'-A ?
2952 output_mode = ISO8859_1;
2954 } else if (c2 == 0) {
2955 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2958 (*o_putc)(ascii_intro);
2959 output_mode = ASCII;
2963 if (output_mode != X0208) {
2964 output_mode = X0208;
2967 (*o_putc)(kanji_intro);
2969 if (c1<0x20 || 0x7e<c1)
2971 if (c2<0x20 || 0x7e<c2)
2983 mime_prechar(c2, c1);
2984 (*o_base64conv)(c2,c1);
2988 static int broken_buf[3];
2989 static int broken_counter = 0;
2990 static int broken_last = 0;
2997 if (broken_counter>0) {
2998 return broken_buf[--broken_counter];
3001 if (c=='$' && broken_last != ESC
3002 && (input_mode==ASCII || input_mode==X0201)) {
3005 if (c1=='@'|| c1=='B') {
3006 broken_buf[0]=c1; broken_buf[1]=c;
3013 } else if (c=='(' && broken_last != ESC
3014 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3017 if (c1=='J'|| c1=='B') {
3018 broken_buf[0]=c1; broken_buf[1]=c;
3036 if (broken_counter<2)
3037 broken_buf[broken_counter++]=c;
3041 static int prev_cr = 0;
3049 if (! (c2==0&&c1==NL) ) {
3055 } else if (c1=='\r') {
3057 } else if (c1=='\n') {
3058 if (crmode_f==CRLF) {
3059 (*o_crconv)(0,'\r');
3060 } else if (crmode_f==CR) {
3061 (*o_crconv)(0,'\r');
3065 } else if (c1!='\032' || crmode_f!=NL){
3071 Return value of fold_conv()
3073 \n add newline and output char
3074 \r add newline and output nothing
3077 1 (or else) normal output
3079 fold state in prev (previous character)
3081 >0x80 Japanese (X0208/X0201)
3086 This fold algorithm does not preserve leading space in a line.
3087 This is the main difference from fmt.
/*
 * Width contributed to the current fold line by one character:
 * 2 when c2 is non-zero, otherwise 1.  c1 is accepted but not used.
 * c2 is parenthesized so that an expression argument is tested as a whole.
 */
#define char_size(c2,c1) ((c2) ? 2 : 1)
3099 if (c1== '\r' && !fold_preserve_f) {
3100 fold_state=0; /* ignore cr */
3101 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3103 fold_state=0; /* ignore cr */
3104 } else if (c1== BS) {
3105 if (f_line>0) f_line--;
3107 } else if (c2==EOF && f_line != 0) { /* close open last line */
3109 } else if ((c1=='\n' && !fold_preserve_f)
3110 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3111 && fold_preserve_f)) {
3113 if (fold_preserve_f) {
3117 } else if ((f_prev == c1 && !fold_preserve_f)
3118 || (f_prev == '\n' && fold_preserve_f)
3119 ) { /* duplicate newline */
3122 fold_state = '\n'; /* output two newline */
3128 if (f_prev&0x80) { /* Japanese? */
3130 fold_state = 0; /* ignore given single newline */
3131 } else if (f_prev==' ') {
3135 if (++f_line<=fold_len)
3139 fold_state = '\r'; /* fold and output nothing */
3143 } else if (c1=='\f') {
3148 fold_state = '\n'; /* output newline and clear */
3149 } else if ( (c2==0 && c1==' ')||
3150 (c2==0 && c1=='\t')||
3151 (c2=='!'&& c1=='!')) {
3152 /* X0208 kankaku or ascii space */
3153 if (f_prev == ' ') {
3154 fold_state = 0; /* remove duplicate spaces */
3157 if (++f_line<=fold_len)
3158 fold_state = ' '; /* output ASCII space only */
3160 f_prev = ' '; f_line = 0;
3161 fold_state = '\r'; /* fold and output nothing */
3165 prev0 = f_prev; /* we still need this one... , but almost done */
3167 if (c2 || c2==X0201)
3168 f_prev |= 0x80; /* this is Japanese */
3169 f_line += char_size(c2,c1);
3170 if (f_line<=fold_len) { /* normal case */
3173 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3174 f_line = char_size(c2,c1);
3175 fold_state = '\n'; /* We can't wait, do fold now */
3176 } else if (c2==X0201) {
3177 /* simple kinsoku rules return 1 means no folding */
3178 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3179 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3180 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3181 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3182 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3183 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3184 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3186 fold_state = '\n';/* add one new f_line before this character */
3189 fold_state = '\n';/* add one new f_line before this character */
3192 /* kinsoku point in ASCII */
3193 if ( c1==')'|| /* { [ ( */
3204 /* just after special */
3205 } else if (!is_alnum(prev0)) {
3206 f_line = char_size(c2,c1);
3208 } else if ((prev0==' ') || /* ignored new f_line */
3209 (prev0=='\n')|| /* ignored new f_line */
3210 (prev0&0x80)) { /* X0208 - ASCII */
3211 f_line = char_size(c2,c1);
3212 fold_state = '\n';/* add one new f_line before this character */
3214 fold_state = 1; /* default no fold in ASCII */
3218 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3219 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3220 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3221 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3222 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3223 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3224 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3225 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3226 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3227 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3228 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3229 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3230 /* default no fold in kinsoku */
3233 f_line = char_size(c2,c1);
3234 /* add one new f_line before this character */
3237 f_line = char_size(c2,c1);
3239 /* add one new f_line before this character */
3244 /* terminator process */
3245 switch(fold_state) {
3264 int z_prev2=0,z_prev1=0;
3271 /* if (c2) c1 &= 0x7f; assertion */
3273 if (x0201_f && z_prev2==X0201) { /* X0201 */
3274 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3276 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3278 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3280 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3284 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3293 if (x0201_f && c2==X0201) {
3294 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3295 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3296 z_prev1 = c1; z_prev2 = c2;
3299 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3304 /* JISX0208 Alphabet */
3305 if (alpha_f && c2 == 0x23 ) {
3307 } else if (alpha_f && c2 == 0x21 ) {
3308 /* JISX0208 Kigou */
3313 } else if (alpha_f&0x4) {
3318 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3324 case '>': entity = ">"; break;
3325 case '<': entity = "<"; break;
3326 case '\"': entity = """; break;
3327 case '&': entity = "&"; break;
3330 while (*entity) (*o_zconv)(0, *entity++);
3340 #define rot13(c) ( \
3342 (c <= 'M') ? (c + 13): \
3343 (c <= 'Z') ? (c - 13): \
3345 (c <= 'm') ? (c + 13): \
3346 (c <= 'z') ? (c - 13): \
3350 #define rot47(c) ( \
3352 ( c <= 'O' ) ? (c + 47) : \
3353 ( c <= '~' ) ? (c - 47) : \
3361 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3367 (*o_rot_conv)(c2,c1);
3374 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3376 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3379 (*o_hira_conv)(c2,c1);
3384 iso2022jp_check_conv(c2,c1)
3387 static int range[RANGE_NUM_MAX][2] = {
3410 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3414 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3419 for (i = 0; i < RANGE_NUM_MAX; i++) {
3420 start = range[i][0];
3423 if (c >= start && c <= end) {
3428 (*o_iso2022jp_check_conv)(c2,c1);
3432 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3434 unsigned char *mime_pattern[] = {
3435 (unsigned char *)"\075?EUC-JP?B?",
3436 (unsigned char *)"\075?SHIFT_JIS?B?",
3437 (unsigned char *)"\075?ISO-8859-1?Q?",
3438 (unsigned char *)"\075?ISO-8859-1?B?",
3439 (unsigned char *)"\075?ISO-2022-JP?B?",
3440 (unsigned char *)"\075?ISO-2022-JP?Q?",
3441 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3442 (unsigned char *)"\075?UTF-8?B?",
3443 (unsigned char *)"\075?UTF-8?Q?",
3445 (unsigned char *)"\075?US-ASCII?Q?",
3450 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3451 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3452 e_iconv, s_iconv, 0, 0, 0, 0,
3453 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3459 int mime_encode[] = {
3460 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3461 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3468 int mime_encode_method[] = {
3469 'B', 'B','Q', 'B', 'B', 'Q',
3470 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3478 #define MAXRECOVER 20
3480 /* I don't trust portability of toupper */
/*
 * Character classification helpers that compare directly against
 * character literals instead of calling <ctype.h>.
 *
 * Every use of the parameter is parenthesized so that an expression
 * argument (e.g. `c | 0`) is classified as a whole instead of being
 * split up by operator precedence.  The argument is still evaluated
 * more than once, so do not pass expressions with side effects.
 */
/* Map 'a'..'z' to 'A'..'Z'; any other value is returned unchanged. */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
/* Non-zero for '0'..'9'. */
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
/* Non-zero for '0'..'9', 'a'..'f' and 'A'..'F'. */
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
/* Non-zero for SPACE or TAB. */
#define nkf_isblank(c)  ((c) == SPACE || (c) == TAB)
/* Non-zero for SPACE, TAB, CR or NL. */
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == NL)
/* Non-zero for 'a'..'z' and 'A'..'Z'. */
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
/* Non-zero for digits and letters as defined above. */
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
3492 if (i_getc!=mime_getc) {
3493 i_mgetc = i_getc; i_getc = mime_getc;
3494 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3495 if(mime_f==STRICT_MIME) {
3496 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3497 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3503 unswitch_mime_getc()
3505 if(mime_f==STRICT_MIME) {
3506 i_mgetc = i_mgetc_buf;
3507 i_mungetc = i_mungetc_buf;
3510 i_ungetc = i_mungetc;
3514 mime_begin_strict(f)
3519 unsigned char *p,*q;
3520 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3522 mime_decode_mode = FALSE;
3523 /* =? has been checked */
3525 p = mime_pattern[j];
3528 for(i=2;p[i]>' ';i++) { /* start at =? */
3529 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3530 /* pattern fails, try next one */
3532 while ((p = mime_pattern[++j])) {
3533 for(k=2;k<i;k++) /* assume length(p) > i */
3534 if (p[k]!=q[k]) break;
3535 if (k==i && nkf_toupper(c1)==p[k]) break;
3537 if (p) continue; /* found next one, continue */
3538 /* all fails, output from recovery buffer */
3546 mime_decode_mode = p[i-2];
3548 set_iconv(FALSE, mime_priority_func[j]);
3549 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3551 if (mime_decode_mode=='B') {
3552 mimebuf_f = unbuf_f;
3554 /* do MIME integrity check */
3555 return mime_integrity(f,mime_pattern[j]);
3567 /* we don't keep eof of Fifo, because it contains ?= as
3568 a terminator. It was checked in mime_integrity. */
3569 return ((mimebuf_f)?
3570 (*i_mgetc_buf)(f):Fifo(mime_input++));
3574 mime_ungetc_buf(c,f)
3579 (*i_mungetc_buf)(c,f);
3581 Fifo(--mime_input)=c;
3592 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3593 /* re-read and convert again from mime_buffer. */
3595 /* =? has been checked */
3597 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3598 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3599 /* We accept any character type even if it is broken by new lines */
3600 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3601 if (c1=='\n'||c1==' '||c1=='\r'||
3602 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3604 /* Failed. But this could be another MIME preamble */
3612 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3613 if (!(++i<MAXRECOVER) || c1==EOF) break;
3614 if (c1=='b'||c1=='B') {
3615 mime_decode_mode = 'B';
3616 } else if (c1=='q'||c1=='Q') {
3617 mime_decode_mode = 'Q';
3621 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3622 if (!(++i<MAXRECOVER) || c1==EOF) break;
3624 mime_decode_mode = FALSE;
3630 if (!mime_decode_mode) {
3631 /* false MIME preamble, restart from mime_buffer */
3632 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3633 /* Since we are in MIME mode until buffer becomes empty, */
3634 /* we never go into mime_begin again for a while. */
3637 /* discard mime preamble, and goto MIME mode */
3639 /* do no MIME integrity check */
3640 return c1; /* used only for checking EOF */
3655 fprintf(stderr, "%s\n", str);
3661 set_input_codename (codename)
3666 strcmp(codename, "") != 0 &&
3667 strcmp(codename, input_codename) != 0)
3669 is_inputcode_mixed = TRUE;
3671 input_codename = codename;
3672 is_inputcode_set = TRUE;
3676 print_guessed_code (filename)
3679 char *codename = "BINARY";
3680 if (!is_inputcode_mixed) {
3681 if (strcmp(input_codename, "") == 0) {
3684 codename = input_codename;
3687 if (filename != NULL) printf("%s:", filename);
3688 printf("%s\n", codename);
3695 if (nkf_isdigit(x)) return x - '0';
3696 return nkf_toupper(x) - 'A' + 10;
3701 #ifdef ANSI_C_PROTOTYPE
3702 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3705 hex_getc(ch, f, g, u)
3718 if (!nkf_isxdigit(c2)){
3723 if (!nkf_isxdigit(c3)){
3728 return (hex2bin(c2) << 4) | hex2bin(c3);
3735 return hex_getc(':', f, i_cgetc, i_cungetc);
3743 return (*i_cungetc)(c, f);
3750 return hex_getc('%', f, i_ugetc, i_uungetc);
3758 return (*i_uungetc)(c, f);
3762 #ifdef NUMCHAR_OPTION
3767 int (*g)() = i_ngetc;
3768 int (*u)() = i_nungetc;
3779 if (buf[i] == 'x' || buf[i] == 'X'){
3780 for (j = 0; j < 5; j++){
3782 if (!nkf_isxdigit(buf[i])){
3789 c |= hex2bin(buf[i]);
3792 for (j = 0; j < 6; j++){
3796 if (!nkf_isdigit(buf[i])){
3803 c += hex2bin(buf[i]);
3809 return CLASS_UTF16 | c;
3819 numchar_ungetc(c, f)
3823 return (*i_nungetc)(c, f);
3832 int c1, c2, c3, c4, cc;
3833 int t1, t2, t3, t4, mode, exit_mode;
3837 int lwsp_size = 128;
3839 if (mime_top != mime_last) { /* Something is in FIFO */
3840 return Fifo(mime_top++);
3842 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3843 mime_decode_mode=FALSE;
3844 unswitch_mime_getc();
3845 return (*i_getc)(f);
3848 if (mimebuf_f == FIXED_MIME)
3849 exit_mode = mime_decode_mode;
3852 if (mime_decode_mode == 'Q') {
3853 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3855 if (c1=='_') return ' ';
3856 if (c1!='=' && c1!='?') {
3860 mime_decode_mode = exit_mode; /* prepare for quit */
3861 if (c1<=' ') return c1;
3862 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3863 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3864 /* end Q encoding */
3865 input_mode = exit_mode;
3867 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3868 if (lwsp_buf==NULL) {
3869 perror("can't malloc");
3872 while ((c1=(*i_getc)(f))!=EOF) {
3877 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3885 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3886 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3901 lwsp_buf[lwsp_count] = c1;
3902 if (lwsp_count++>lwsp_size){
3904 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3905 if (lwsp_buf_new==NULL) {
3908 perror("can't realloc");
3911 lwsp_buf = lwsp_buf_new;
3917 if (lwsp_count > 0) {
3918 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3922 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3923 i_ungetc(lwsp_buf[lwsp_count],f);
3931 if (c1=='='&&c2<' ') { /* this is soft wrap */
3932 while((c1 = (*i_mgetc)(f)) <=' ') {
3933 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3935 mime_decode_mode = 'Q'; /* still in MIME */
3936 goto restart_mime_q;
3939 mime_decode_mode = 'Q'; /* still in MIME */
3943 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3944 if (c2<=' ') return c2;
3945 mime_decode_mode = 'Q'; /* still in MIME */
/*
 * Value (0..15) of one hexadecimal digit character; any character that
 * is not a hex digit yields 0.  The parameter is parenthesized at every
 * use so expression arguments are handled correctly; it is evaluated
 * more than once, so avoid arguments with side effects.
 */
#define hex(c) (('0'<=(c)&&(c)<='9')?((c)-'0'):\
    ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
3948 return ((hex(c2)<<4) + hex(c3));
3951 if (mime_decode_mode != 'B') {
3952 mime_decode_mode = FALSE;
3953 return (*i_mgetc)(f);
3957 /* Base64 encoding */
3959 MIME allows line break in the middle of
3960 Base64, but we are very pessimistic in decoding
3961 in unbuf mode because MIME encoded code may broken by
3962 less or editor's control sequence (such as ESC-[-K in unbuffered
3963 mode. ignore incomplete MIME.
3965 mode = mime_decode_mode;
3966 mime_decode_mode = exit_mode; /* prepare for quit */
3968 while ((c1 = (*i_mgetc)(f))<=' ') {
3973 if ((c2 = (*i_mgetc)(f))<=' ') {
3976 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3977 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3980 if ((c1 == '?') && (c2 == '=')) {
3983 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3984 if (lwsp_buf==NULL) {
3985 perror("can't malloc");
3988 while ((c1=(*i_getc)(f))!=EOF) {
3993 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4001 if ((c1=(*i_getc)(f))!=EOF) {
4005 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4020 lwsp_buf[lwsp_count] = c1;
4021 if (lwsp_count++>lwsp_size){
4023 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4024 if (lwsp_buf_new==NULL) {
4027 perror("can't realloc");
4030 lwsp_buf = lwsp_buf_new;
4036 if (lwsp_count > 0) {
4037 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4041 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4042 i_ungetc(lwsp_buf[lwsp_count],f);
4051 if ((c3 = (*i_mgetc)(f))<=' ') {
4054 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4055 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4059 if ((c4 = (*i_mgetc)(f))<=' ') {
4062 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4063 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4067 mime_decode_mode = mode; /* still in MIME sigh... */
4069 /* BASE 64 decoding */
4071 t1 = 0x3f & base64decode(c1);
4072 t2 = 0x3f & base64decode(c2);
4073 t3 = 0x3f & base64decode(c3);
4074 t4 = 0x3f & base64decode(c4);
4075 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4077 Fifo(mime_last++) = cc;
4078 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4080 Fifo(mime_last++) = cc;
4081 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4083 Fifo(mime_last++) = cc;
4088 return Fifo(mime_top++);
/* NOTE(review): single visible statement of a push-back routine (header not
   part of this listing; presumably mime_ungetc -- confirm).
   Steps mime_top back by one and stores c in the Fifo slot it now selects,
   so a later read of the form Fifo(mime_top++) yields c again. */
4096 Fifo(--mime_top) = c;
/*
 * NOTE(review): fragment of a buffered MIME look-ahead routine. Its header,
 * local declarations, return statements and closing braces are not part of
 * this listing (presumably mime_integrity -- confirm against the full file).
 *
 * Visible behaviour: mime_input and mime_last are reset to mime_top, the
 * NUL-terminated string p is copied into the Fifo, and further characters
 * are then read with (*i_getc)(f) until EOF or until one of the tests in
 * the loop below fires.
 */
4107 /* In buffered mode, read until =? or NL or buffer full
4109 mime_input = mime_top;
4110 mime_last = mime_top;
4111 while(*p) Fifo(mime_input++) = *p++;
4114 while((c=(*i_getc)(f))!=EOF) {
4115 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4116 break; /* buffer full */
/* '=' arriving while d holds '?'. d is presumably the previously read
   character (its assignment is not visible here) -- TODO confirm. */
4118 if (c=='=' && d=='?') {
4119 /* checked. skip header, start decode */
4120 Fifo(mime_input++) = c;
4121 /* mime_last_input = mime_input; */
/* Only '+', '/', '=', '?' and characters accepted by is_alnum() pass this
   filter; the statement executed for any other character is not visible
   here (presumably it leaves the loop). */
4126 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4128 /* Should we check length mod 4? */
4129 Fifo(mime_input++) = c;
4132 /* In case of Incomplete MIME, no MIME decode */
/* The last character read is stored too. NOTE(review): if this statement
   follows the read loop, c can be EOF here -- confirm that is intended. */
4133 Fifo(mime_input++) = c;
4134 mime_last = mime_input; /* point undecoded buffer */
4135 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4136 switch_mime_getc(); /* anyway we need buffered getc */
/*
 * NOTE(review): body fragment of a Base64 character-to-value mapping. The
 * function header, the outer range tests and the return statement are not
 * part of this listing (presumably base64decode(), which the decoder calls
 * on c1..c4 -- confirm).
 *
 * The character constants on the right-hand sides are used as plain
 * integers and rely on ASCII: 'G' == 'a' - 26, '4' == 52, '>' == 62 and
 * '?' == 63, as the original inline comments spell out.
 */
4147 i = c - 'A'; /* A..Z 0-25 */
4149 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
/* Reached only for characters that failed an earlier test which is not
   visible here; anything above '/' is treated as a digit. */
4151 } else if (c > '/') {
4152 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4153 } else if (c == '+') {
4154 i = '>' /* 62 */ ; /* + 62 */
/* Value 63; the branch header that selects this assignment is not visible
   (presumably a final else, so it would not be limited to '/'). */
4156 i = '?' /* 63 */ ; /* / 63 */
/* Base64 alphabet: basis_64[v] is the output character for the 6-bit value
   v. The encoder indexes it with expressions such as c>>2 and c & 0x3F. */
4161 static char basis_64[] =
4162 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Threshold for the pending-output buffer: once mimeout_buf_count exceeds
   this value after a character is stored, mime_putc calls open_mime(). */
4165 #define MIMEOUT_BUF_LENGTH (60)
/* Characters held back before they are written or encoded. One element
   larger than MIMEOUT_BUF_LENGTH, presumably so the character stored just
   before that threshold check still fits -- TODO confirm. */
4166 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
/* Number of characters currently held in mimeout_buf. */
4167 int mimeout_buf_count = 0;
/* Non-zero suppresses the direct output of buffered leading
   SPACE/TAB/CR/NL characters in open_mime-style processing; it is reset to
   FALSE afterwards. */
4168 int mimeout_preserve_space = 0;
/* Map a value 0..15 to its upper-case hexadecimal digit character.
   The argument is not parenthesised in the expansion, so callers must pass
   a fully parenthesised expression (the visible callers do). */
4169 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
/*
 * NOTE(review): fragment of the routine that selects the MIME pattern and
 * encoding method for output mode `mode` and then re-submits the buffered
 * characters. Header, braces and several statements are not part of this
 * listing (presumably open_mime(mode) -- confirm).
 */
/* Default to the first pattern, then scan mime_encode[] for an entry equal
   to mode; the scan stops at a zero entry. */
4178 p = mime_pattern[0];
4179 for(i=0;mime_encode[i];i++) {
4180 if (mode == mime_encode[i]) {
4181 p = mime_pattern[i];
/* The encoding method is taken from the table index i the scan ended on. */
4185 mimeout_mode = mime_encode_method[i];
/* Separate handling once base64_count exceeds 45; most of that branch is
   not visible here. */
4188 if (base64_count>45) {
/* Test: space preservation is off and the buffered character at index i
   is SPACE, TAB, CR or NL (the guarded statement is not visible). */
4192 if (!mimeout_preserve_space && mimeout_buf_count>0
4193 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4194 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
/* Unless space preservation was requested, write buffered SPACE/TAB/CR/NL
   characters from index i onward directly with o_mputc. */
4198 if (!mimeout_preserve_space) {
4199 for (;i<mimeout_buf_count;i++) {
4200 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4201 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4202 (*o_mputc)(mimeout_buf[i]);
/* The preserve-space request is cleared here. */
4209 mimeout_preserve_space = FALSE;
/* Remember how many characters are buffered, mark the buffer empty, and
   hand buffered characters to mime_putc() (loop header not visible). */
4215 j = mimeout_buf_count;
4216 mimeout_buf_count = 0;
4218 mime_putc(mimeout_buf[i]);
/*
 * NOTE(review): fragment that appears to finish pending MIME output. The
 * header and the case labels are not part of this listing (presumably
 * eof_mime) -- confirm which mimeout_mode value selects each statement.
 */
4234 switch(mimeout_mode) {
/* Emit the low 2 bits kept in b64c as the top 2 bits of a 6-bit Base64
   value (the remaining 4 bits are zero). */
4239 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
/* Emit the low 4 bits kept in b64c as the top 4 bits of a 6-bit Base64
   value (the remaining 2 bits are zero). */
4245 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
/* The follow-up depends on mimeout_f and on whether the mode is 'Q'; the
   guarded statements are not visible here. */
4251 if (mimeout_f!=FIXED_MIME) {
4253 } else if (mimeout_mode != 'Q')
/*
 * NOTE(review): fragment of the per-character MIME encoder (presumably
 * mimeout_addchar(c), which mime_putc calls). The header, the case labels
 * and several statements are not part of this listing.
 */
4262 switch(mimeout_mode) {
/* CR and NL get their own branch (its body is not visible here). */
4267 } else if (c==CR||c==NL) {
/* Characters below SPACE, '=', '?', '_' and anything at or above DEL are
   written as two hexadecimal digits: high nibble first, then low nibble. */
4270 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4272 (*o_mputc)(itoh4(((c>>4)&0xf)));
4273 (*o_mputc)(itoh4((c&0xf)));
/* Base64: the bits of c above the lowest two select the output character. */
4282 (*o_mputc)(basis_64[c>>2]);
/* Base64: low 2 bits kept in b64c combined with the high nibble of c. */
4287 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
/* Base64: low 4 bits kept in b64c combined with the top 2 bits of c,
   followed by the low 6 bits of c as a second output character. */
4293 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4294 (*o_mputc)(basis_64[c & 0x3F]);
/* The (c2, c1) pair most recently passed to mime_prechar(). */
4305 int mime_lastchar2, mime_lastchar1;
/*
 * mime_prechar(c2, c1): called with a character pair before it is output;
 * may insert a line break or a SPACE through o_base64conv, then records
 * the pair in mime_lastchar2 / mime_lastchar1.
 * NOTE(review): the parameter declarations, the outer conditions and the
 * closing braces are not part of this listing, so the exact nesting of the
 * tests below must be confirmed against the full file.
 */
4307 void mime_prechar(c2, c1)
/* If the current count plus the buffered characters scaled by 4/3 (integer
   division first) would exceed 66: pass EOF, then NL, then SPACE to
   o_base64conv. */
4312 if (base64_count + mimeout_buf_count/3*4> 66){
4313 (*o_base64conv)(EOF,0);
4314 (*o_base64conv)(0,NL);
4315 (*o_base64conv)(0,SPACE);
/* Previous pair had a non-zero c2: insert a SPACE before a c1 that is at
   most DEL and is not whitespace. */
4317 }else if (mime_lastchar2){
4318 if (c1 <=DEL && !nkf_isspace(c1)){
4319 (*o_base64conv)(0,SPACE);
/* Current pair has a non-zero c2 while the previous pair had c2 == 0 and a
   non-zero, non-whitespace c1: insert a SPACE. */
4323 if (c2 && mime_lastchar2 == 0
4324 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
4325 (*o_base64conv)(0,SPACE);
/* Remember this pair for the next call. */
4328 mime_lastchar2 = c2;
4329 mime_lastchar1 = c1;
4340 if (mimeout_f == FIXED_MIME){
4341 if (mimeout_mode == 'Q'){
4342 if (base64_count > 71){
4343 if (c!=CR && c!=NL) {
4350 if (base64_count > 71){
4355 if (c == EOF) { /* c==EOF */
4359 if (c != EOF) { /* c!=EOF */
4365 /* mimeout_f != FIXED_MIME */
4367 if (c == EOF) { /* c==EOF */
4368 j = mimeout_buf_count;
4369 mimeout_buf_count = 0;
4372 if (nkf_isspace(mimeout_buf[i])){
4375 mimeout_addchar(mimeout_buf[i]);
4379 (*o_mputc)(mimeout_buf[i]);
4385 if (mimeout_mode=='Q') {
4386 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4398 if (mimeout_buf_count > 0){
4399 lastchar = mimeout_buf[mimeout_buf_count - 1];
4404 if (!mimeout_mode) {
4405 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
4406 if (nkf_isspace(c)) {
4407 if (c==CR || c==NL) {
4410 for (i=0;i<mimeout_buf_count;i++) {
4411 (*o_mputc)(mimeout_buf[i]);
4415 mimeout_buf_count = 1;
4417 if (base64_count > 1
4418 && base64_count + mimeout_buf_count > 76){
4421 if (!nkf_isspace(mimeout_buf[0])){
4426 mimeout_buf[mimeout_buf_count++] = c;
4427 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4428 open_mime(output_mode);
4433 if (lastchar==CR || lastchar == NL){
4434 for (i=0;i<mimeout_buf_count;i++) {
4435 (*o_mputc)(mimeout_buf[i]);
4438 mimeout_buf_count = 0;
4440 if (lastchar==SPACE) {
4441 for (i=0;i<mimeout_buf_count-1;i++) {
4442 (*o_mputc)(mimeout_buf[i]);
4445 mimeout_buf[0] = SPACE;
4446 mimeout_buf_count = 1;
4448 open_mime(output_mode);
4451 /* mimeout_mode == 'B', 1, 2 */
4452 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4453 if (lastchar == CR || lastchar == NL){
4454 if (nkf_isblank(c)) {
4455 for (i=0;i<mimeout_buf_count;i++) {
4456 mimeout_addchar(mimeout_buf[i]);