1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be used as a drop-in replacement for the conventional nkf.
32 ** When invoked as, e.g., nkf -e, input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as it is.
35 ** It is still quite likely to contain bugs
36 ** (especially in auto-detection, mixed encodings, and error handling).
38 ** If you find any problem, please contact
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.61 2005/02/20 11:57:53 naruse Exp $ */
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2005-02-20"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16LE_INPUT 14
212 #define UTF16BE_INPUT 15
232 #define is_alnum(c) \
233 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
235 #define HOLD_SIZE 1024
236 #define IOBUF_SIZE 16384
238 #define DEFAULT_J 'B'
239 #define DEFAULT_R 'B'
241 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
242 #define SJ6394 0x0161 /* 63 - 94 ku offset */
244 #define RANGE_NUM_MAX 18
249 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
250 #define sizeof_euc_utf8 94
251 #define sizeof_euc_to_utf8_1byte 94
252 #define sizeof_euc_to_utf8_2bytes 94
253 #define sizeof_utf8_to_euc_C2 64
254 #define sizeof_utf8_to_euc_E5B8 64
255 #define sizeof_utf8_to_euc_2bytes 112
256 #define sizeof_utf8_to_euc_3bytes 112
259 /* MIME preprocessor */
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
266 /* function prototype */
268 #ifdef ANSI_C_PROTOTYPE
270 #define STATIC static
282 void (*status_func)PROTO((struct input_code *, int));
283 int (*iconv_func)PROTO((int c2, int c1, int c0));
287 STATIC char *input_codename = "";
289 STATIC int noconvert PROTO((FILE *f));
290 STATIC int kanji_convert PROTO((FILE *f));
291 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
292 STATIC int push_hold_buf PROTO((int c2));
293 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
294 STATIC int s_iconv PROTO((int c2,int c1,int c0));
295 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
296 STATIC int e_iconv PROTO((int c2,int c1,int c0));
297 #ifdef UTF8_INPUT_ENABLE
298 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
299 STATIC int w_iconv PROTO((int c2,int c1,int c0));
300 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
301 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
302 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
304 #ifdef UTF8_OUTPUT_ENABLE
305 STATIC int e2w_conv PROTO((int c2,int c1));
306 STATIC void w_oconv PROTO((int c2,int c1));
307 STATIC void w_oconv16 PROTO((int c2,int c1));
309 STATIC void e_oconv PROTO((int c2,int c1));
310 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
311 STATIC void s_oconv PROTO((int c2,int c1));
312 STATIC void j_oconv PROTO((int c2,int c1));
313 STATIC void fold_conv PROTO((int c2,int c1));
314 STATIC void cr_conv PROTO((int c2,int c1));
315 STATIC void z_conv PROTO((int c2,int c1));
316 STATIC void rot_conv PROTO((int c2,int c1));
317 STATIC void hira_conv PROTO((int c2,int c1));
318 STATIC void base64_conv PROTO((int c2,int c1));
319 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
320 STATIC void no_connection PROTO((int c2,int c1));
321 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
323 STATIC void code_score PROTO((struct input_code *ptr));
324 STATIC void code_status PROTO((int c));
326 STATIC void std_putc PROTO((int c));
327 STATIC int std_getc PROTO((FILE *f));
328 STATIC int std_ungetc PROTO((int c,FILE *f));
330 STATIC int broken_getc PROTO((FILE *f));
331 STATIC int broken_ungetc PROTO((int c,FILE *f));
333 STATIC int mime_begin PROTO((FILE *f));
334 STATIC int mime_getc PROTO((FILE *f));
335 STATIC int mime_ungetc PROTO((int c,FILE *f));
337 STATIC int mime_begin_strict PROTO((FILE *f));
338 STATIC int mime_getc_buf PROTO((FILE *f));
339 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
340 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
342 STATIC int base64decode PROTO((int c));
343 STATIC void mime_prechar PROTO((int c2, int c1));
344 STATIC void mime_putc PROTO((int c));
345 STATIC void open_mime PROTO((int c));
346 STATIC void close_mime PROTO(());
347 STATIC void usage PROTO(());
348 STATIC void version PROTO(());
349 STATIC void options PROTO((unsigned char *c));
350 #if defined(PERL_XS) || defined(WIN32DLL)
351 STATIC void reinit PROTO(());
356 static unsigned char stdibuf[IOBUF_SIZE];
357 static unsigned char stdobuf[IOBUF_SIZE];
358 static unsigned char hold_buf[HOLD_SIZE*2];
359 static int hold_count;
361 /* MIME preprocessor fifo */
363 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
364 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
365 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
366 static unsigned char mime_buf[MIME_BUF_SIZE];
367 static unsigned int mime_top = 0;
368 static unsigned int mime_last = 0; /* decoded */
369 static unsigned int mime_input = 0; /* undecoded */
372 static int unbuf_f = FALSE;
373 static int estab_f = FALSE;
374 static int nop_f = FALSE;
375 static int binmode_f = TRUE; /* binary mode */
376 static int rot_f = FALSE; /* rot13/47 mode */
377 static int hira_f = FALSE; /* hira/kata henkan */
378 static int input_f = FALSE; /* non fixed input code */
379 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
380 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
381 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
382 static int mimebuf_f = FALSE; /* MIME buffered input */
383 static int broken_f = FALSE; /* convert ESC-less broken JIS */
384 static int iso8859_f = FALSE; /* ISO8859 through */
385 static int mimeout_f = FALSE; /* base64 mode */
386 #if defined(MSDOS) || defined(__OS2__)
387 static int x0201_f = TRUE; /* Assume JISX0201 kana */
389 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
391 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
392 #ifdef UTF8_OUTPUT_ENABLE
393 static int unicode_bom_f= 0; /* Output Unicode BOM */
394 static int w_oconv16_LE = 0; /* utf-16 little endian */
395 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
399 #ifdef NUMCHAR_OPTION
401 #define CLASS_MASK 0x0f000000
402 #define CLASS_UTF16 0x01000000
406 static int cap_f = FALSE;
407 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
408 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
409 STATIC int cap_getc PROTO((FILE *f));
410 STATIC int cap_ungetc PROTO((int c,FILE *f));
412 static int url_f = FALSE;
413 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
414 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
415 STATIC int url_getc PROTO((FILE *f));
416 STATIC int url_ungetc PROTO((int c,FILE *f));
418 static int numchar_f = FALSE;
419 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
420 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
421 STATIC int numchar_getc PROTO((FILE *f));
422 STATIC int numchar_ungetc PROTO((int c,FILE *f));
426 static int noout_f = FALSE;
427 STATIC void no_putc PROTO((int c));
428 static int debug_f = FALSE;
429 STATIC void debug PROTO((char *str));
432 static int guess_f = FALSE;
433 STATIC void print_guessed_code PROTO((char *filename));
434 STATIC void set_input_codename PROTO((char *codename));
435 static int is_inputcode_mixed = FALSE;
436 static int is_inputcode_set = FALSE;
439 static int exec_f = 0;
442 #ifdef SHIFTJIS_CP932
443 STATIC int cp932_f = TRUE;
444 #define CP932_TABLE_BEGIN (0xfa)
445 #define CP932_TABLE_END (0xfc)
447 STATIC int cp932inv_f = TRUE;
448 #define CP932INV_TABLE_BEGIN (0xed)
449 #define CP932INV_TABLE_END (0xee)
451 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
452 #endif /* SHIFTJIS_CP932 */
455 STATIC int x0212_f = FALSE;
456 static int x0212_shift PROTO((int c));
457 static int x0212_unshift PROTO((int c));
460 STATIC unsigned char prefix_table[256];
462 STATIC void e_status PROTO((struct input_code *, int));
463 STATIC void s_status PROTO((struct input_code *, int));
465 #ifdef UTF8_INPUT_ENABLE
466 STATIC void w_status PROTO((struct input_code *, int));
467 STATIC void w16_status PROTO((struct input_code *, int));
468 static int utf16_mode = UTF16LE_INPUT;
471 struct input_code input_code_list[] = {
472 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
473 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
474 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
475 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
479 static int mimeout_mode = 0;
480 static int base64_count = 0;
482 /* X0208 -> ASCII converter */
485 static int f_line = 0; /* chars in line */
486 static int f_prev = 0;
487 static int fold_preserve_f = FALSE; /* preserve new lines */
488 static int fold_f = FALSE;
489 static int fold_len = 0;
492 static unsigned char kanji_intro = DEFAULT_J;
493 static unsigned char ascii_intro = DEFAULT_R;
497 #define FOLD_MARGIN 10
498 #define DEFAULT_FOLD 60
500 static int fold_margin = FOLD_MARGIN;
504 #ifdef DEFAULT_CODE_JIS
505 # define DEFAULT_CONV j_oconv
507 #ifdef DEFAULT_CODE_SJIS
508 # define DEFAULT_CONV s_oconv
510 #ifdef DEFAULT_CODE_EUC
511 # define DEFAULT_CONV e_oconv
513 #ifdef DEFAULT_CODE_UTF8
514 # define DEFAULT_CONV w_oconv
517 /* process default */
518 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
520 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
521 /* s_iconv or oconv */
522 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
524 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
525 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
526 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
527 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
528 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
529 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
530 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
532 /* static redirections */
534 static void (*o_putc)PROTO((int c)) = std_putc;
536 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
537 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
539 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
540 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
542 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
544 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
545 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
547 /* for strict mime */
548 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
549 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
552 static int output_mode = ASCII, /* output kanji mode */
553 input_mode = ASCII, /* input kanji mode */
554 shift_mode = FALSE; /* TRUE shift out, or X0201 */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
557 /* X0201 / X0208 conversion tables */
559 /* X0201 kana conversion table */
562 unsigned char cv[]= {
563 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
564 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
565 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
566 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
567 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
568 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
569 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
570 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
571 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
572 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
573 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
574 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
575 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
576 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
577 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
578 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
582 /* X0201 kana conversion table for dakuten */
585 unsigned char dv[]= {
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
591 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
592 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
593 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
594 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
595 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
597 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 /* X0201 kana conversion table for han-dakuten */
607 unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
630 unsigned char fv[] = {
632 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
633 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
634 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
636 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
637 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
638 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
640 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
649 static int file_out = FALSE;
651 static int overwrite = FALSE;
654 static int crmode_f = 0; /* CR, NL, CRLF */
655 #ifdef EASYWIN /*Easy Win */
656 static int end_check;
659 #define STD_GC_BUFSIZE (256)
660 int std_gc_buf[STD_GC_BUFSIZE];
664 #include "nkf32dll.c"
665 #elif defined(PERL_XS)
675 char *outfname = NULL;
678 #ifdef EASYWIN /*Easy Win */
679 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
682 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
683 cp = (unsigned char *)*argv;
688 if (pipe(fds) < 0 || (pid = fork()) < 0){
699 execvp(argv[1], &argv[1]);
713 if(x0201_f == WISH_TRUE)
714 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
716 if (binmode_f == TRUE)
718 if (freopen("","wb",stdout) == NULL)
725 setbuf(stdout, (char *) NULL);
727 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
730 if (binmode_f == TRUE)
732 if (freopen("","rb",stdin) == NULL) return (-1);
736 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
740 kanji_convert(stdin);
741 if (guess_f) print_guessed_code(NULL);
746 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
755 /* reopen file for stdout */
756 if (file_out == TRUE) {
759 outfname = malloc(strlen(origfname)
760 + strlen(".nkftmpXXXXXX")
766 strcpy(outfname, origfname);
770 for (i = strlen(outfname); i; --i){
771 if (outfname[i - 1] == '/'
772 || outfname[i - 1] == '\\'){
778 strcat(outfname, "ntXXXXXX");
780 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
783 strcat(outfname, ".nkftmpXXXXXX");
784 fd = mkstemp(outfname);
787 || (fd_backup = dup(fileno(stdout))) < 0
788 || dup2(fd, fileno(stdout)) < 0
799 outfname = "nkf.out";
802 if(freopen(outfname, "w", stdout) == NULL) {
806 if (binmode_f == TRUE) {
808 if (freopen("","wb",stdout) == NULL)
815 if (binmode_f == TRUE)
817 if (freopen("","rb",fin) == NULL)
822 setvbuffer(fin, stdibuf, IOBUF_SIZE);
826 char *filename = NULL;
828 if (nfiles > 1) filename = origfname;
829 if (guess_f) print_guessed_code(filename);
835 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
843 if (dup2(fd_backup, fileno(stdout)) < 0){
846 if (stat(origfname, &sb)) {
847 fprintf(stderr, "Can't stat %s\n", origfname);
849 /* Restore the permissions */
850 if (chmod(outfname, sb.st_mode)) {
851 fprintf(stderr, "Can't set permission %s\n", outfname);
854 /* Restore the timestamps */
855 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
856 tb[0] = tb[1] = sb.st_mtime;
857 if (utime(outfname, tb)) {
858 fprintf(stderr, "Can't set timestamp %s\n", outfname);
861 tb.actime = sb.st_atime;
862 tb.modtime = sb.st_mtime;
863 if (utime(outfname, &tb)) {
864 fprintf(stderr, "Can't set timestamp %s\n", outfname);
868 if (unlink(origfname)){
872 if (rename(outfname, origfname)) {
874 fprintf(stderr, "Can't rename %s to %s\n",
875 outfname, origfname);
883 #ifdef EASYWIN /*Easy Win */
884 if (file_out == FALSE)
885 scanf("%d",&end_check);
888 #else /* for Other OS */
889 if (file_out == TRUE)
894 #endif /* WIN32DLL */
919 {"katakana-hiragana","h3"},
926 #ifdef UTF8_OUTPUT_ENABLE
931 #ifdef UTF8_INPUT_ENABLE
933 {"utf16-input", "W16"},
942 #ifdef NUMCHAR_OPTION
943 {"numchar-input", ""},
949 #ifdef SHIFTJIS_CP932
959 static int option_mode = 0;
966 unsigned char *p = NULL;
978 case '-': /* literal options */
979 if (!*cp) { /* ignore the rest of arguments */
983 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
985 p = (unsigned char *)long_option[i].name;
986 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
994 cp = (unsigned char *)long_option[i].alias;
997 if (strcmp(long_option[i].name, "overwrite") == 0){
1004 if (strcmp(long_option[i].name, "cap-input") == 0){
1008 if (strcmp(long_option[i].name, "url-input") == 0){
1013 #ifdef NUMCHAR_OPTION
1014 if (strcmp(long_option[i].name, "numchar-input") == 0){
1020 if (strcmp(long_option[i].name, "no-output") == 0){
1024 if (strcmp(long_option[i].name, "debug") == 0){
1029 if (strcmp(long_option[i].name, "cp932") == 0){
1030 #ifdef SHIFTJIS_CP932
1034 #ifdef UTF8_OUTPUT_ENABLE
1035 ms_ucs_map_f = TRUE;
1039 if (strcmp(long_option[i].name, "no-cp932") == 0){
1040 #ifdef SHIFTJIS_CP932
1044 #ifdef UTF8_OUTPUT_ENABLE
1045 ms_ucs_map_f = FALSE;
1049 #ifdef SHIFTJIS_CP932
1050 if (strcmp(long_option[i].name, "cp932inv") == 0){
1057 if (strcmp(long_option[i].name, "x0212") == 0){
1064 if (strcmp(long_option[i].name, "exec-in") == 0){
1068 if (strcmp(long_option[i].name, "exec-out") == 0){
1073 #ifdef UTF8_OUTPUT_ENABLE
1074 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1075 ms_ucs_map_f = TRUE;
1079 if (strcmp(long_option[i].name, "prefix=") == 0){
1080 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1081 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1082 prefix_table[p[i]] = p[1];
1089 case 'b': /* buffered mode */
1092 case 'u': /* unbuffered mode */
1095 case 't': /* transparent mode */
1098 case 'j': /* JIS output */
1100 output_conv = j_oconv;
1102 case 'e': /* AT&T EUC output */
1103 output_conv = e_oconv;
1105 case 's': /* SJIS output */
1106 output_conv = s_oconv;
1108 case 'l': /* ISO8859 Latin-1 support, no conversion */
1109 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1110 input_f = LATIN1_INPUT;
1112 case 'i': /* Kanji IN ESC-$-@/B */
1113 if (*cp=='@'||*cp=='B')
1114 kanji_intro = *cp++;
1116 case 'o': /* ASCII IN ESC-(-J/B */
1117 if (*cp=='J'||*cp=='B'||*cp=='H')
1118 ascii_intro = *cp++;
1125 if ('9'>= *cp && *cp>='0')
1126 hira_f |= (*cp++ -'0');
1133 #if defined(MSDOS) || defined(__OS2__)
1148 #ifdef UTF8_OUTPUT_ENABLE
1149 case 'w': /* UTF-8 output */
1150 if ('1'== cp[0] && '6'==cp[1]) {
1151 output_conv = w_oconv16; cp+=2;
1153 unicode_bom_f=2; cp++;
1156 unicode_bom_f=1; cp++;
1158 } else if (cp[0] == 'B') {
1159 unicode_bom_f=2; cp++;
1161 unicode_bom_f=1; cp++;
1164 } else if (cp[0] == '8') {
1165 output_conv = w_oconv; cp++;
1168 unicode_bom_f=1; cp++;
1171 output_conv = w_oconv;
1174 #ifdef UTF8_INPUT_ENABLE
1175 case 'W': /* UTF-8 input */
1176 if ('1'== cp[0] && '6'==cp[1]) {
1177 input_f = UTF16LE_INPUT;
1180 } else if (cp[0] == 'B') {
1182 input_f = UTF16BE_INPUT;
1184 } else if (cp[0] == '8') {
1186 input_f = UTF8_INPUT;
1188 input_f = UTF8_INPUT;
1191 /* Input code assumption */
1192 case 'J': /* JIS input */
1193 case 'E': /* AT&T EUC input */
1194 input_f = JIS_INPUT;
1196 case 'S': /* MS Kanji input */
1197 input_f = SJIS_INPUT;
1198 if (x0201_f==NO_X0201) x0201_f=TRUE;
1200 case 'Z': /* Convert X0208 alphabet to ASCII */
1201 /* bit:0 Convert X0208
1202 bit:1 Convert Kankaku to one space
1203 bit:2 Convert Kankaku to two spaces
1204 bit:3 Convert HTML Entity
1206 if ('9'>= *cp && *cp>='0')
1207 alpha_f |= 1<<(*cp++ -'0');
1211 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1212 x0201_f = FALSE; /* No X0201->X0208 conversion */
1214 ESC-(-I in JIS, EUC, MS Kanji
1215 SI/SO in JIS, EUC, MS Kanji
1216 SSO in EUC, JIS, not in MS Kanji
1217 MS Kanji (0xa0-0xdf)
1219 ESC-(-I in JIS (0x20-0x5f)
1220 SSO in EUC (0xa0-0xdf)
1221 0xa0-0xd in MS Kanji (0xa0-0xdf)
1224 case 'X': /* Assume X0201 kana */
1225 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1228 case 'F': /* preserve new lines */
1229 fold_preserve_f = TRUE;
1230 case 'f': /* folding -f60 or -f */
1233 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1235 fold_len += *cp++ - '0';
1237 if (!(0<fold_len && fold_len<BUFSIZ))
1238 fold_len = DEFAULT_FOLD;
1242 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1244 fold_margin += *cp++ - '0';
1248 case 'm': /* MIME support */
1249 mime_decode_f = TRUE;
1250 if (*cp=='B'||*cp=='Q') {
1251 mime_decode_mode = *cp++;
1252 mimebuf_f = FIXED_MIME;
1253 } else if (*cp=='N') {
1254 mime_f = TRUE; cp++;
1255 } else if (*cp=='S') {
1256 mime_f = STRICT_MIME; cp++;
1257 } else if (*cp=='0') {
1258 mime_f = FALSE; cp++;
1261 case 'M': /* MIME output */
1264 mimeout_f = FIXED_MIME; cp++;
1265 } else if (*cp=='Q') {
1267 mimeout_f = FIXED_MIME; cp++;
1272 case 'B': /* Broken JIS support */
1274 bit:1 allow any x on ESC-(-x or ESC-$-x
1275 bit:2 reset to ascii on NL
1277 if ('9'>= *cp && *cp>='0')
1278 broken_f |= 1<<(*cp++ -'0');
1283 case 'O':/* for Output file */
1287 case 'c':/* add cr code */
1290 case 'd':/* delete cr code */
1293 case 'I': /* ISO-2022-JP output */
1296 case 'L': /* line mode */
1297 if (*cp=='u') { /* unix */
1298 crmode_f = NL; cp++;
1299 } else if (*cp=='m') { /* mac */
1300 crmode_f = CR; cp++;
1301 } else if (*cp=='w') { /* windows */
1302 crmode_f = CRLF; cp++;
1303 } else if (*cp=='0') { /* no conversion */
1313 /* multiple options in a string are allowed for the Perl module */
1314 while(*cp && *cp!='-') cp++;
1318 /* bogus option but ignored */
1324 #ifdef ANSI_C_PROTOTYPE
1325 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1327 struct input_code * find_inputcode_byfunc(iconv_func)
1328 int (*iconv_func)();
1332 struct input_code *p = input_code_list;
1334 if (iconv_func == p->iconv_func){
1344 static int (*iconv_for_check)() = 0;
1347 #ifdef ANSI_C_PROTOTYPE
1348 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1350 void set_iconv(f, iconv_func)
1352 int (*iconv_func)();
1355 #ifdef INPUT_CODE_FIX
1363 #ifdef INPUT_CODE_FIX
1364 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1370 if (estab_f && iconv_for_check != iconv){
1371 struct input_code *p = find_inputcode_byfunc(iconv);
1373 set_input_codename(p->name);
1374 debug(input_codename);
1376 iconv_for_check = iconv;
1381 #define SCORE_L2 (1) /* JIS level-2 kanji */
1382 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1383 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1384 #ifdef SHIFTJIS_CP932
1385 #define SCORE_CP932 (SCORE_DEPEND << 1) /* substitution via CP932 */
1386 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* non-existent character */
1388 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* non-existent character */
1390 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1391 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1393 #define SCORE_INIT (SCORE_iMIME)
1395 int score_table_A0[] = {
1398 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1399 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1402 int score_table_F0[] = {
1403 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1404 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1405 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1406 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1409 void set_code_score(ptr, score)
1410 struct input_code *ptr;
1414 ptr->score |= score;
1418 void clr_code_score(ptr, score)
1419 struct input_code *ptr;
1423 ptr->score &= ~score;
1427 void code_score(ptr)
1428 struct input_code *ptr;
1430 int c2 = ptr->buf[0];
1431 int c1 = ptr->buf[1];
1433 set_code_score(ptr, SCORE_ERROR);
1434 }else if (c2 == SSO){
1435 set_code_score(ptr, SCORE_KANA);
1436 #ifdef UTF8_OUTPUT_ENABLE
1437 }else if (!e2w_conv(c2, c1)){
1438 set_code_score(ptr, SCORE_NO_EXIST);
1440 }else if ((c2 & 0x70) == 0x20){
1441 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1442 }else if ((c2 & 0x70) == 0x70){
1443 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1444 }else if ((c2 & 0x70) >= 0x50){
1445 set_code_score(ptr, SCORE_L2);
1449 void status_disable(ptr)
1450 struct input_code *ptr;
1455 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1458 void status_push_ch(ptr, c)
1459 struct input_code *ptr;
1462 ptr->buf[ptr->index++] = c;
1465 void status_clear(ptr)
1466 struct input_code *ptr;
1472 void status_reset(ptr)
1473 struct input_code *ptr;
1476 ptr->score = SCORE_INIT;
1479 void status_reinit(ptr)
1480 struct input_code *ptr;
1483 ptr->_file_stat = 0;
1486 void status_check(ptr, c)
1487 struct input_code *ptr;
1490 if (c <= DEL && estab_f){
1495 void s_status(ptr, c)
1496 struct input_code *ptr;
1501 status_check(ptr, c);
1506 #ifdef NUMCHAR_OPTION
1507 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1510 }else if (0xa1 <= c && c <= 0xdf){
1511 status_push_ch(ptr, SSO);
1512 status_push_ch(ptr, c);
1515 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1517 status_push_ch(ptr, c);
1518 #ifdef SHIFTJIS_CP932
1520 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1522 status_push_ch(ptr, c);
1523 #endif /* SHIFTJIS_CP932 */
1525 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1527 status_push_ch(ptr, c);
1528 #endif /* X0212_ENABLE */
1530 status_disable(ptr);
1534 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1535 status_push_ch(ptr, c);
1536 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1540 status_disable(ptr);
1544 #ifdef SHIFTJIS_CP932
1545 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1546 status_push_ch(ptr, c);
1547 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1548 set_code_score(ptr, SCORE_CP932);
1553 #endif /* SHIFTJIS_CP932 */
1554 #ifndef X0212_ENABLE
1555 status_disable(ptr);
1561 void e_status(ptr, c)
1562 struct input_code *ptr;
1567 status_check(ptr, c);
1572 #ifdef NUMCHAR_OPTION
1573 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1576 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1578 status_push_ch(ptr, c);
1580 }else if (0x8f == c){
1582 status_push_ch(ptr, c);
1583 #endif /* X0212_ENABLE */
1585 status_disable(ptr);
1589 if (0xa1 <= c && c <= 0xfe){
1590 status_push_ch(ptr, c);
1594 status_disable(ptr);
1599 if (0xa1 <= c && c <= 0xfe){
1601 status_push_ch(ptr, c);
1603 status_disable(ptr);
1605 #endif /* X0212_ENABLE */
1609 #ifdef UTF8_INPUT_ENABLE
1610 void w16_status(ptr, c)
1611 struct input_code *ptr;
1618 if (ptr->_file_stat == 0){
1619 if (c == 0xfe || c == 0xff){
1621 status_push_ch(ptr, c);
1622 ptr->_file_stat = 1;
1624 status_disable(ptr);
1625 ptr->_file_stat = -1;
1627 }else if (ptr->_file_stat > 0){
1629 status_push_ch(ptr, c);
1630 }else if (ptr->_file_stat < 0){
1631 status_disable(ptr);
1637 status_disable(ptr);
1638 ptr->_file_stat = -1;
1640 status_push_ch(ptr, c);
1647 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1648 status_push_ch(ptr, c);
1651 status_disable(ptr);
1652 ptr->_file_stat = -1;
1658 void w_status(ptr, c)
1659 struct input_code *ptr;
1664 status_check(ptr, c);
1669 #ifdef NUMCHAR_OPTION
1670 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1673 }else if (0xc0 <= c && c <= 0xdf){
1675 status_push_ch(ptr, c);
1676 }else if (0xe0 <= c && c <= 0xef){
1678 status_push_ch(ptr, c);
1680 status_disable(ptr);
1685 if (0x80 <= c && c <= 0xbf){
1686 status_push_ch(ptr, c);
1687 if (ptr->index > ptr->stat){
1688 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1689 && ptr->buf[2] == 0xbf);
1690 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1691 &ptr->buf[0], &ptr->buf[1]);
1698 status_disable(ptr);
1709 int action_flag = 1;
1710 struct input_code *result = 0;
1711 struct input_code *p = input_code_list;
1713 (p->status_func)(p, c);
1716 }else if(p->stat == 0){
1727 if (result && !estab_f){
1728 set_iconv(TRUE, result->iconv_func);
1729 }else if (c <= DEL){
1730 struct input_code *ptr = input_code_list;
1745 return std_gc_buf[--std_gc_ndx];
1756 if (std_gc_ndx == STD_GC_BUFSIZE){
1759 std_gc_buf[std_gc_ndx++] = c;
1779 while ((c = (*i_getc)(f)) != EOF)
1788 oconv = output_conv;
1791 /* replace continuation module, from output side */
1793 /* output redirection */
1795 if (noout_f || guess_f){
1802 if (mimeout_f == TRUE) {
1803 o_base64conv = oconv; oconv = base64_conv;
1805 /* base64_count = 0; */
1809 o_crconv = oconv; oconv = cr_conv;
1812 o_rot_conv = oconv; oconv = rot_conv;
1815 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1818 o_hira_conv = oconv; oconv = hira_conv;
1821 o_fconv = oconv; oconv = fold_conv;
1824 if (alpha_f || x0201_f) {
1825 o_zconv = oconv; oconv = z_conv;
1829 i_ungetc = std_ungetc;
1830 /* input redirection */
1833 i_cgetc = i_getc; i_getc = cap_getc;
1834 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1837 i_ugetc = i_getc; i_getc = url_getc;
1838 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1841 #ifdef NUMCHAR_OPTION
1843 i_ngetc = i_getc; i_getc = numchar_getc;
1844 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1847 if (mime_f && mimebuf_f==FIXED_MIME) {
1848 i_mgetc = i_getc; i_getc = mime_getc;
1849 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1852 i_bgetc = i_getc; i_getc = broken_getc;
1853 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1855 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1856 set_iconv(-TRUE, e_iconv);
1857 } else if (input_f == SJIS_INPUT) {
1858 set_iconv(-TRUE, s_iconv);
1859 #ifdef UTF8_INPUT_ENABLE
1860 } else if (input_f == UTF8_INPUT) {
1861 set_iconv(-TRUE, w_iconv);
1862 } else if (input_f == UTF16LE_INPUT) {
1863 set_iconv(-TRUE, w_iconv16);
1866 set_iconv(FALSE, e_iconv);
1870 struct input_code *p = input_code_list;
1878 Conversion main loop. Code detection only.
1888 module_connection();
1893 output_mode = ASCII;
1896 #define NEXT continue /* no output, get next */
1897 #define SEND ; /* output c1 and c2, get next */
1898 #define LAST break /* end of loop, go closing */
1900 while ((c1 = (*i_getc)(f)) != EOF) {
1905 /* in case of 8th bit is on */
1906 if (!estab_f&&!mime_decode_mode) {
1907 /* in case of not established yet */
1908 /* It is still ambiguous */
1909 if (h_conv(f, c2, c1)==EOF)
1915 /* in case of already established */
1917 /* ignore bogus code */
1923 /* second byte, 7 bit code */
1924 /* it might be kanji shifted */
1925 if ((c1 == DEL) || (c1 <= SPACE)) {
1926 /* ignore bogus first code */
1934 #ifdef UTF8_INPUT_ENABLE
1943 #ifdef NUMCHAR_OPTION
1944 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1947 } else if (c1 > DEL) {
1949 if (!estab_f && !iso8859_f) {
1950 /* not established yet */
1953 } else { /* estab_f==TRUE */
1958 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1959 /* SJIS X0201 Case... */
1960 if(iso2022jp_f && x0201_f==NO_X0201) {
1961 (*oconv)(GETA1, GETA2);
1968 } else if (c1==SSO && iconv != s_iconv) {
1969 /* EUC X0201 Case */
1970 c1 = (*i_getc)(f); /* skip SSO */
1972 if (SSP<=c1 && c1<0xe0) {
1973 if(iso2022jp_f && x0201_f==NO_X0201) {
1974 (*oconv)(GETA1, GETA2);
1981 } else { /* bogus code, skip SSO and one byte */
1985 /* already established */
1990 } else if ((c1 > SPACE) && (c1 != DEL)) {
1991 /* in case of Roman characters */
1993 /* output 1 shifted byte */
1997 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1998 /* output 1 shifted byte */
1999 if(iso2022jp_f && x0201_f==NO_X0201) {
2000 (*oconv)(GETA1, GETA2);
2007 /* look like bogus code */
2010 } else if (input_mode == X0208) {
2011 /* in case of Kanji shifted */
2014 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2015 /* Check MIME code */
2016 if ((c1 = (*i_getc)(f)) == EOF) {
2019 } else if (c1 == '?') {
2020 /* =? is mime conversion start sequence */
2021 if(mime_f == STRICT_MIME) {
2022 /* check in real detail */
2023 if (mime_begin_strict(f) == EOF)
2027 } else if (mime_begin(f) == EOF)
2037 /* normal ASCII code */
2040 } else if (c1 == SI) {
2043 } else if (c1 == SO) {
2046 } else if (c1 == ESC ) {
2047 if ((c1 = (*i_getc)(f)) == EOF) {
2048 /* (*oconv)(0, ESC); don't send bogus code */
2050 } else if (c1 == '$') {
2051 if ((c1 = (*i_getc)(f)) == EOF) {
2053 (*oconv)(0, ESC); don't send bogus code
2054 (*oconv)(0, '$'); */
2056 } else if (c1 == '@'|| c1 == 'B') {
2057 /* This is kanji introduction */
2060 set_input_codename("ISO-2022-JP");
2061 debug(input_codename);
2063 } else if (c1 == '(') {
2064 if ((c1 = (*i_getc)(f)) == EOF) {
2065 /* don't send bogus code
2071 } else if (c1 == '@'|| c1 == 'B') {
2072 /* This is kanji introduction */
2077 } else if (c1 == 'D'){
2081 #endif /* X0212_ENABLE */
2083 /* could be some special code */
2090 } else if (broken_f&0x2) {
2091 /* accept any ESC-(-x as broken code ... */
2101 } else if (c1 == '(') {
2102 if ((c1 = (*i_getc)(f)) == EOF) {
2103 /* don't send bogus code
2105 (*oconv)(0, '('); */
2109 /* This is X0201 kana introduction */
2110 input_mode = X0201; shift_mode = X0201;
2112 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2113 /* This is X0208 kanji introduction */
2114 input_mode = ASCII; shift_mode = FALSE;
2116 } else if (broken_f&0x2) {
2117 input_mode = ASCII; shift_mode = FALSE;
2122 /* maintain various input_mode here */
2126 } else if ( c1 == 'N' || c1 == 'n' ){
2128 c3 = (*i_getc)(f); /* skip SS2 */
2129 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2144 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2145 input_mode = ASCII; set_iconv(FALSE, 0);
2147 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2148 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2156 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2157 if ((c1=(*i_getc)(f))!=EOF) {
2161 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2177 if (input_mode == X0208)
2178 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2180 else if (input_mode == X0212)
2181 (*oconv)((0x8f << 8) | c2, c1);
2182 #endif /* X0212_ENABLE */
2183 else if (input_mode)
2184 (*oconv)(input_mode, c1); /* other special case */
2185 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2186 int c0 = (*i_getc)(f);
2189 (*iconv)(c2, c1, c0);
2195 /* goto next_word */
2199 (*iconv)(EOF, 0, 0);
2212 /** it must NOT be in the kanji shifted sequence */
2213 /** it must NOT be written in JIS7 */
2214 /** and it must be after 2 byte 8bit code */
2221 while ((c1 = (*i_getc)(f)) != EOF) {
2227 if (push_hold_buf(c1) == EOF || estab_f){
2233 struct input_code *p = input_code_list;
2234 struct input_code *result = p;
2239 if (p->score < result->score){
2244 set_iconv(FALSE, result->iconv_func);
2249 ** 1) EOF is detected, or
2250 ** 2) Code is established, or
2251 ** 3) Buffer is FULL (but last word is pushed)
2253 ** in 1) and 3) cases, we continue to use
2254 ** Kanji codes by oconv and leave estab_f unchanged.
2259 while (wc < hold_count){
2260 c2 = hold_buf[wc++];
2262 #ifdef NUMCHAR_OPTION
2263 || (c2 & CLASS_MASK) == CLASS_UTF16
2268 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2269 (*iconv)(X0201, c2, 0);
2272 if (wc < hold_count){
2273 c1 = hold_buf[wc++];
2282 if ((*iconv)(c2, c1, 0) < 0){
2284 if (wc < hold_count){
2285 c0 = hold_buf[wc++];
2294 (*iconv)(c2, c1, c0);
2307 if (hold_count >= HOLD_SIZE*2)
2309 hold_buf[hold_count++] = c2;
2310 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2313 int s2e_conv(c2, c1, p2, p1)
2318 #ifdef SHIFTJIS_CP932
2319 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2320 extern unsigned short shiftjis_cp932[3][189];
2321 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2327 #endif /* SHIFTJIS_CP932 */
2329 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2330 extern unsigned short shiftjis_x0212[3][189];
2331 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2334 c2 = (0x8f << 8) | (val >> 8);
2346 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2348 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2356 c2 = x0212_unshift(c2);
2371 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2374 int ret = s2e_conv(c2, c1, &c2, &c1);
2375 if (ret) return ret;
2389 }else if (c2 == 0x8f){
2393 c2 = (c2 << 8) | (c1 & 0x7f);
2395 #ifdef SHIFTJIS_CP932
2398 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2399 s2e_conv(s2, s1, &c2, &c1);
2400 if ((c2 & 0xff00) == 0){
2406 #endif /* SHIFTJIS_CP932 */
2407 #endif /* X0212_ENABLE */
2408 } else if (c2 == SSO){
2411 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2421 #ifdef UTF8_INPUT_ENABLE
2423 w2e_conv(c2, c1, c0, p2, p1)
2427 extern unsigned short * utf8_to_euc_2bytes[];
2428 extern unsigned short ** utf8_to_euc_3bytes[];
2431 if (0xc0 <= c2 && c2 <= 0xef) {
2432 unsigned short **pp;
2435 if (c0 == 0) return -1;
2436 pp = utf8_to_euc_3bytes[c2 - 0x80];
2437 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2439 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2441 #ifdef NUMCHAR_OPTION
2444 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2449 } else if (c2 == X0201) {
2462 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2470 w16w_conv(val, p2, p1, p0)
2478 }else if (val < 0x800){
2479 *p2 = 0xc0 | (val >> 6);
2480 *p1 = 0x80 | (val & 0x3f);
2483 *p2 = 0xe0 | (val >> 12);
2484 *p1 = 0x80 | ((val >> 6) & 0x3f);
2485 *p0 = 0x80 | (val & 0x3f);
2490 ww16_conv(c2, c1, c0)
2495 val = (c2 & 0x0f) << 12;
2496 val |= (c1 & 0x3f) << 6;
2498 }else if (c2 >= 0xc0){
2499 val = (c2 & 0x1f) << 6;
2508 w16e_conv(val, p2, p1)
2512 extern unsigned short * utf8_to_euc_2bytes[];
2513 extern unsigned short ** utf8_to_euc_3bytes[];
2515 unsigned short **pp;
2519 w16w_conv(val, &c2, &c1, &c0);
2522 pp = utf8_to_euc_3bytes[c2 - 0x80];
2523 psize = sizeof_utf8_to_euc_C2;
2524 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2526 pp = utf8_to_euc_2bytes;
2527 psize = sizeof_utf8_to_euc_2bytes;
2528 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2530 #ifdef NUMCHAR_OPTION
2533 *p1 = CLASS_UTF16 | val;
2545 w_iconv16(c2, c1, c0)
2550 if (c2==0376 && c1==0377){
2551 utf16_mode = UTF16LE_INPUT;
2553 } else if (c2==0377 && c1==0376){
2554 utf16_mode = UTF16BE_INPUT;
2557 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2559 tmp=c1; c1=c2; c2=tmp;
2561 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2565 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2566 if (ret) return ret;
2572 w_iconv_common(c1, c0, pp, psize, p2, p1)
2574 unsigned short **pp;
2582 if (pp == 0) return 1;
2585 if (c1 < 0 || psize <= c1) return 1;
2587 if (p == 0) return 1;
2590 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2592 if (val == 0) return 1;
2599 if (c2 == SO) c2 = X0201;
2608 #ifdef UTF8_OUTPUT_ENABLE
2613 extern unsigned short euc_to_utf8_1byte[];
2614 extern unsigned short * euc_to_utf8_2bytes[];
2615 extern unsigned short * euc_to_utf8_2bytes_ms[];
2619 p = euc_to_utf8_1byte;
2621 } else if (c2 >> 8 == 0x8f){
2622 extern unsigned short * x0212_to_utf8_2bytes[];
2623 c2 = (c2&0x7f) - 0x21;
2624 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2625 p = x0212_to_utf8_2bytes[c2];
2631 c2 = (c2&0x7f) - 0x21;
2632 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2633 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2638 c1 = (c1 & 0x7f) - 0x21;
2639 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2655 if (unicode_bom_f==2) {
2662 #ifdef NUMCHAR_OPTION
2663 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2664 w16w_conv(c1, &c2, &c1, &c0);
2668 if (c0) (*o_putc)(c0);
2675 output_mode = ASCII;
2677 } else if (c2 == ISO8859_1) {
2678 output_mode = ISO8859_1;
2679 (*o_putc)(c1 | 0x080);
2683 val = e2w_conv(c2, c1);
2685 w16w_conv(val, &c2, &c1, &c0);
2689 if (c0) (*o_putc)(c0);
2705 if (unicode_bom_f==2) {
2707 (*o_putc)((unsigned char)'\377');
2711 (*o_putc)((unsigned char)'\377');
2716 if (c2 == ISO8859_1) {
2719 #ifdef NUMCHAR_OPTION
2720 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2721 c2 = (c1 >> 8) & 0xff;
2725 unsigned short val = e2w_conv(c2, c1);
2726 c2 = (val >> 8) & 0xff;
2745 #ifdef NUMCHAR_OPTION
2746 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2747 w16e_conv(c1, &c2, &c1);
2748 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2756 } else if (c2 == 0) {
2757 output_mode = ASCII;
2759 } else if (c2 == X0201) {
2760 output_mode = JAPANESE_EUC;
2761 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2762 } else if (c2 == ISO8859_1) {
2763 output_mode = ISO8859_1;
2764 (*o_putc)(c1 | 0x080);
2766 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2767 output_mode = JAPANESE_EUC;
2768 #ifdef SHIFTJIS_CP932
2771 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2772 s2e_conv(s2, s1, &c2, &c1);
2776 if ((c2 & 0xff00) >> 8 == 0x8f){
2779 (*o_putc)((c2 & 0x7f) | 0x080);
2780 (*o_putc)(c1 | 0x080);
2783 (*o_putc)((c2 & 0x7f) | 0x080);
2784 (*o_putc)(c1 | 0x080);
2788 if ((c1<0x21 || 0x7e<c1) ||
2789 (c2<0x21 || 0x7e<c2)) {
2790 set_iconv(FALSE, 0);
2791 return; /* too late to rescue this char */
2793 output_mode = JAPANESE_EUC;
2794 (*o_putc)(c2 | 0x080);
2795 (*o_putc)(c1 | 0x080);
2805 if ((ret & 0xff00) == 0x8f00){
2806 if (0x75 <= c && c <= 0x7f){
2807 ret = c + (0x109 - 0x75);
2810 if (0x75 <= c && c <= 0x7f){
2811 ret = c + (0x113 - 0x75);
2818 int x0212_unshift(c)
2822 if (0x7f <= c && c <= 0x88){
2823 ret = c + (0x75 - 0x7f);
2824 }else if (0x89 <= c && c <= 0x92){
2825 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2829 #endif /* X0212_ENABLE */
2832 e2s_conv(c2, c1, p2, p1)
2833 int c2, c1, *p2, *p1;
2836 unsigned short *ptr;
2838 extern unsigned short *x0212_shiftjis[];
2840 if ((c2 & 0xff00) == 0x8f00){
2842 if (0x21 <= ndx && ndx <= 0x7e){
2843 ptr = x0212_shiftjis[ndx - 0x21];
2845 val = ptr[(c1 & 0x7f) - 0x21];
2855 c2 = x0212_shift(c2);
2857 #endif /* X0212_ENABLE */
2858 if ((c2 & 0xff00) == 0x8f00){
2861 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2862 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2871 #ifdef NUMCHAR_OPTION
2872 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2873 w16e_conv(c1, &c2, &c1);
2879 } else if (c2 == 0) {
2880 output_mode = ASCII;
2882 } else if (c2 == X0201) {
2883 output_mode = SHIFT_JIS;
2885 } else if (c2 == ISO8859_1) {
2886 output_mode = ISO8859_1;
2887 (*o_putc)(c1 | 0x080);
2889 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2890 output_mode = SHIFT_JIS;
2891 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2897 if ((c1<0x20 || 0x7e<c1) ||
2898 (c2<0x20 || 0x7e<c2)) {
2899 set_iconv(FALSE, 0);
2900 return; /* too late to rescue this char */
2902 output_mode = SHIFT_JIS;
2903 e2s_conv(c2, c1, &c2, &c1);
2905 #ifdef SHIFTJIS_CP932
2907 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2908 extern unsigned short cp932inv[2][189];
2909 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2915 #endif /* SHIFTJIS_CP932 */
2918 if (prefix_table[(unsigned char)c1]){
2919 (*o_putc)(prefix_table[(unsigned char)c1]);
2930 #ifdef NUMCHAR_OPTION
2931 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2932 w16e_conv(c1, &c2, &c1);
2936 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2939 (*o_putc)(ascii_intro);
2940 output_mode = ASCII;
2944 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2945 if (output_mode!=X0212) {
2946 output_mode = X0212;
2952 (*o_putc)(c2 & 0x7f);
2955 } else if (c2==X0201) {
2956 if (output_mode!=X0201) {
2957 output_mode = X0201;
2963 } else if (c2==ISO8859_1) {
2964 /* iso8859 introduction, or 8th bit on */
2965 /* Can we convert in 7bit form using ESC-'-'-A ?
2967 output_mode = ISO8859_1;
2969 } else if (c2 == 0) {
2970 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2973 (*o_putc)(ascii_intro);
2974 output_mode = ASCII;
2978 if (output_mode != X0208) {
2979 output_mode = X0208;
2982 (*o_putc)(kanji_intro);
2984 if (c1<0x20 || 0x7e<c1)
2986 if (c2<0x20 || 0x7e<c2)
2998 mime_prechar(c2, c1);
2999 (*o_base64conv)(c2,c1);
3003 static int broken_buf[3];
3004 static int broken_counter = 0;
3005 static int broken_last = 0;
3012 if (broken_counter>0) {
3013 return broken_buf[--broken_counter];
3016 if (c=='$' && broken_last != ESC
3017 && (input_mode==ASCII || input_mode==X0201)) {
3020 if (c1=='@'|| c1=='B') {
3021 broken_buf[0]=c1; broken_buf[1]=c;
3028 } else if (c=='(' && broken_last != ESC
3029 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3032 if (c1=='J'|| c1=='B') {
3033 broken_buf[0]=c1; broken_buf[1]=c;
3051 if (broken_counter<2)
3052 broken_buf[broken_counter++]=c;
3056 static int prev_cr = 0;
3064 if (! (c2==0&&c1==NL) ) {
3070 } else if (c1=='\r') {
3072 } else if (c1=='\n') {
3073 if (crmode_f==CRLF) {
3074 (*o_crconv)(0,'\r');
3075 } else if (crmode_f==CR) {
3076 (*o_crconv)(0,'\r');
3080 } else if (c1!='\032' || crmode_f!=NL){
3086 Return value of fold_conv()
3088 \n add newline and output char
3089 \r add newline and output nothing
3092 1 (or else) normal output
3094 fold state in prev (previous character)
3096 >0x80 Japanese (X0208/X0201)
3101 This fold algorithm does not preserve leading space in a line.
3102 This is the main difference from fmt.
/*
 * char_size(c2,c1): number of columns one character adds to the
 * current fold line length (f_line).
 *   c2 != 0  -> 2
 *   c2 == 0  -> 1
 * c1 is accepted to match the (c2,c1) calling convention but is unused.
 * The argument is parenthesized so that an expression argument
 * (e.g. a conditional expression) expands with the intended precedence.
 */
#define char_size(c2,c1) ((c2)?2:1)
3114 if (c1== '\r' && !fold_preserve_f) {
3115 fold_state=0; /* ignore cr */
3116 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3118 fold_state=0; /* ignore cr */
3119 } else if (c1== BS) {
3120 if (f_line>0) f_line--;
3122 } else if (c2==EOF && f_line != 0) { /* close open last line */
3124 } else if ((c1=='\n' && !fold_preserve_f)
3125 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3126 && fold_preserve_f)) {
3128 if (fold_preserve_f) {
3132 } else if ((f_prev == c1 && !fold_preserve_f)
3133 || (f_prev == '\n' && fold_preserve_f)
3134 ) { /* duplicate newline */
3137 fold_state = '\n'; /* output two newline */
3143 if (f_prev&0x80) { /* Japanese? */
3145 fold_state = 0; /* ignore given single newline */
3146 } else if (f_prev==' ') {
3150 if (++f_line<=fold_len)
3154 fold_state = '\r'; /* fold and output nothing */
3158 } else if (c1=='\f') {
3163 fold_state = '\n'; /* output newline and clear */
3164 } else if ( (c2==0 && c1==' ')||
3165 (c2==0 && c1=='\t')||
3166 (c2=='!'&& c1=='!')) {
3167 /* X0208 kankaku or ascii space */
3168 if (f_prev == ' ') {
3169 fold_state = 0; /* remove duplicate spaces */
3172 if (++f_line<=fold_len)
3173 fold_state = ' '; /* output ASCII space only */
3175 f_prev = ' '; f_line = 0;
3176 fold_state = '\r'; /* fold and output nothing */
3180 prev0 = f_prev; /* we still need this one... , but almost done */
3182 if (c2 || c2==X0201)
3183 f_prev |= 0x80; /* this is Japanese */
3184 f_line += char_size(c2,c1);
3185 if (f_line<=fold_len) { /* normal case */
3188 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3189 f_line = char_size(c2,c1);
3190 fold_state = '\n'; /* We can't wait, do fold now */
3191 } else if (c2==X0201) {
3192 /* simple kinsoku rules return 1 means no folding */
3193 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3194 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3195 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3196 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3197 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3198 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3199 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3201 fold_state = '\n';/* add one new f_line before this character */
3204 fold_state = '\n';/* add one new f_line before this character */
3207 /* kinsoku point in ASCII */
3208 if ( c1==')'|| /* { [ ( */
3219 /* just after special */
3220 } else if (!is_alnum(prev0)) {
3221 f_line = char_size(c2,c1);
3223 } else if ((prev0==' ') || /* ignored new f_line */
3224 (prev0=='\n')|| /* ignored new f_line */
3225 (prev0&0x80)) { /* X0208 - ASCII */
3226 f_line = char_size(c2,c1);
3227 fold_state = '\n';/* add one new f_line before this character */
3229 fold_state = 1; /* default no fold in ASCII */
3233 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3234 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3235 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3236 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3237 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3238 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3239 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3240 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3241 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3242 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3243 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3244 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3245 /* default no fold in kinsoku */
3248 f_line = char_size(c2,c1);
3249 /* add one new f_line before this character */
3252 f_line = char_size(c2,c1);
3254 /* add one new f_line before this character */
3259 /* terminator process */
3260 switch(fold_state) {
3279 int z_prev2=0,z_prev1=0;
3286 /* if (c2) c1 &= 0x7f; assertion */
3288 if (x0201_f && z_prev2==X0201) { /* X0201 */
3289 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3291 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3293 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3295 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3299 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3308 if (x0201_f && c2==X0201) {
3309 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3310 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3311 z_prev1 = c1; z_prev2 = c2;
3314 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3319 /* JISX0208 Alphabet */
3320 if (alpha_f && c2 == 0x23 ) {
3322 } else if (alpha_f && c2 == 0x21 ) {
3323 /* JISX0208 Kigou */
3328 } else if (alpha_f&0x4) {
3333 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3339 case '>': entity = ">"; break;
3340 case '<': entity = "<"; break;
3341 case '\"': entity = """; break;
3342 case '&': entity = "&"; break;
3345 while (*entity) (*o_zconv)(0, *entity++);
3355 #define rot13(c) ( \
3357 (c <= 'M') ? (c + 13): \
3358 (c <= 'Z') ? (c - 13): \
3360 (c <= 'm') ? (c + 13): \
3361 (c <= 'z') ? (c - 13): \
3365 #define rot47(c) ( \
3367 ( c <= 'O' ) ? (c + 47) : \
3368 ( c <= '~' ) ? (c - 47) : \
3376 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3382 (*o_rot_conv)(c2,c1);
3389 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3391 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3394 (*o_hira_conv)(c2,c1);
3399 iso2022jp_check_conv(c2,c1)
3402 static int range[RANGE_NUM_MAX][2] = {
3425 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3429 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3434 for (i = 0; i < RANGE_NUM_MAX; i++) {
3435 start = range[i][0];
3438 if (c >= start && c <= end) {
3443 (*o_iso2022jp_check_conv)(c2,c1);
3447 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3449 unsigned char *mime_pattern[] = {
3450 (unsigned char *)"\075?EUC-JP?B?",
3451 (unsigned char *)"\075?SHIFT_JIS?B?",
3452 (unsigned char *)"\075?ISO-8859-1?Q?",
3453 (unsigned char *)"\075?ISO-8859-1?B?",
3454 (unsigned char *)"\075?ISO-2022-JP?B?",
3455 (unsigned char *)"\075?ISO-2022-JP?Q?",
3456 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3457 (unsigned char *)"\075?UTF-8?B?",
3458 (unsigned char *)"\075?UTF-8?Q?",
3460 (unsigned char *)"\075?US-ASCII?Q?",
3465 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3466 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3467 e_iconv, s_iconv, 0, 0, 0, 0,
3468 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3474 int mime_encode[] = {
3475 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3476 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3483 int mime_encode_method[] = {
3484 'B', 'B','Q', 'B', 'B', 'Q',
3485 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3493 #define MAXRECOVER 20
/*
 * ASCII-only character classification helpers.
 *
 * The portability of toupper() is not trusted, so every test is spelled
 * out as an explicit range comparison instead of calling <ctype.h>.
 *
 * Every use of the macro argument is parenthesized so that expression
 * arguments expand with the intended precedence.
 * NOTE: the argument is still evaluated more than once; do not pass an
 * expression with side effects (e.g. nkf_toupper(*p++)).
 */
/* upper-case form of c when c is 'a'..'z', otherwise c unchanged */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
/* non-zero when c is '0'..'9' */
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
/* non-zero when c is a decimal digit or one of a-f / A-F */
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
/* non-zero when c is SPACE or TAB */
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
/* non-zero when c is SPACE, TAB, CR or NL */
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
/* non-zero when c is an ASCII letter */
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
/* non-zero when c is an ASCII letter or decimal digit */
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
3507 if (i_getc!=mime_getc) {
3508 i_mgetc = i_getc; i_getc = mime_getc;
3509 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3510 if(mime_f==STRICT_MIME) {
3511 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3512 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3518 unswitch_mime_getc()
3520 if(mime_f==STRICT_MIME) {
3521 i_mgetc = i_mgetc_buf;
3522 i_mungetc = i_mungetc_buf;
3525 i_ungetc = i_mungetc;
3529 mime_begin_strict(f)
3534 unsigned char *p,*q;
3535 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3537 mime_decode_mode = FALSE;
3538 /* =? has been checked */
3540 p = mime_pattern[j];
3543 for(i=2;p[i]>' ';i++) { /* start at =? */
3544 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3545 /* pattern fails, try next one */
3547 while ((p = mime_pattern[++j])) {
3548 for(k=2;k<i;k++) /* assume length(p) > i */
3549 if (p[k]!=q[k]) break;
3550 if (k==i && nkf_toupper(c1)==p[k]) break;
3552 if (p) continue; /* found next one, continue */
3553 /* all fails, output from recovery buffer */
3561 mime_decode_mode = p[i-2];
3563 set_iconv(FALSE, mime_priority_func[j]);
3564 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3566 if (mime_decode_mode=='B') {
3567 mimebuf_f = unbuf_f;
3569 /* do MIME integrity check */
3570 return mime_integrity(f,mime_pattern[j]);
3582 /* we don't keep eof of Fifo, because it contains ?= as
3583 a terminator. It was checked in mime_integrity. */
3584 return ((mimebuf_f)?
3585 (*i_mgetc_buf)(f):Fifo(mime_input++));
3589 mime_ungetc_buf(c,f)
3594 (*i_mungetc_buf)(c,f);
3596 Fifo(--mime_input)=c;
3607 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3608 /* re-read and convert again from mime_buffer. */
3610 /* =? has been checked */
3612 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3613 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3614 /* We accept any character type even if it is broken by new lines */
3615 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3616 if (c1=='\n'||c1==' '||c1=='\r'||
3617 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3619 /* Failed. But this could be another MIME preamble */
3627 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3628 if (!(++i<MAXRECOVER) || c1==EOF) break;
3629 if (c1=='b'||c1=='B') {
3630 mime_decode_mode = 'B';
3631 } else if (c1=='q'||c1=='Q') {
3632 mime_decode_mode = 'Q';
3636 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3637 if (!(++i<MAXRECOVER) || c1==EOF) break;
3639 mime_decode_mode = FALSE;
3645 if (!mime_decode_mode) {
3646 /* false MIME preamble, restart from mime_buffer */
3647 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3648 /* Since we are in MIME mode until buffer becomes empty, */
3649 /* we never go into mime_begin again for a while. */
3652 /* discard mime preamble, and goto MIME mode */
3654 /* do no MIME integrity check */
3655 return c1; /* used only for checking EOF */
3670 fprintf(stderr, "%s\n", str);
3676 set_input_codename (codename)
3681 strcmp(codename, "") != 0 &&
3682 strcmp(codename, input_codename) != 0)
3684 is_inputcode_mixed = TRUE;
3686 input_codename = codename;
3687 is_inputcode_set = TRUE;
3692 print_guessed_code (filename)
3695 char *codename = "BINARY";
3696 if (!is_inputcode_mixed) {
3697 if (strcmp(input_codename, "") == 0) {
3700 codename = input_codename;
3703 if (filename != NULL) printf("%s:", filename);
3704 printf("%s\n", codename);
3712 if (nkf_isdigit(x)) return x - '0';
3713 return nkf_toupper(x) - 'A' + 10;
3718 #ifdef ANSI_C_PROTOTYPE
3719 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3722 hex_getc(ch, f, g, u)
3735 if (!nkf_isxdigit(c2)){
3740 if (!nkf_isxdigit(c3)){
3745 return (hex2bin(c2) << 4) | hex2bin(c3);
3752 return hex_getc(':', f, i_cgetc, i_cungetc);
3760 return (*i_cungetc)(c, f);
3767 return hex_getc('%', f, i_ugetc, i_uungetc);
3775 return (*i_uungetc)(c, f);
3779 #ifdef NUMCHAR_OPTION
3784 int (*g)() = i_ngetc;
3785 int (*u)() = i_nungetc;
3796 if (buf[i] == 'x' || buf[i] == 'X'){
3797 for (j = 0; j < 5; j++){
3799 if (!nkf_isxdigit(buf[i])){
3806 c |= hex2bin(buf[i]);
3809 for (j = 0; j < 6; j++){
3813 if (!nkf_isdigit(buf[i])){
3820 c += hex2bin(buf[i]);
3826 return CLASS_UTF16 | c;
3836 numchar_ungetc(c, f)
3840 return (*i_nungetc)(c, f);
3849 int c1, c2, c3, c4, cc;
3850 int t1, t2, t3, t4, mode, exit_mode;
3854 int lwsp_size = 128;
3856 if (mime_top != mime_last) { /* Something is in FIFO */
3857 return Fifo(mime_top++);
3859 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3860 mime_decode_mode=FALSE;
3861 unswitch_mime_getc();
3862 return (*i_getc)(f);
3865 if (mimebuf_f == FIXED_MIME)
3866 exit_mode = mime_decode_mode;
3869 if (mime_decode_mode == 'Q') {
3870 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3872 if (c1=='_') return ' ';
3873 if (c1!='=' && c1!='?') {
3877 mime_decode_mode = exit_mode; /* prepare for quit */
3878 if (c1<=' ') return c1;
3879 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3880 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3881 /* end Q encoding */
3882 input_mode = exit_mode;
3884 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3885 if (lwsp_buf==NULL) {
3886 perror("can't malloc");
3889 while ((c1=(*i_getc)(f))!=EOF) {
3894 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3902 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3903 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3918 lwsp_buf[lwsp_count] = c1;
3919 if (lwsp_count++>lwsp_size){
3921 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3922 if (lwsp_buf_new==NULL) {
3925 perror("can't realloc");
3928 lwsp_buf = lwsp_buf_new;
3934 if (lwsp_count > 0) {
3935 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3939 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3940 i_ungetc(lwsp_buf[lwsp_count],f);
3948 if (c1=='='&&c2<' ') { /* this is soft wrap */
3949 while((c1 = (*i_mgetc)(f)) <=' ') {
3950 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3952 mime_decode_mode = 'Q'; /* still in MIME */
3953 goto restart_mime_q;
3956 mime_decode_mode = 'Q'; /* still in MIME */
3960 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3961 if (c2<=' ') return c2;
3962 mime_decode_mode = 'Q'; /* still in MIME */
/* hex(c): numeric value (0-15) of the ASCII hex digit c; any character
   that is not 0-9, A-F or a-f yields 0.  The argument is parenthesized
   so that expression arguments expand with the intended precedence. */
#define hex(c)   (('0'<=(c)&&(c)<='9')?((c)-'0'):\
     ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
3965 return ((hex(c2)<<4) + hex(c3));
3968 if (mime_decode_mode != 'B') {
3969 mime_decode_mode = FALSE;
3970 return (*i_mgetc)(f);
3974 /* Base64 encoding */
3976 MIME allows line breaks in the middle of
3977 Base64, but we are very pessimistic when decoding
3978 in unbuffered mode because MIME-encoded text may be broken by
3979 control sequences from less or an editor (such as ESC-[-K) in unbuffered
3980 mode. Ignore incomplete MIME.
3982 mode = mime_decode_mode;
3983 mime_decode_mode = exit_mode; /* prepare for quit */
3985 while ((c1 = (*i_mgetc)(f))<=' ') {
3990 if ((c2 = (*i_mgetc)(f))<=' ') {
3993 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3994 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3997 if ((c1 == '?') && (c2 == '=')) {
4000 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4001 if (lwsp_buf==NULL) {
4002 perror("can't malloc");
4005 while ((c1=(*i_getc)(f))!=EOF) {
4010 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4018 if ((c1=(*i_getc)(f))!=EOF) {
4022 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4037 lwsp_buf[lwsp_count] = c1;
4038 if (lwsp_count++>lwsp_size){
4040 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4041 if (lwsp_buf_new==NULL) {
4044 perror("can't realloc");
4047 lwsp_buf = lwsp_buf_new;
4053 if (lwsp_count > 0) {
4054 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4058 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4059 i_ungetc(lwsp_buf[lwsp_count],f);
4068 if ((c3 = (*i_mgetc)(f))<=' ') {
4071 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4072 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4076 if ((c4 = (*i_mgetc)(f))<=' ') {
4079 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4080 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4084 mime_decode_mode = mode; /* still in MIME sigh... */
4086 /* BASE 64 decoding */
4088 t1 = 0x3f & base64decode(c1);
4089 t2 = 0x3f & base64decode(c2);
4090 t3 = 0x3f & base64decode(c3);
4091 t4 = 0x3f & base64decode(c4);
4092 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4094 Fifo(mime_last++) = cc;
4095 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4097 Fifo(mime_last++) = cc;
4098 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4100 Fifo(mime_last++) = cc;
4105 return Fifo(mime_top++);
4113 Fifo(--mime_top) = c;
4124 /* In buffered mode, read until =? or NL or buffer full
4126 mime_input = mime_top;
4127 mime_last = mime_top;
4128 while(*p) Fifo(mime_input++) = *p++;
4131 while((c=(*i_getc)(f))!=EOF) {
4132 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4133 break; /* buffer full */
4135 if (c=='=' && d=='?') {
4136 /* checked. skip header, start decode */
4137 Fifo(mime_input++) = c;
4138 /* mime_last_input = mime_input; */
4143 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4145 /* Should we check length mod 4? */
4146 Fifo(mime_input++) = c;
4149 /* In case of Incomplete MIME, no MIME decode */
4150 Fifo(mime_input++) = c;
4151 mime_last = mime_input; /* point undecoded buffer */
4152 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4153 switch_mime_getc(); /* anyway we need buffered getc */
4164 i = c - 'A'; /* A..Z 0-25 */
4166 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4168 } else if (c > '/') {
4169 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4170 } else if (c == '+') {
4171 i = '>' /* 62 */ ; /* + 62 */
4173 i = '?' /* 63 */ ; /* / 63 */
4178 static char basis_64[] = /* Base64 alphabet; the encoder indexes it with 6-bit values (0..63) */
4179 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4182 #define MIMEOUT_BUF_LENGTH (60) /* fill threshold for mimeout_buf; the array is declared one element larger */
4183 char mimeout_buf[MIMEOUT_BUF_LENGTH+1]; /* characters held back by the MIME output code until flushed via (*o_mputc) or mimeout_addchar */
4184 int mimeout_buf_count = 0; /* number of characters currently stored in mimeout_buf */
4185 int mimeout_preserve_space = 0; /* boolean flag tested and cleared to FALSE by the MIME output code; NOTE(review): presumably keeps buffered whitespace from being emitted raw - confirm */
4186 #define itoh4(c) (c>=10?c+'A'-10:c+'0') /* 0..15 -> uppercase hex digit; NOTE(review): argument is not parenthesized, so callers must wrap compound expressions */
4195 p = mime_pattern[0];
4196 for(i=0;mime_encode[i];i++) {
4197 if (mode == mime_encode[i]) {
4198 p = mime_pattern[i];
4202 mimeout_mode = mime_encode_method[i];
4205 if (base64_count>45) {
4209 if (!mimeout_preserve_space && mimeout_buf_count>0
4210 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4211 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4215 if (!mimeout_preserve_space) {
4216 for (;i<mimeout_buf_count;i++) {
4217 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4218 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4219 (*o_mputc)(mimeout_buf[i]);
4226 mimeout_preserve_space = FALSE;
4232 j = mimeout_buf_count;
4233 mimeout_buf_count = 0;
4235 mime_putc(mimeout_buf[i]);
4251 switch(mimeout_mode) {
4256 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4262 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4268 if (mimeout_f!=FIXED_MIME) {
4270 } else if (mimeout_mode != 'Q')
4279 switch(mimeout_mode) {
4284 } else if (c==CR||c==NL) {
4287 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4289 (*o_mputc)(itoh4(((c>>4)&0xf)));
4290 (*o_mputc)(itoh4((c&0xf)));
4299 (*o_mputc)(basis_64[c>>2]);
4304 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4310 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4311 (*o_mputc)(basis_64[c & 0x3F]);
4322 int mime_lastchar2, mime_lastchar1; /* assigned from c2 and c1 respectively; NOTE(review): presumably the pair most recently passed to mime_prechar - confirm */
4324 void mime_prechar(c2, c1)
4329 if (base64_count + mimeout_buf_count/3*4> 66){
4330 (*o_base64conv)(EOF,0);
4331 (*o_base64conv)(0,NL);
4332 (*o_base64conv)(0,SPACE);
4334 }/*else if (mime_lastchar2){
4335 if (c1 <=DEL && !nkf_isspace(c1)){
4336 (*o_base64conv)(0,SPACE);
4340 if (c2 && mime_lastchar2 == 0
4341 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
4342 (*o_base64conv)(0,SPACE);
4345 mime_lastchar2 = c2;
4346 mime_lastchar1 = c1;
4357 if (mimeout_f == FIXED_MIME){
4358 if (mimeout_mode == 'Q'){
4359 if (base64_count > 71){
4360 if (c!=CR && c!=NL) {
4367 if (base64_count > 71){
4372 if (c == EOF) { /* c==EOF */
4376 if (c != EOF) { /* c!=EOF */
4382 /* mimeout_f != FIXED_MIME */
4384 if (c == EOF) { /* c==EOF */
4385 j = mimeout_buf_count;
4386 mimeout_buf_count = 0;
4389 /*if (nkf_isspace(mimeout_buf[i])){
4392 mimeout_addchar(mimeout_buf[i]);
4396 (*o_mputc)(mimeout_buf[i]);
4402 if (mimeout_mode=='Q') {
4403 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4415 if (mimeout_buf_count > 0){
4416 lastchar = mimeout_buf[mimeout_buf_count - 1];
4421 if (!mimeout_mode) {
4422 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
4423 if (nkf_isspace(c)) {
4424 if (c==CR || c==NL) {
4427 for (i=0;i<mimeout_buf_count;i++) {
4428 (*o_mputc)(mimeout_buf[i]);
4432 mimeout_buf_count = 1;
4434 if (base64_count > 1
4435 && base64_count + mimeout_buf_count > 76){
4438 if (!nkf_isspace(mimeout_buf[0])){
4443 mimeout_buf[mimeout_buf_count++] = c;
4444 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4445 open_mime(output_mode);
4450 if (lastchar==CR || lastchar == NL){
4451 for (i=0;i<mimeout_buf_count;i++) {
4452 (*o_mputc)(mimeout_buf[i]);
4455 mimeout_buf_count = 0;
4457 if (lastchar==SPACE) {
4458 for (i=0;i<mimeout_buf_count-1;i++) {
4459 (*o_mputc)(mimeout_buf[i]);
4462 mimeout_buf[0] = SPACE;
4463 mimeout_buf_count = 1;
4465 open_mime(output_mode);
4468 /* mimeout_mode == 'B', 1, 2 */
4469 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4470 if (lastchar == CR || lastchar == NL){
4471 if (nkf_isblank(c)) {
4472 for (i=0;i<mimeout_buf_count;i++) {
4473 mimeout_addchar(mimeout_buf[i]);
4475 mimeout_buf_count = 0;
4476 } else if (SPACE<c && c<DEL) {
4478 for (i=0;i<mimeout_buf_count;i++) {
4479 (*o_mputc)(mimeout_buf[i]);
4482 mimeout_buf_count = 0;
4485 if (c==SPACE || c==TAB || c==CR || c==NL) {
4486 for (i=0;i<mimeout_buf_count;i++) {
4487 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4489 for (i=0;i<mimeout_buf_count;i++) {
4490 (*o_mputc)(mimeout_buf[i]);
4493 mimeout_buf_count = 0;
4496 mimeout_buf[mimeout_buf_count++] = c;
4497 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4499 for (i=0;i<mimeout_buf_count;i++) {
4500 (*o_mputc)(mimeout_buf[i]);
4503 mimeout_buf_count = 0;
4507 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
4508 mimeout_buf[mimeout_buf_count++] = c;
4509 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4510 j = mimeout_buf_count;
4511 mimeout_buf_count = 0;
4513 mimeout_addchar(mimeout_buf[i]);
4520 if (mimeout_buf_count>0) {
4521 j = mimeout_buf_count;
4522 mimeout_buf_count = 0;
4524 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4526 mimeout_addchar(mimeout_buf[i]);
4532 (*o_mputc)(mimeout_buf[i]);
4534 open_mime(output_mode);
4541 #if defined(PERL_XS) || defined(WIN32DLL)
4546 struct input_code *p = input_code_list;
4559 mime_f = STRICT_MIME;
4560 mime_decode_f = FALSE;
4565 #if defined(MSDOS) || defined(__OS2__)
4570 iso2022jp_f = FALSE;
4571 #ifdef UTF8_OUTPUT_ENABLE
4574 ms_ucs_map_f = FALSE;
4586 is_inputcode_mixed = FALSE;
4587 is_inputcode_set = FALSE;
4591 #ifdef SHIFTJIS_CP932
4597 for (i = 0; i < 256; i++){
4598 prefix_table[i] = 0;
4601 #ifdef UTF8_INPUT_ENABLE
4602 utf16_mode = UTF16LE_INPUT;
4604 mimeout_buf_count = 0;
4609 fold_preserve_f = FALSE;
4612 kanji_intro = DEFAULT_J;
4613 ascii_intro = DEFAULT_R;
4614 fold_margin = FOLD_MARGIN;
4615 output_conv = DEFAULT_CONV;
4616 oconv = DEFAULT_CONV;
4617 o_zconv = no_connection;
4618 o_fconv = no_connection;
4619 o_crconv = no_connection;
4620 o_rot_conv = no_connection;
4621 o_hira_conv = no_connection;
4622 o_base64conv = no_connection;
4623 o_iso2022jp_check_conv = no_connection;
4626 i_ungetc = std_ungetc;
4628 i_bungetc = std_ungetc;
4631 i_mungetc = std_ungetc;
4632 i_mgetc_buf = std_getc;
4633 i_mungetc_buf = std_ungetc;
4634 output_mode = ASCII;
4637 mime_decode_mode = FALSE;
4643 z_prev2=0,z_prev1=0;
4645 iconv_for_check = 0;
4647 input_codename = "";
4655 no_connection(c2,c1)
4658 no_connection2(c2,c1,0);
4662 no_connection2(c2,c1,c0)
4665 fprintf(stderr,"nkf internal module connection failure.\n");
4667 return 0; /* LINT */
4672 #define fprintf dllprintf
4677 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4678 fprintf(stderr,"Flags:\n");
4679 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4680 #ifdef DEFAULT_CODE_SJIS
4681 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4683 #ifdef DEFAULT_CODE_JIS
4684 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4686 #ifdef DEFAULT_CODE_EUC
4687 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4689 #ifdef DEFAULT_CODE_UTF8
4690 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4692 #ifdef UTF8_OUTPUT_ENABLE
4693 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4695 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4696 #ifdef UTF8_INPUT_ENABLE
4697 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4699 fprintf(stderr,"t no conversion\n");
4700 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4701 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4702 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
4703 fprintf(stderr,"v Show this usage. V: show version\n");
4704 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4705 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4706 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4707 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4708 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4709 fprintf(stderr," 3: Convert HTML Entity\n");
4710 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4711 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4713 fprintf(stderr,"T Text mode output\n");
4715 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4716 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4717 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
4718 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4719 fprintf(stderr,"long name options\n");
4720 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4721 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4722 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4723 fprintf(stderr," --x0212 Convert JISX0212\n");
4724 fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
4726 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4728 #ifdef NUMCHAR_OPTION
4729 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4731 #ifdef UTF8_OUTPUT_ENABLE
4732 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4735 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4737 fprintf(stderr," -g, --guess Guess the input code\n");
4738 fprintf(stderr," --help,--version\n");
4745 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4746 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4749 #if defined(MSDOS) && defined(__WIN16__)
4752 #if defined(MSDOS) && defined(__WIN32__)
4758 ,NKF_VERSION,NKF_RELEASE_DATE);
4759 fprintf(stderr,"\n%s\n",CopyRight);
4764 ** Patch authors:
4765 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4766 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4767 ** ohta@src.ricoh.co.jp (Junn Ohta)
4768 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4769 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4770 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4771 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4772 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4773 ** GHG00637@nifty-serve.or.jp (COW)