1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 ** 連絡先： （株）富士通研究所　ソフト３研　市川　至
5 ** （E-Mail Address: ichikawa@flab.fujitsu.co.jp）
6 ** Copyright (C) 1996,1998
8 ** 連絡先： 琉球大学情報工学科 河野 真治 mime/X0208 support
9 ** （E-Mail Address: kono@ie.u-ryukyu.ac.jp）
10 ** 連絡先： COW for DOS & Win16 & Win32 & OS/2
11 ** （E-Mail Address: GHG00637@niftyserve.or.p）
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8 サポートについて
31 ** 従来の nkf と入れかえてそのまま使えるようになっています
32 ** nkf -e などとして起動すると、自動判別で UTF-8 と判定されれば、
33 ** そのまま euc-jp に変換されます
35 ** まだバグがある可能性が高いです。
36 ** (特に自動判別、コード混在、エラー処理系)
38 ** 何か問題を見つけたら、
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 ** まで御連絡をお願いします。
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.68 2005/04/17 18:47:15 rei_furukawa Exp $ */
43 #define NKF_VERSION "2.0.5"
44 #define NKF_RELEASE_DATE "2005-04-10"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16BE_INPUT 14
212 #define UTF16LE_INPUT 15
/*
 * is_alnum(c): nonzero when c is an ASCII letter ('a'-'z', 'A'-'Z') or a
 * decimal digit ('0'-'9'), zero otherwise.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example a conditional expression) expands with the intended
 * precedence.  The argument is still evaluated more than once, so it must
 * not have side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
235 #define HOLD_SIZE 1024
236 #define IOBUF_SIZE 16384
238 #define DEFAULT_J 'B'
239 #define DEFAULT_R 'B'
241 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
242 #define SJ6394 0x0161 /* 63 - 94 ku offset */
244 #define RANGE_NUM_MAX 18
249 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
250 #define sizeof_euc_utf8 94
251 #define sizeof_euc_to_utf8_1byte 94
252 #define sizeof_euc_to_utf8_2bytes 94
253 #define sizeof_utf8_to_euc_C2 64
254 #define sizeof_utf8_to_euc_E5B8 64
255 #define sizeof_utf8_to_euc_2bytes 112
256 #define sizeof_utf8_to_euc_3bytes 112
259 /* MIME preprocessor */
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
266 /* function prototype */
268 #ifdef ANSI_C_PROTOTYPE
270 #define STATIC static
282 void (*status_func)PROTO((struct input_code *, int));
283 int (*iconv_func)PROTO((int c2, int c1, int c0));
287 STATIC char *input_codename = "";
289 STATIC int noconvert PROTO((FILE *f));
290 STATIC int kanji_convert PROTO((FILE *f));
291 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
292 STATIC int push_hold_buf PROTO((int c2));
293 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
294 STATIC int s_iconv PROTO((int c2,int c1,int c0));
295 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
296 STATIC int e_iconv PROTO((int c2,int c1,int c0));
297 #ifdef UTF8_INPUT_ENABLE
298 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
299 STATIC int w_iconv PROTO((int c2,int c1,int c0));
300 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
301 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
302 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
304 #ifdef UTF8_OUTPUT_ENABLE
305 STATIC int e2w_conv PROTO((int c2,int c1));
306 STATIC void w_oconv PROTO((int c2,int c1));
307 STATIC void w_oconv16 PROTO((int c2,int c1));
309 STATIC void e_oconv PROTO((int c2,int c1));
310 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
311 STATIC void s_oconv PROTO((int c2,int c1));
312 STATIC void j_oconv PROTO((int c2,int c1));
313 STATIC void fold_conv PROTO((int c2,int c1));
314 STATIC void cr_conv PROTO((int c2,int c1));
315 STATIC void z_conv PROTO((int c2,int c1));
316 STATIC void rot_conv PROTO((int c2,int c1));
317 STATIC void hira_conv PROTO((int c2,int c1));
318 STATIC void base64_conv PROTO((int c2,int c1));
319 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
320 STATIC void no_connection PROTO((int c2,int c1));
321 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
323 STATIC void code_score PROTO((struct input_code *ptr));
324 STATIC void code_status PROTO((int c));
326 STATIC void std_putc PROTO((int c));
327 STATIC int std_getc PROTO((FILE *f));
328 STATIC int std_ungetc PROTO((int c,FILE *f));
330 STATIC int broken_getc PROTO((FILE *f));
331 STATIC int broken_ungetc PROTO((int c,FILE *f));
333 STATIC int mime_begin PROTO((FILE *f));
334 STATIC int mime_getc PROTO((FILE *f));
335 STATIC int mime_ungetc PROTO((int c,FILE *f));
337 STATIC int mime_begin_strict PROTO((FILE *f));
338 STATIC int mime_getc_buf PROTO((FILE *f));
339 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
340 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
342 STATIC int base64decode PROTO((int c));
343 STATIC void mime_prechar PROTO((int c2, int c1));
344 STATIC void mime_putc PROTO((int c));
345 STATIC void open_mime PROTO((int c));
346 STATIC void close_mime PROTO(());
347 STATIC void usage PROTO(());
348 STATIC void version PROTO(());
349 STATIC void options PROTO((unsigned char *c));
350 #if defined(PERL_XS) || defined(WIN32DLL)
351 STATIC void reinit PROTO(());
356 static unsigned char stdibuf[IOBUF_SIZE];
357 static unsigned char stdobuf[IOBUF_SIZE];
358 static unsigned char hold_buf[HOLD_SIZE*2];
359 static int hold_count;
361 /* MIME preprocessor fifo */
363 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
364 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
365 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
366 static unsigned char mime_buf[MIME_BUF_SIZE];
367 static unsigned int mime_top = 0;
368 static unsigned int mime_last = 0; /* decoded */
369 static unsigned int mime_input = 0; /* undecoded */
372 static int unbuf_f = FALSE;
373 static int estab_f = FALSE;
374 static int nop_f = FALSE;
375 static int binmode_f = TRUE; /* binary mode */
376 static int rot_f = FALSE; /* rot13/47 mode */
377 static int hira_f = FALSE; /* hiragana/katakana conversion */
378 static int input_f = FALSE; /* non fixed input code */
379 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
380 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
381 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
382 static int mimebuf_f = FALSE; /* MIME buffered input */
383 static int broken_f = FALSE; /* convert ESC-less broken JIS */
384 static int iso8859_f = FALSE; /* ISO8859 through */
385 static int mimeout_f = FALSE; /* base64 mode */
386 #if defined(MSDOS) || defined(__OS2__)
387 static int x0201_f = TRUE; /* Assume JISX0201 kana */
389 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
391 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
392 #ifdef UTF8_OUTPUT_ENABLE
393 static int unicode_bom_f= 0; /* Output Unicode BOM */
394 static int w_oconv16_LE = 0; /* utf-16 little endian */
395 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
399 #ifdef NUMCHAR_OPTION
401 #define CLASS_MASK 0x0f000000
402 #define CLASS_UTF16 0x01000000
406 static int cap_f = FALSE;
407 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
408 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
409 STATIC int cap_getc PROTO((FILE *f));
410 STATIC int cap_ungetc PROTO((int c,FILE *f));
412 static int url_f = FALSE;
413 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
414 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
415 STATIC int url_getc PROTO((FILE *f));
416 STATIC int url_ungetc PROTO((int c,FILE *f));
418 static int numchar_f = FALSE;
419 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
420 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
421 STATIC int numchar_getc PROTO((FILE *f));
422 STATIC int numchar_ungetc PROTO((int c,FILE *f));
426 static int noout_f = FALSE;
427 STATIC void no_putc PROTO((int c));
428 static int debug_f = FALSE;
429 STATIC void debug PROTO((char *str));
432 static int guess_f = FALSE;
433 STATIC void print_guessed_code PROTO((char *filename));
434 STATIC void set_input_codename PROTO((char *codename));
435 static int is_inputcode_mixed = FALSE;
436 static int is_inputcode_set = FALSE;
439 static int exec_f = 0;
442 #ifdef SHIFTJIS_CP932
443 STATIC int cp932_f = TRUE;
444 #define CP932_TABLE_BEGIN (0xfa)
445 #define CP932_TABLE_END (0xfc)
447 STATIC int cp932inv_f = TRUE;
448 #define CP932INV_TABLE_BEGIN (0xed)
449 #define CP932INV_TABLE_END (0xee)
451 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
452 #endif /* SHIFTJIS_CP932 */
455 STATIC int x0212_f = FALSE;
456 static int x0212_shift PROTO((int c));
457 static int x0212_unshift PROTO((int c));
460 STATIC unsigned char prefix_table[256];
462 STATIC void e_status PROTO((struct input_code *, int));
463 STATIC void s_status PROTO((struct input_code *, int));
465 #ifdef UTF8_INPUT_ENABLE
466 STATIC void w_status PROTO((struct input_code *, int));
467 STATIC void w16_status PROTO((struct input_code *, int));
468 static int utf16_mode = UTF16BE_INPUT;
471 struct input_code input_code_list[] = {
472 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
473 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
474 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
475 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
479 static int mimeout_mode = 0;
480 static int base64_count = 0;
482 /* X0208 -> ASCII converter */
485 static int f_line = 0; /* chars in line */
486 static int f_prev = 0;
487 static int fold_preserve_f = FALSE; /* preserve new lines */
488 static int fold_f = FALSE;
489 static int fold_len = 0;
492 static unsigned char kanji_intro = DEFAULT_J;
493 static unsigned char ascii_intro = DEFAULT_R;
497 #define FOLD_MARGIN 10
498 #define DEFAULT_FOLD 60
500 static int fold_margin = FOLD_MARGIN;
504 #ifdef DEFAULT_CODE_JIS
505 # define DEFAULT_CONV j_oconv
507 #ifdef DEFAULT_CODE_SJIS
508 # define DEFAULT_CONV s_oconv
510 #ifdef DEFAULT_CODE_EUC
511 # define DEFAULT_CONV e_oconv
513 #ifdef DEFAULT_CODE_UTF8
514 # define DEFAULT_CONV w_oconv
517 /* process default */
518 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
520 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
521 /* s_iconv or oconv */
522 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
524 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
525 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
526 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
527 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
528 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
529 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
530 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
532 /* static redirections */
534 static void (*o_putc)PROTO((int c)) = std_putc;
536 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
537 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
539 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
540 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
542 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
544 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
545 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
547 /* for strict mime */
548 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
549 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
552 static int output_mode = ASCII, /* output kanji mode */
553 input_mode = ASCII, /* input kanji mode */
554 shift_mode = FALSE; /* TRUE shift out, or X0201 */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
557 /* X0201 / X0208 conversion tables */
559 /* X0201 kana conversion table */
562 unsigned char cv[]= {
563 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
564 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
565 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
566 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
567 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
568 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
569 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
570 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
571 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
572 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
573 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
574 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
575 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
576 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
577 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
578 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
582 /* X0201 kana conversion table for dakuten */
585 unsigned char dv[]= {
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
591 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
592 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
593 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
594 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
595 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
597 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 /* X0201 kana conversion table for han-dakuten */
607 unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
630 unsigned char fv[] = {
632 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
633 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
634 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
636 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
637 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
638 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
640 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
649 static int file_out = FALSE;
651 static int overwrite = FALSE;
654 static int crmode_f = 0; /* CR, NL, CRLF */
655 #ifdef EASYWIN /*Easy Win */
656 static int end_check;
659 #define STD_GC_BUFSIZE (256)
660 int std_gc_buf[STD_GC_BUFSIZE];
664 #include "nkf32dll.c"
665 #elif defined(PERL_XS)
675 char *outfname = NULL;
678 #ifdef EASYWIN /*Easy Win */
679 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
682 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
683 cp = (unsigned char *)*argv;
688 if (pipe(fds) < 0 || (pid = fork()) < 0){
699 execvp(argv[1], &argv[1]);
713 if(x0201_f == WISH_TRUE)
714 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
716 if (binmode_f == TRUE)
718 if (freopen("","wb",stdout) == NULL)
725 setbuf(stdout, (char *) NULL);
727 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
730 if (binmode_f == TRUE)
732 if (freopen("","rb",stdin) == NULL) return (-1);
736 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
740 kanji_convert(stdin);
741 if (guess_f) print_guessed_code(NULL);
746 is_inputcode_mixed = FALSE;
747 is_inputcode_set = FALSE;
749 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
758 /* reopen file for stdout */
759 if (file_out == TRUE) {
762 outfname = malloc(strlen(origfname)
763 + strlen(".nkftmpXXXXXX")
769 strcpy(outfname, origfname);
773 for (i = strlen(outfname); i; --i){
774 if (outfname[i - 1] == '/'
775 || outfname[i - 1] == '\\'){
781 strcat(outfname, "ntXXXXXX");
783 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
786 strcat(outfname, ".nkftmpXXXXXX");
787 fd = mkstemp(outfname);
790 || (fd_backup = dup(fileno(stdout))) < 0
791 || dup2(fd, fileno(stdout)) < 0
802 outfname = "nkf.out";
805 if(freopen(outfname, "w", stdout) == NULL) {
809 if (binmode_f == TRUE) {
811 if (freopen("","wb",stdout) == NULL)
818 if (binmode_f == TRUE)
820 if (freopen("","rb",fin) == NULL)
825 setvbuffer(fin, stdibuf, IOBUF_SIZE);
829 char *filename = NULL;
831 if (nfiles > 1) filename = origfname;
832 if (guess_f) print_guessed_code(filename);
838 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
846 if (dup2(fd_backup, fileno(stdout)) < 0){
849 if (stat(origfname, &sb)) {
850 fprintf(stderr, "Can't stat %s\n", origfname);
852 /* restore permissions */
853 if (chmod(outfname, sb.st_mode)) {
854 fprintf(stderr, "Can't set permission %s\n", outfname);
857 /* restore timestamp */
858 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
859 tb[0] = tb[1] = sb.st_mtime;
860 if (utime(outfname, tb)) {
861 fprintf(stderr, "Can't set timestamp %s\n", outfname);
864 tb.actime = sb.st_atime;
865 tb.modtime = sb.st_mtime;
866 if (utime(outfname, &tb)) {
867 fprintf(stderr, "Can't set timestamp %s\n", outfname);
871 if (unlink(origfname)){
875 if (rename(outfname, origfname)) {
877 fprintf(stderr, "Can't rename %s to %s\n",
878 outfname, origfname);
886 #ifdef EASYWIN /*Easy Win */
887 if (file_out == FALSE)
888 scanf("%d",&end_check);
891 #else /* for Other OS */
892 if (file_out == TRUE)
897 #endif /* WIN32DLL */
922 {"katakana-hiragana","h3"},
929 #ifdef UTF8_OUTPUT_ENABLE
934 #ifdef UTF8_INPUT_ENABLE
936 {"utf16-input", "W16"},
945 #ifdef NUMCHAR_OPTION
946 {"numchar-input", ""},
952 #ifdef SHIFTJIS_CP932
962 static int option_mode = 0;
969 unsigned char *p = NULL;
981 case '-': /* literal options */
982 if (!*cp) { /* ignore the rest of arguments */
986 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
988 p = (unsigned char *)long_option[i].name;
989 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
997 cp = (unsigned char *)long_option[i].alias;
1000 if (strcmp(long_option[i].name, "overwrite") == 0){
1007 if (strcmp(long_option[i].name, "cap-input") == 0){
1011 if (strcmp(long_option[i].name, "url-input") == 0){
1016 #ifdef NUMCHAR_OPTION
1017 if (strcmp(long_option[i].name, "numchar-input") == 0){
1023 if (strcmp(long_option[i].name, "no-output") == 0){
1027 if (strcmp(long_option[i].name, "debug") == 0){
1032 if (strcmp(long_option[i].name, "cp932") == 0){
1033 #ifdef SHIFTJIS_CP932
1037 #ifdef UTF8_OUTPUT_ENABLE
1038 ms_ucs_map_f = TRUE;
1042 if (strcmp(long_option[i].name, "no-cp932") == 0){
1043 #ifdef SHIFTJIS_CP932
1047 #ifdef UTF8_OUTPUT_ENABLE
1048 ms_ucs_map_f = FALSE;
1052 #ifdef SHIFTJIS_CP932
1053 if (strcmp(long_option[i].name, "cp932inv") == 0){
1060 if (strcmp(long_option[i].name, "x0212") == 0){
1067 if (strcmp(long_option[i].name, "exec-in") == 0){
1071 if (strcmp(long_option[i].name, "exec-out") == 0){
1076 #ifdef UTF8_OUTPUT_ENABLE
1077 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1078 ms_ucs_map_f = TRUE;
1082 if (strcmp(long_option[i].name, "prefix=") == 0){
1083 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1084 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1085 prefix_table[p[i]] = p[1];
1092 case 'b': /* buffered mode */
1095 case 'u': /* non bufferd mode */
1098 case 't': /* transparent mode */
1101 case 'j': /* JIS output */
1103 output_conv = j_oconv;
1105 case 'e': /* AT&T EUC output */
1106 output_conv = e_oconv;
1108 case 's': /* SJIS output */
1109 output_conv = s_oconv;
1111 case 'l': /* ISO8859 Latin-1 support, no conversion */
1112 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1113 input_f = LATIN1_INPUT;
1115 case 'i': /* Kanji IN ESC-$-@/B */
1116 if (*cp=='@'||*cp=='B')
1117 kanji_intro = *cp++;
1119 case 'o': /* ASCII IN ESC-(-J/B */
1120 if (*cp=='J'||*cp=='B'||*cp=='H')
1121 ascii_intro = *cp++;
1128 if ('9'>= *cp && *cp>='0')
1129 hira_f |= (*cp++ -'0');
1136 #if defined(MSDOS) || defined(__OS2__)
1151 #ifdef UTF8_OUTPUT_ENABLE
1152 case 'w': /* UTF-8 output */
1153 if ('1'== cp[0] && '6'==cp[1]) {
1154 output_conv = w_oconv16; cp+=2;
1156 unicode_bom_f=2; cp++;
1159 unicode_bom_f=1; cp++;
1161 } else if (cp[0] == 'B') {
1162 unicode_bom_f=2; cp++;
1164 unicode_bom_f=1; cp++;
1167 } else if (cp[0] == '8') {
1168 output_conv = w_oconv; cp++;
1171 unicode_bom_f=1; cp++;
1174 output_conv = w_oconv;
1177 #ifdef UTF8_INPUT_ENABLE
1178 case 'W': /* UTF-8 input */
1179 if ('1'== cp[0] && '6'==cp[1]) {
1180 input_f = UTF16BE_INPUT;
1181 utf16_mode = UTF16BE_INPUT;
1185 input_f = UTF16LE_INPUT;
1186 utf16_mode = UTF16LE_INPUT;
1187 } else if (cp[0] == 'B') {
1189 input_f = UTF16BE_INPUT;
1190 utf16_mode = UTF16BE_INPUT;
1192 } else if (cp[0] == '8') {
1194 input_f = UTF8_INPUT;
1196 input_f = UTF8_INPUT;
1199 /* Input code assumption */
1200 case 'J': /* JIS input */
1201 case 'E': /* AT&T EUC input */
1202 input_f = JIS_INPUT;
1204 case 'S': /* MS Kanji input */
1205 input_f = SJIS_INPUT;
1206 if (x0201_f==NO_X0201) x0201_f=TRUE;
1208 case 'Z': /* Convert X0208 alphabet to asii */
1209 /* bit:0 Convert X0208
1210 bit:1 Convert Kankaku to one space
1211 bit:2 Convert Kankaku to two spaces
1212 bit:3 Convert HTML Entity
1214 if ('9'>= *cp && *cp>='0')
1215 alpha_f |= 1<<(*cp++ -'0');
1219 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1220 x0201_f = FALSE; /* No X0201->X0208 conversion */
1222 ESC-(-I in JIS, EUC, MS Kanji
1223 SI/SO in JIS, EUC, MS Kanji
1224 SSO in EUC, JIS, not in MS Kanji
1225 MS Kanji (0xa0-0xdf)
1227 ESC-(-I in JIS (0x20-0x5f)
1228 SSO in EUC (0xa0-0xdf)
1229 0xa0-0xd in MS Kanji (0xa0-0xdf)
1232 case 'X': /* Assume X0201 kana */
1233 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1236 case 'F': /* prserve new lines */
1237 fold_preserve_f = TRUE;
1238 case 'f': /* folding -f60 or -f */
1241 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1243 fold_len += *cp++ - '0';
1245 if (!(0<fold_len && fold_len<BUFSIZ))
1246 fold_len = DEFAULT_FOLD;
1250 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1252 fold_margin += *cp++ - '0';
1256 case 'm': /* MIME support */
1257 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1258 if (*cp=='B'||*cp=='Q') {
1259 mime_decode_mode = *cp++;
1260 mimebuf_f = FIXED_MIME;
1261 } else if (*cp=='N') {
1262 mime_f = TRUE; cp++;
1263 } else if (*cp=='S') {
1264 mime_f = STRICT_MIME; cp++;
1265 } else if (*cp=='0') {
1266 mime_decode_f = FALSE;
1267 mime_f = FALSE; cp++;
1270 case 'M': /* MIME output */
1273 mimeout_f = FIXED_MIME; cp++;
1274 } else if (*cp=='Q') {
1276 mimeout_f = FIXED_MIME; cp++;
1281 case 'B': /* Broken JIS support */
1283 bit:1 allow any x on ESC-(-x or ESC-$-x
1284 bit:2 reset to ascii on NL
1286 if ('9'>= *cp && *cp>='0')
1287 broken_f |= 1<<(*cp++ -'0');
1292 case 'O':/* for Output file */
1296 case 'c':/* add cr code */
1299 case 'd':/* delete cr code */
1302 case 'I': /* ISO-2022-JP output */
1305 case 'L': /* line mode */
1306 if (*cp=='u') { /* unix */
1307 crmode_f = NL; cp++;
1308 } else if (*cp=='m') { /* mac */
1309 crmode_f = CR; cp++;
1310 } else if (*cp=='w') { /* windows */
1311 crmode_f = CRLF; cp++;
1312 } else if (*cp=='0') { /* no conversion */
1322 /* multiple options in a string are allowed for the Perl module */
1323 while(*cp && *cp!='-') cp++;
1327 /* bogus option but ignored */
1333 #ifdef ANSI_C_PROTOTYPE
1334 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1336 struct input_code * find_inputcode_byfunc(iconv_func)
1337 int (*iconv_func)();
1341 struct input_code *p = input_code_list;
1343 if (iconv_func == p->iconv_func){
1353 static int (*iconv_for_check)() = 0;
1356 #ifdef ANSI_C_PROTOTYPE
1357 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1359 void set_iconv(f, iconv_func)
1361 int (*iconv_func)();
1364 #ifdef INPUT_CODE_FIX
1372 #ifdef INPUT_CODE_FIX
1373 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1379 if (estab_f && iconv_for_check != iconv){
1380 struct input_code *p = find_inputcode_byfunc(iconv);
1382 set_input_codename(p->name);
1383 debug(input_codename);
1385 iconv_for_check = iconv;
1390 #define SCORE_L2 (1) /* JIS level-2 kanji */
1391 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1392 #define SCORE_DEPEND (SCORE_KANA << 1) /* machine-dependent characters */
1393 #ifdef SHIFTJIS_CP932
1394 #define SCORE_CP932 (SCORE_DEPEND << 1) /* remapped via CP932 */
1395 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1397 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1399 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1400 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1402 #define SCORE_INIT (SCORE_iMIME)
1404 int score_table_A0[] = {
1407 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1408 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1411 int score_table_F0[] = {
1412 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1413 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1414 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1415 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1418 void set_code_score(ptr, score)
1419 struct input_code *ptr;
1423 ptr->score |= score;
1427 void clr_code_score(ptr, score)
1428 struct input_code *ptr;
1432 ptr->score &= ~score;
1436 void code_score(ptr)
1437 struct input_code *ptr;
1439 int c2 = ptr->buf[0];
1440 int c1 = ptr->buf[1];
1442 set_code_score(ptr, SCORE_ERROR);
1443 }else if (c2 == SSO){
1444 set_code_score(ptr, SCORE_KANA);
1445 #ifdef UTF8_OUTPUT_ENABLE
1446 }else if (!e2w_conv(c2, c1)){
1447 set_code_score(ptr, SCORE_NO_EXIST);
1449 }else if ((c2 & 0x70) == 0x20){
1450 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1451 }else if ((c2 & 0x70) == 0x70){
1452 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1453 }else if ((c2 & 0x70) >= 0x50){
1454 set_code_score(ptr, SCORE_L2);
1458 void status_disable(ptr)
1459 struct input_code *ptr;
1464 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1467 void status_push_ch(ptr, c)
1468 struct input_code *ptr;
1471 ptr->buf[ptr->index++] = c;
1474 void status_clear(ptr)
1475 struct input_code *ptr;
1481 void status_reset(ptr)
1482 struct input_code *ptr;
1485 ptr->score = SCORE_INIT;
1488 void status_reinit(ptr)
1489 struct input_code *ptr;
1492 ptr->_file_stat = 0;
1495 void status_check(ptr, c)
1496 struct input_code *ptr;
1499 if (c <= DEL && estab_f){
1504 void s_status(ptr, c)
1505 struct input_code *ptr;
1510 status_check(ptr, c);
1515 #ifdef NUMCHAR_OPTION
1516 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1519 }else if (0xa1 <= c && c <= 0xdf){
1520 status_push_ch(ptr, SSO);
1521 status_push_ch(ptr, c);
1524 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1526 status_push_ch(ptr, c);
1527 #ifdef SHIFTJIS_CP932
1529 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1531 status_push_ch(ptr, c);
1532 #endif /* SHIFTJIS_CP932 */
1534 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1536 status_push_ch(ptr, c);
1537 #endif /* X0212_ENABLE */
1539 status_disable(ptr);
1543 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1544 status_push_ch(ptr, c);
1545 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1549 status_disable(ptr);
1553 #ifdef SHIFTJIS_CP932
1554 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1555 status_push_ch(ptr, c);
1556 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1557 set_code_score(ptr, SCORE_CP932);
1562 #endif /* SHIFTJIS_CP932 */
1563 #ifndef X0212_ENABLE
1564 status_disable(ptr);
1570 void e_status(ptr, c)
1571 struct input_code *ptr;
1576 status_check(ptr, c);
1581 #ifdef NUMCHAR_OPTION
1582 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1585 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1587 status_push_ch(ptr, c);
1589 }else if (0x8f == c){
1591 status_push_ch(ptr, c);
1592 #endif /* X0212_ENABLE */
1594 status_disable(ptr);
1598 if (0xa1 <= c && c <= 0xfe){
1599 status_push_ch(ptr, c);
1603 status_disable(ptr);
1608 if (0xa1 <= c && c <= 0xfe){
1610 status_push_ch(ptr, c);
1612 status_disable(ptr);
1614 #endif /* X0212_ENABLE */
1618 #ifdef UTF8_INPUT_ENABLE
1619 void w16_status(ptr, c)
1620 struct input_code *ptr;
1627 if (ptr->_file_stat == 0){
1628 if (c == 0xfe || c == 0xff){
1630 status_push_ch(ptr, c);
1631 ptr->_file_stat = 1;
1633 status_disable(ptr);
1634 ptr->_file_stat = -1;
1636 }else if (ptr->_file_stat > 0){
1638 status_push_ch(ptr, c);
1639 }else if (ptr->_file_stat < 0){
1640 status_disable(ptr);
1646 status_disable(ptr);
1647 ptr->_file_stat = -1;
1649 status_push_ch(ptr, c);
1656 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1657 status_push_ch(ptr, c);
1660 status_disable(ptr);
1661 ptr->_file_stat = -1;
1667 void w_status(ptr, c)
1668 struct input_code *ptr;
1673 status_check(ptr, c);
1678 #ifdef NUMCHAR_OPTION
1679 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1682 }else if (0xc0 <= c && c <= 0xdf){
1684 status_push_ch(ptr, c);
1685 }else if (0xe0 <= c && c <= 0xef){
1687 status_push_ch(ptr, c);
1689 status_disable(ptr);
1694 if (0x80 <= c && c <= 0xbf){
1695 status_push_ch(ptr, c);
1696 if (ptr->index > ptr->stat){
1697 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1698 && ptr->buf[2] == 0xbf);
1699 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1700 &ptr->buf[0], &ptr->buf[1]);
1707 status_disable(ptr);
1718 int action_flag = 1;
1719 struct input_code *result = 0;
1720 struct input_code *p = input_code_list;
1722 (p->status_func)(p, c);
1725 }else if(p->stat == 0){
1736 if (result && !estab_f){
1737 set_iconv(TRUE, result->iconv_func);
1738 }else if (c <= DEL){
1739 struct input_code *ptr = input_code_list;
1754 return std_gc_buf[--std_gc_ndx];
1765 if (std_gc_ndx == STD_GC_BUFSIZE){
1768 std_gc_buf[std_gc_ndx++] = c;
1788 while ((c = (*i_getc)(f)) != EOF)
1797 oconv = output_conv;
1800 /* replace continuation module, from output side */
1802 /* output redirection */
1804 if (noout_f || guess_f){
1811 if (mimeout_f == TRUE) {
1812 o_base64conv = oconv; oconv = base64_conv;
1814 /* base64_count = 0; */
1818 o_crconv = oconv; oconv = cr_conv;
1821 o_rot_conv = oconv; oconv = rot_conv;
1824 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1827 o_hira_conv = oconv; oconv = hira_conv;
1830 o_fconv = oconv; oconv = fold_conv;
1833 if (alpha_f || x0201_f) {
1834 o_zconv = oconv; oconv = z_conv;
1838 i_ungetc = std_ungetc;
1839 /* input redirection */
1842 i_cgetc = i_getc; i_getc = cap_getc;
1843 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1846 i_ugetc = i_getc; i_getc = url_getc;
1847 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1850 #ifdef NUMCHAR_OPTION
1852 i_ngetc = i_getc; i_getc = numchar_getc;
1853 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1856 if (mime_f && mimebuf_f==FIXED_MIME) {
1857 i_mgetc = i_getc; i_getc = mime_getc;
1858 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1861 i_bgetc = i_getc; i_getc = broken_getc;
1862 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1864 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1865 set_iconv(-TRUE, e_iconv);
1866 } else if (input_f == SJIS_INPUT) {
1867 set_iconv(-TRUE, s_iconv);
1868 #ifdef UTF8_INPUT_ENABLE
1869 } else if (input_f == UTF8_INPUT) {
1870 set_iconv(-TRUE, w_iconv);
1871 } else if (input_f == UTF16BE_INPUT) {
1872 set_iconv(-TRUE, w_iconv16);
1873 } else if (input_f == UTF16LE_INPUT) {
1874 set_iconv(-TRUE, w_iconv16);
1877 set_iconv(FALSE, e_iconv);
1881 struct input_code *p = input_code_list;
1889 Conversion main loop. Code detection only.
1898 int is_8bit = FALSE;
1900 module_connection();
1905 output_mode = ASCII;
1908 #define NEXT continue /* no output, get next */
1909 #define SEND ; /* output c1 and c2, get next */
1910 #define LAST break /* end of loop, go closing */
1912 while ((c1 = (*i_getc)(f)) != EOF) {
1917 /* in case of 8th bit is on */
1918 if (!estab_f&&!mime_decode_mode) {
1919 /* in case of not established yet */
1920 /* It is still ambiguous */
1921 if (h_conv(f, c2, c1)==EOF)
1927 /* in case of already established */
1929 /* ignore bogus code */
1935 /* second byte, 7 bit code */
1936 /* it might be kanji shifted */
1937 if ((c1 == DEL) || (c1 <= SPACE)) {
1938 /* ignore bogus first code */
1946 #ifdef UTF8_INPUT_ENABLE
1955 #ifdef NUMCHAR_OPTION
1956 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1959 } else if (c1 > DEL) {
1961 if (!estab_f && !iso8859_f) {
1962 /* not established yet */
1963 if (!is_8bit) is_8bit = TRUE;
1966 } else { /* estab_f==TRUE */
1971 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1972 /* SJIS X0201 Case... */
1973 if(iso2022jp_f && x0201_f==NO_X0201) {
1974 (*oconv)(GETA1, GETA2);
1981 } else if (c1==SSO && iconv != s_iconv) {
1982 /* EUC X0201 Case */
1983 c1 = (*i_getc)(f); /* skip SSO */
1985 if (SSP<=c1 && c1<0xe0) {
1986 if(iso2022jp_f && x0201_f==NO_X0201) {
1987 (*oconv)(GETA1, GETA2);
1994 } else { /* bogus code, skip SSO and one byte */
1998 /* already established */
2003 } else if ((c1 > SPACE) && (c1 != DEL)) {
2004 /* in case of Roman characters */
2006 /* output 1 shifted byte */
2010 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2011 /* output 1 shifted byte */
2012 if(iso2022jp_f && x0201_f==NO_X0201) {
2013 (*oconv)(GETA1, GETA2);
2020 /* look like bogus code */
2023 } else if (input_mode == X0208) {
2024 /* in case of Kanji shifted */
2027 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2028 /* Check MIME code */
2029 if ((c1 = (*i_getc)(f)) == EOF) {
2032 } else if (c1 == '?') {
2033 /* =? is mime conversion start sequence */
2034 if(mime_f == STRICT_MIME) {
2035 /* check in real detail */
2036 if (mime_begin_strict(f) == EOF)
2040 } else if (mime_begin(f) == EOF)
2050 /* normal ASCII code */
2053 } else if (c1 == SI) {
2056 } else if (c1 == SO) {
2059 } else if (c1 == ESC ) {
2060 if ((c1 = (*i_getc)(f)) == EOF) {
2061 /* (*oconv)(0, ESC); don't send bogus code */
2063 } else if (c1 == '$') {
2064 if ((c1 = (*i_getc)(f)) == EOF) {
2066 (*oconv)(0, ESC); don't send bogus code
2067 (*oconv)(0, '$'); */
2069 } else if (c1 == '@'|| c1 == 'B') {
2070 /* This is kanji introduction */
2073 set_input_codename("ISO-2022-JP");
2074 debug(input_codename);
2076 } else if (c1 == '(') {
2077 if ((c1 = (*i_getc)(f)) == EOF) {
2078 /* don't send bogus code
2084 } else if (c1 == '@'|| c1 == 'B') {
2085 /* This is kanji introduction */
2090 } else if (c1 == 'D'){
2094 #endif /* X0212_ENABLE */
2096 /* could be some special code */
2103 } else if (broken_f&0x2) {
2104 /* accept any ESC-(-x as broken code ... */
2114 } else if (c1 == '(') {
2115 if ((c1 = (*i_getc)(f)) == EOF) {
2116 /* don't send bogus code
2118 (*oconv)(0, '('); */
2122 /* This is X0201 kana introduction */
2123 input_mode = X0201; shift_mode = X0201;
2125 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2126 /* This is ASCII (or JIS Roman) introduction, not kanji: back to single-byte mode */
2127 input_mode = ASCII; shift_mode = FALSE;
2129 } else if (broken_f&0x2) {
2130 input_mode = ASCII; shift_mode = FALSE;
2135 /* maintain various input_mode here */
2139 } else if ( c1 == 'N' || c1 == 'n' ){
2141 c3 = (*i_getc)(f); /* skip SS2 */
2142 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2157 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2158 input_mode = ASCII; set_iconv(FALSE, 0);
2160 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2161 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2169 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2170 if ((c1=(*i_getc)(f))!=EOF) {
2174 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2190 if (input_mode == X0208)
2191 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2193 else if (input_mode == X0212)
2194 (*oconv)((0x8f << 8) | c2, c1);
2195 #endif /* X0212_ENABLE */
2196 else if (input_mode)
2197 (*oconv)(input_mode, c1); /* other special case */
2198 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2199 int c0 = (*i_getc)(f);
2202 (*iconv)(c2, c1, c0);
2208 /* goto next_word */
2212 (*iconv)(EOF, 0, 0);
2213 if (!is_inputcode_set)
2216 struct input_code *p = input_code_list;
2217 struct input_code *result = p;
2219 if (p->score < result->score) result = p;
2222 set_input_codename(result->name);
2237 /** it must NOT be in the kanji shifted sequence */
2238 /** it must NOT be written in JIS7 */
2239 /** and it must be after 2 byte 8bit code */
2246 while ((c1 = (*i_getc)(f)) != EOF) {
2252 if (push_hold_buf(c1) == EOF || estab_f){
2258 struct input_code *p = input_code_list;
2259 struct input_code *result = p;
2264 if (p->score < result->score){
2269 set_iconv(FALSE, result->iconv_func);
2274 ** 1) EOF is detected, or
2275 ** 2) Code is established, or
2276 ** 3) Buffer is FULL (but last word is pushed)
2278 ** in 1) and 3) cases, we continue to use
2279 ** Kanji codes by oconv and leave estab_f unchanged.
2284 while (wc < hold_count){
2285 c2 = hold_buf[wc++];
2287 #ifdef NUMCHAR_OPTION
2288 || (c2 & CLASS_MASK) == CLASS_UTF16
2293 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2294 (*iconv)(X0201, c2, 0);
2297 if (wc < hold_count){
2298 c1 = hold_buf[wc++];
2307 if ((*iconv)(c2, c1, 0) < 0){
2309 if (wc < hold_count){
2310 c0 = hold_buf[wc++];
2319 (*iconv)(c2, c1, c0);
2332 if (hold_count >= HOLD_SIZE*2)
2334 hold_buf[hold_count++] = c2;
2335 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2338 int s2e_conv(c2, c1, p2, p1)
2343 #ifdef SHIFTJIS_CP932
2344 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2345 extern unsigned short shiftjis_cp932[3][189];
2346 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2352 #endif /* SHIFTJIS_CP932 */
2354 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2355 extern unsigned short shiftjis_x0212[3][189];
2356 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2359 c2 = (0x8f << 8) | (val >> 8);
2371 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2373 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2381 c2 = x0212_unshift(c2);
2396 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2399 int ret = s2e_conv(c2, c1, &c2, &c1);
2400 if (ret) return ret;
2414 }else if (c2 == 0x8f){
2418 c2 = (c2 << 8) | (c1 & 0x7f);
2420 #ifdef SHIFTJIS_CP932
2423 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2424 s2e_conv(s2, s1, &c2, &c1);
2425 if ((c2 & 0xff00) == 0){
2431 #endif /* SHIFTJIS_CP932 */
2432 #endif /* X0212_ENABLE */
2433 } else if (c2 == SSO){
2436 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2446 #ifdef UTF8_INPUT_ENABLE
2448 w2e_conv(c2, c1, c0, p2, p1)
2452 extern unsigned short * utf8_to_euc_2bytes[];
2453 extern unsigned short ** utf8_to_euc_3bytes[];
2456 if (0xc0 <= c2 && c2 <= 0xef) {
2457 unsigned short **pp;
2460 if (c0 == 0) return -1;
2461 pp = utf8_to_euc_3bytes[c2 - 0x80];
2462 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2464 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2466 #ifdef NUMCHAR_OPTION
2469 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2474 } else if (c2 == X0201) {
2487 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2495 w16w_conv(val, p2, p1, p0)
2503 }else if (val < 0x800){
2504 *p2 = 0xc0 | (val >> 6);
2505 *p1 = 0x80 | (val & 0x3f);
2508 *p2 = 0xe0 | (val >> 12);
2509 *p1 = 0x80 | ((val >> 6) & 0x3f);
2510 *p0 = 0x80 | (val & 0x3f);
2515 ww16_conv(c2, c1, c0)
2520 val = (c2 & 0x0f) << 12;
2521 val |= (c1 & 0x3f) << 6;
2523 }else if (c2 >= 0xc0){
2524 val = (c2 & 0x1f) << 6;
2533 w16e_conv(val, p2, p1)
2537 extern unsigned short * utf8_to_euc_2bytes[];
2538 extern unsigned short ** utf8_to_euc_3bytes[];
2540 unsigned short **pp;
2544 w16w_conv(val, &c2, &c1, &c0);
2547 pp = utf8_to_euc_3bytes[c2 - 0x80];
2548 psize = sizeof_utf8_to_euc_C2;
2549 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2551 pp = utf8_to_euc_2bytes;
2552 psize = sizeof_utf8_to_euc_2bytes;
2553 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2555 #ifdef NUMCHAR_OPTION
2558 *p1 = CLASS_UTF16 | val;
2570 w_iconv16(c2, c1, c0)
2575 if (c2==0376 && c1==0377){
2576 utf16_mode = UTF16BE_INPUT;
2578 } else if (c2==0377 && c1==0376){
2579 utf16_mode = UTF16LE_INPUT;
2582 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
2584 tmp=c1; c1=c2; c2=tmp;
2586 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2590 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2591 if (ret) return ret;
2597 w_iconv_common(c1, c0, pp, psize, p2, p1)
2599 unsigned short **pp;
2607 if (pp == 0) return 1;
2610 if (c1 < 0 || psize <= c1) return 1;
2612 if (p == 0) return 1;
2615 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2617 if (val == 0) return 1;
2624 if (c2 == SO) c2 = X0201;
2633 #ifdef UTF8_OUTPUT_ENABLE
2638 extern unsigned short euc_to_utf8_1byte[];
2639 extern unsigned short * euc_to_utf8_2bytes[];
2640 extern unsigned short * euc_to_utf8_2bytes_ms[];
2644 p = euc_to_utf8_1byte;
2646 } else if (c2 >> 8 == 0x8f){
2647 extern unsigned short * x0212_to_utf8_2bytes[];
2648 c2 = (c2&0x7f) - 0x21;
2649 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2650 p = x0212_to_utf8_2bytes[c2];
2656 c2 = (c2&0x7f) - 0x21;
2657 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2658 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2663 c1 = (c1 & 0x7f) - 0x21;
2664 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2680 if (unicode_bom_f==2) {
2687 #ifdef NUMCHAR_OPTION
2688 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2689 w16w_conv(c1, &c2, &c1, &c0);
2693 if (c0) (*o_putc)(c0);
2700 output_mode = ASCII;
2702 } else if (c2 == ISO8859_1) {
2703 output_mode = ISO8859_1;
2704 (*o_putc)(c1 | 0x080);
2708 val = e2w_conv(c2, c1);
2710 w16w_conv(val, &c2, &c1, &c0);
2714 if (c0) (*o_putc)(c0);
2730 if (unicode_bom_f==2) {
2732 (*o_putc)((unsigned char)'\377');
2736 (*o_putc)((unsigned char)'\377');
2741 if (c2 == ISO8859_1) {
2744 #ifdef NUMCHAR_OPTION
2745 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2746 c2 = (c1 >> 8) & 0xff;
2750 unsigned short val = e2w_conv(c2, c1);
2751 c2 = (val >> 8) & 0xff;
2770 #ifdef NUMCHAR_OPTION
2771 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2772 w16e_conv(c1, &c2, &c1);
2773 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2781 } else if (c2 == 0) {
2782 output_mode = ASCII;
2784 } else if (c2 == X0201) {
2785 output_mode = JAPANESE_EUC;
2786 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2787 } else if (c2 == ISO8859_1) {
2788 output_mode = ISO8859_1;
2789 (*o_putc)(c1 | 0x080);
2791 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2792 output_mode = JAPANESE_EUC;
2793 #ifdef SHIFTJIS_CP932
2796 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2797 s2e_conv(s2, s1, &c2, &c1);
2801 if ((c2 & 0xff00) >> 8 == 0x8f){
2804 (*o_putc)((c2 & 0x7f) | 0x080);
2805 (*o_putc)(c1 | 0x080);
2808 (*o_putc)((c2 & 0x7f) | 0x080);
2809 (*o_putc)(c1 | 0x080);
2813 if ((c1<0x21 || 0x7e<c1) ||
2814 (c2<0x21 || 0x7e<c2)) {
2815 set_iconv(FALSE, 0);
2816 return; /* too late to rescue this char */
2818 output_mode = JAPANESE_EUC;
2819 (*o_putc)(c2 | 0x080);
2820 (*o_putc)(c1 | 0x080);
2830 if ((ret & 0xff00) == 0x8f00){
2831 if (0x75 <= c && c <= 0x7f){
2832 ret = c + (0x109 - 0x75);
2835 if (0x75 <= c && c <= 0x7f){
2836 ret = c + (0x113 - 0x75);
2843 int x0212_unshift(c)
2847 if (0x7f <= c && c <= 0x88){
2848 ret = c + (0x75 - 0x7f);
2849 }else if (0x89 <= c && c <= 0x92){
2850 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2854 #endif /* X0212_ENABLE */
2857 e2s_conv(c2, c1, p2, p1)
2858 int c2, c1, *p2, *p1;
2861 unsigned short *ptr;
2863 extern unsigned short *x0212_shiftjis[];
2865 if ((c2 & 0xff00) == 0x8f00){
2867 if (0x21 <= ndx && ndx <= 0x7e){
2868 ptr = x0212_shiftjis[ndx - 0x21];
2870 val = ptr[(c1 & 0x7f) - 0x21];
2880 c2 = x0212_shift(c2);
2882 #endif /* X0212_ENABLE */
2883 if ((c2 & 0xff00) == 0x8f00){
2886 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2887 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2896 #ifdef NUMCHAR_OPTION
2897 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2898 w16e_conv(c1, &c2, &c1);
2904 } else if (c2 == 0) {
2905 output_mode = ASCII;
2907 } else if (c2 == X0201) {
2908 output_mode = SHIFT_JIS;
2910 } else if (c2 == ISO8859_1) {
2911 output_mode = ISO8859_1;
2912 (*o_putc)(c1 | 0x080);
2914 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2915 output_mode = SHIFT_JIS;
2916 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2922 if ((c1<0x20 || 0x7e<c1) ||
2923 (c2<0x20 || 0x7e<c2)) {
2924 set_iconv(FALSE, 0);
2925 return; /* too late to rescue this char */
2927 output_mode = SHIFT_JIS;
2928 e2s_conv(c2, c1, &c2, &c1);
2930 #ifdef SHIFTJIS_CP932
2932 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2933 extern unsigned short cp932inv[2][189];
2934 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2940 #endif /* SHIFTJIS_CP932 */
2943 if (prefix_table[(unsigned char)c1]){
2944 (*o_putc)(prefix_table[(unsigned char)c1]);
2955 #ifdef NUMCHAR_OPTION
2956 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2957 w16e_conv(c1, &c2, &c1);
2961 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2964 (*o_putc)(ascii_intro);
2965 output_mode = ASCII;
2969 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2970 if (output_mode!=X0212) {
2971 output_mode = X0212;
2977 (*o_putc)(c2 & 0x7f);
2980 } else if (c2==X0201) {
2981 if (output_mode!=X0201) {
2982 output_mode = X0201;
2988 } else if (c2==ISO8859_1) {
2989 /* iso8859 introduction, or 8th bit on */
2990 /* Can we convert in 7bit form using ESC-'-'-A ?
2992 output_mode = ISO8859_1;
2994 } else if (c2 == 0) {
2995 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2998 (*o_putc)(ascii_intro);
2999 output_mode = ASCII;
3003 if (output_mode != X0208) {
3004 output_mode = X0208;
3007 (*o_putc)(kanji_intro);
3009 if (c1<0x20 || 0x7e<c1)
3011 if (c2<0x20 || 0x7e<c2)
3023 mime_prechar(c2, c1);
3024 (*o_base64conv)(c2,c1);
3028 static int broken_buf[3];
3029 static int broken_counter = 0;
3030 static int broken_last = 0;
3037 if (broken_counter>0) {
3038 return broken_buf[--broken_counter];
3041 if (c=='$' && broken_last != ESC
3042 && (input_mode==ASCII || input_mode==X0201)) {
3045 if (c1=='@'|| c1=='B') {
3046 broken_buf[0]=c1; broken_buf[1]=c;
3053 } else if (c=='(' && broken_last != ESC
3054 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3057 if (c1=='J'|| c1=='B') {
3058 broken_buf[0]=c1; broken_buf[1]=c;
3076 if (broken_counter<2)
3077 broken_buf[broken_counter++]=c;
3081 static int prev_cr = 0;
3089 if (! (c2==0&&c1==NL) ) {
3095 } else if (c1=='\r') {
3097 } else if (c1=='\n') {
3098 if (crmode_f==CRLF) {
3099 (*o_crconv)(0,'\r');
3100 } else if (crmode_f==CR) {
3101 (*o_crconv)(0,'\r');
3105 } else if (c1!='\032' || crmode_f!=NL){
3111 Return value of fold_conv()
3113 \n add newline and output char
3114 \r add newline and output nothing
3117 1 (or else) normal output
3119 fold state in prev (previous character)
3121 >0x80 Japanese (X0208/X0201)
3126 This fold algorithm does not preserve leading space in a line.
3127 This is the main difference from fmt.
3130 #define char_size(c2,c1) (c2?2:1)
3139 if (c1== '\r' && !fold_preserve_f) {
3140 fold_state=0; /* ignore cr */
3141 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3143 fold_state=0; /* ignore cr */
3144 } else if (c1== BS) {
3145 if (f_line>0) f_line--;
3147 } else if (c2==EOF && f_line != 0) { /* close open last line */
3149 } else if ((c1=='\n' && !fold_preserve_f)
3150 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3151 && fold_preserve_f)) {
3153 if (fold_preserve_f) {
3157 } else if ((f_prev == c1 && !fold_preserve_f)
3158 || (f_prev == '\n' && fold_preserve_f)
3159 ) { /* duplicate newline */
3162 fold_state = '\n'; /* output two newline */
3168 if (f_prev&0x80) { /* Japanese? */
3170 fold_state = 0; /* ignore given single newline */
3171 } else if (f_prev==' ') {
3175 if (++f_line<=fold_len)
3179 fold_state = '\r'; /* fold and output nothing */
3183 } else if (c1=='\f') {
3188 fold_state = '\n'; /* output newline and clear */
3189 } else if ( (c2==0 && c1==' ')||
3190 (c2==0 && c1=='\t')||
3191 (c2=='!'&& c1=='!')) {
3192 /* X0208 kankaku or ascii space */
3193 if (f_prev == ' ') {
3194 fold_state = 0; /* remove duplicate spaces */
3197 if (++f_line<=fold_len)
3198 fold_state = ' '; /* output ASCII space only */
3200 f_prev = ' '; f_line = 0;
3201 fold_state = '\r'; /* fold and output nothing */
3205 prev0 = f_prev; /* we still need this one... , but almost done */
3207 if (c2 || c2==X0201)
3208 f_prev |= 0x80; /* this is Japanese */
3209 f_line += char_size(c2,c1);
3210 if (f_line<=fold_len) { /* normal case */
3213 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3214 f_line = char_size(c2,c1);
3215 fold_state = '\n'; /* We can't wait, do fold now */
3216 } else if (c2==X0201) {
3217 /* simple kinsoku rules return 1 means no folding */
3218 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3219 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3220 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3221 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3222 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3223 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3224 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3226 fold_state = '\n';/* add one new f_line before this character */
3229 fold_state = '\n';/* add one new f_line before this character */
3232 /* kinsoku point in ASCII */
3233 if ( c1==')'|| /* { [ ( */
3244 /* just after special */
3245 } else if (!is_alnum(prev0)) {
3246 f_line = char_size(c2,c1);
3248 } else if ((prev0==' ') || /* ignored new f_line */
3249 (prev0=='\n')|| /* ignored new f_line */
3250 (prev0&0x80)) { /* X0208 - ASCII */
3251 f_line = char_size(c2,c1);
3252 fold_state = '\n';/* add one new f_line before this character */
3254 fold_state = 1; /* default no fold in ASCII */
3258 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3259 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3260 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3261 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3262 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3263 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3264 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3265 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3266 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3267 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3268 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3269 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3270 /* default no fold in kinsoku */
3273 f_line = char_size(c2,c1);
3274 /* add one new f_line before this character */
3277 f_line = char_size(c2,c1);
3279 /* add one new f_line before this character */
3284 /* terminator process */
3285 switch(fold_state) {
3304 int z_prev2=0,z_prev1=0;
3311 /* if (c2) c1 &= 0x7f; assertion */
3313 if (x0201_f && z_prev2==X0201) { /* X0201 */
3314 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3316 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3318 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3320 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3324 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3333 if (x0201_f && c2==X0201) {
3334 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3335 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3336 z_prev1 = c1; z_prev2 = c2;
3339 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3344 /* JISX0208 Alphabet */
3345 if (alpha_f && c2 == 0x23 ) {
3347 } else if (alpha_f && c2 == 0x21 ) {
3348 /* JISX0208 Kigou */
3353 } else if (alpha_f&0x4) {
3358 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3364 case '>': entity = ">"; break;
3365 case '<': entity = "<"; break;
3366 case '\"': entity = """; break;
3367 case '&': entity = "&"; break;
3370 while (*entity) (*o_zconv)(0, *entity++);
3380 #define rot13(c) ( \
3382 (c <= 'M') ? (c + 13): \
3383 (c <= 'Z') ? (c - 13): \
3385 (c <= 'm') ? (c + 13): \
3386 (c <= 'z') ? (c - 13): \
3390 #define rot47(c) ( \
3392 ( c <= 'O' ) ? (c + 47) : \
3393 ( c <= '~' ) ? (c - 47) : \
3401 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3407 (*o_rot_conv)(c2,c1);
3414 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3416 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3419 (*o_hira_conv)(c2,c1);
3424 iso2022jp_check_conv(c2,c1)
3427 static int range[RANGE_NUM_MAX][2] = {
3450 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3454 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3459 for (i = 0; i < RANGE_NUM_MAX; i++) {
3460 start = range[i][0];
3463 if (c >= start && c <= end) {
3468 (*o_iso2022jp_check_conv)(c2,c1);
3472 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3474 unsigned char *mime_pattern[] = {
3475 (unsigned char *)"\075?EUC-JP?B?",
3476 (unsigned char *)"\075?SHIFT_JIS?B?",
3477 (unsigned char *)"\075?ISO-8859-1?Q?",
3478 (unsigned char *)"\075?ISO-8859-1?B?",
3479 (unsigned char *)"\075?ISO-2022-JP?B?",
3480 (unsigned char *)"\075?ISO-2022-JP?Q?",
3481 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3482 (unsigned char *)"\075?UTF-8?B?",
3483 (unsigned char *)"\075?UTF-8?Q?",
3485 (unsigned char *)"\075?US-ASCII?Q?",
3490 /* Markers used to raise the priority of the corresponding input code */
3491 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3492 e_iconv, s_iconv, 0, 0, 0, 0,
3493 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3499 int mime_encode[] = {
3500 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3501 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3508 int mime_encode_method[] = {
3509 'B', 'B','Q', 'B', 'B', 'Q',
3510 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3518 #define MAXRECOVER 20
3520 /* I don't trust portability of toupper */
3521 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
3522 #define nkf_isdigit(c) ('0'<=c && c<='9')
3523 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
3524 #define nkf_isblank(c) (c == SPACE || c == TAB)
3525 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == NL)
3526 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
3527 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
3532 if (i_getc!=mime_getc) {
3533 i_mgetc = i_getc; i_getc = mime_getc;
3534 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3535 if(mime_f==STRICT_MIME) {
3536 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3537 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3543 unswitch_mime_getc()
3545 if(mime_f==STRICT_MIME) {
3546 i_mgetc = i_mgetc_buf;
3547 i_mungetc = i_mungetc_buf;
3550 i_ungetc = i_mungetc;
3554 mime_begin_strict(f)
3559 unsigned char *p,*q;
3560 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3562 mime_decode_mode = FALSE;
3563 /* =? has been checked */
3565 p = mime_pattern[j];
3568 for(i=2;p[i]>' ';i++) { /* start at =? */
3569 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3570 /* pattern fails, try next one */
3572 while ((p = mime_pattern[++j])) {
3573 for(k=2;k<i;k++) /* assume length(p) > i */
3574 if (p[k]!=q[k]) break;
3575 if (k==i && nkf_toupper(c1)==p[k]) break;
3577 if (p) continue; /* found next one, continue */
3578 /* all fails, output from recovery buffer */
3586 mime_decode_mode = p[i-2];
3588 set_iconv(FALSE, mime_priority_func[j]);
3589 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3591 if (mime_decode_mode=='B') {
3592 mimebuf_f = unbuf_f;
3594 /* do MIME integrity check */
3595 return mime_integrity(f,mime_pattern[j]);
3607 /* we don't keep eof of Fifo, because it contains ?= as
3608 a terminator. It was checked in mime_integrity. */
3609 return ((mimebuf_f)?
3610 (*i_mgetc_buf)(f):Fifo(mime_input++));
3614 mime_ungetc_buf(c,f)
3619 (*i_mungetc_buf)(c,f);
3621 Fifo(--mime_input)=c;
3632 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3633 /* re-read and convert again from mime_buffer. */
3635 /* =? has been checked */
3637 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3638 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3639 /* We accept any character type even if it is broken by new lines */
3640 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3641 if (c1=='\n'||c1==' '||c1=='\r'||
3642 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3644 /* Failed. But this could be another MIME preamble */
3652 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3653 if (!(++i<MAXRECOVER) || c1==EOF) break;
3654 if (c1=='b'||c1=='B') {
3655 mime_decode_mode = 'B';
3656 } else if (c1=='q'||c1=='Q') {
3657 mime_decode_mode = 'Q';
3661 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3662 if (!(++i<MAXRECOVER) || c1==EOF) break;
3664 mime_decode_mode = FALSE;
3670 if (!mime_decode_mode) {
3671 /* false MIME preamble, restart from mime_buffer */
3672 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3673 /* Since we are in MIME mode until buffer becomes empty, */
3674 /* we never go into mime_begin again for a while. */
3677 /* discard mime preamble, and goto MIME mode */
3679 /* do no MIME integrity check */
3680 return c1; /* used only for checking EOF */
3695 fprintf(stderr, "%s\n", str);
3701 set_input_codename (codename)
3706 strcmp(codename, "") != 0 &&
3707 strcmp(codename, input_codename) != 0)
3709 is_inputcode_mixed = TRUE;
3711 input_codename = codename;
3712 is_inputcode_set = TRUE;
3717 print_guessed_code (filename)
3720 char *codename = "BINARY";
3721 if (!is_inputcode_mixed) {
3722 if (strcmp(input_codename, "") == 0) {
3725 codename = input_codename;
3728 if (filename != NULL) printf("%s:", filename);
3729 printf("%s\n", codename);
3737 if (nkf_isdigit(x)) return x - '0';
3738 return nkf_toupper(x) - 'A' + 10;
3743 #ifdef ANSI_C_PROTOTYPE
3744 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3747 hex_getc(ch, f, g, u)
3760 if (!nkf_isxdigit(c2)){
3765 if (!nkf_isxdigit(c3)){
3770 return (hex2bin(c2) << 4) | hex2bin(c3);
3777 return hex_getc(':', f, i_cgetc, i_cungetc);
3785 return (*i_cungetc)(c, f);
3792 return hex_getc('%', f, i_ugetc, i_uungetc);
3800 return (*i_uungetc)(c, f);
3804 #ifdef NUMCHAR_OPTION
3809 int (*g)() = i_ngetc;
3810 int (*u)() = i_nungetc;
3821 if (buf[i] == 'x' || buf[i] == 'X'){
3822 for (j = 0; j < 5; j++){
3824 if (!nkf_isxdigit(buf[i])){
3831 c |= hex2bin(buf[i]);
3834 for (j = 0; j < 6; j++){
3838 if (!nkf_isdigit(buf[i])){
3845 c += hex2bin(buf[i]);
3851 return CLASS_UTF16 | c;
3861 numchar_ungetc(c, f)
3865 return (*i_nungetc)(c, f);
3874 int c1, c2, c3, c4, cc;
3875 int t1, t2, t3, t4, mode, exit_mode;
3879 int lwsp_size = 128;
3881 if (mime_top != mime_last) { /* Something is in FIFO */
3882 return Fifo(mime_top++);
3884 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3885 mime_decode_mode=FALSE;
3886 unswitch_mime_getc();
3887 return (*i_getc)(f);
3890 if (mimebuf_f == FIXED_MIME)
3891 exit_mode = mime_decode_mode;
3894 if (mime_decode_mode == 'Q') {
3895 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3897 if (c1=='_') return ' ';
3898 if (c1!='=' && c1!='?') {
3902 mime_decode_mode = exit_mode; /* prepare for quit */
3903 if (c1<=' ') return c1;
3904 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3905 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3906 /* end Q encoding */
3907 input_mode = exit_mode;
3909 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3910 if (lwsp_buf==NULL) {
3911 perror("can't malloc");
3914 while ((c1=(*i_getc)(f))!=EOF) {
3919 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3927 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3928 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3943 lwsp_buf[lwsp_count] = c1;
3944 if (lwsp_count++>lwsp_size){
3946 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3947 if (lwsp_buf_new==NULL) {
3950 perror("can't realloc");
3953 lwsp_buf = lwsp_buf_new;
3959 if (lwsp_count > 0) {
3960 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3964 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3965 i_ungetc(lwsp_buf[lwsp_count],f);
3973 if (c1=='='&&c2<' ') { /* this is soft wrap */
3974 while((c1 = (*i_mgetc)(f)) <=' ') {
3975 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3977 mime_decode_mode = 'Q'; /* still in MIME */
3978 goto restart_mime_q;
3981 mime_decode_mode = 'Q'; /* still in MIME */
3985 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3986 if (c2<=' ') return c2;
3987 mime_decode_mode = 'Q'; /* still in MIME */
/*
 * hex(c): numeric value (0-15) of the hexadecimal digit character c.
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'; any other character yields 0.
 * Every use of the argument is parenthesized so that an expression
 * argument (e.g. `x & 0x7f`) is not torn apart by operator precedence.
 * NOTE: c is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define hex(c) (('0'<=(c)&&(c)<='9')?((c)-'0'):\
     ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
3990 return ((hex(c2)<<4) + hex(c3));
3993 if (mime_decode_mode != 'B') {
3994 mime_decode_mode = FALSE;
3995 return (*i_mgetc)(f);
3999 /* Base64 encoding */
4001 MIME allows line breaks in the middle of
4002 Base64, but we are very pessimistic when decoding
4003 in unbuffered mode because MIME-encoded text may be broken by
4004 less or an editor's control sequences (such as ESC-[-K) in unbuffered
4005 mode. Incomplete MIME is ignored.
4007 mode = mime_decode_mode;
4008 mime_decode_mode = exit_mode; /* prepare for quit */
4010 while ((c1 = (*i_mgetc)(f))<=' ') {
4015 if ((c2 = (*i_mgetc)(f))<=' ') {
4018 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4019 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4022 if ((c1 == '?') && (c2 == '=')) {
4025 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4026 if (lwsp_buf==NULL) {
4027 perror("can't malloc");
4030 while ((c1=(*i_getc)(f))!=EOF) {
4035 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4043 if ((c1=(*i_getc)(f))!=EOF) {
4047 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4062 lwsp_buf[lwsp_count] = c1;
4063 if (lwsp_count++>lwsp_size){
4065 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4066 if (lwsp_buf_new==NULL) {
4069 perror("can't realloc");
4072 lwsp_buf = lwsp_buf_new;
4078 if (lwsp_count > 0) {
4079 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4083 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4084 i_ungetc(lwsp_buf[lwsp_count],f);
4093 if ((c3 = (*i_mgetc)(f))<=' ') {
4096 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4097 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4101 if ((c4 = (*i_mgetc)(f))<=' ') {
4104 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4105 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4109 mime_decode_mode = mode; /* still in MIME sigh... */
4111 /* BASE 64 decoding */
4113 t1 = 0x3f & base64decode(c1);
4114 t2 = 0x3f & base64decode(c2);
4115 t3 = 0x3f & base64decode(c3);
4116 t4 = 0x3f & base64decode(c4);
4117 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4119 Fifo(mime_last++) = cc;
4120 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4122 Fifo(mime_last++) = cc;
4123 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4125 Fifo(mime_last++) = cc;
4130 return Fifo(mime_top++);
4138 Fifo(--mime_top) = c;
4149 /* In buffered mode, read until =? or NL or buffer full
4151 mime_input = mime_top;
4152 mime_last = mime_top;
4153 while(*p) Fifo(mime_input++) = *p++;
4156 while((c=(*i_getc)(f))!=EOF) {
4157 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4158 break; /* buffer full */
4160 if (c=='=' && d=='?') {
4161 /* checked. skip header, start decode */
4162 Fifo(mime_input++) = c;
4163 /* mime_last_input = mime_input; */
4168 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4170 /* Should we check length mod 4? */
4171 Fifo(mime_input++) = c;
4174 /* In case of Incomplete MIME, no MIME decode */
4175 Fifo(mime_input++) = c;
4176 mime_last = mime_input; /* point undecoded buffer */
4177 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4178 switch_mime_getc(); /* anyway we need buffered getc */
4189 i = c - 'A'; /* A..Z 0-25 */
4191 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4193 } else if (c > '/') {
4194 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4195 } else if (c == '+') {
4196 i = '>' /* 62 */ ; /* + 62 */
4198 i = '?' /* 63 */ ; /* / 63 */
/*
 * Base64 alphabet: basis_64[v] is the output character for the 6-bit
 * value v (0-63).  The table is only read (indexed) by the encoder, so
 * it is declared const to keep it out of writable data and to catch
 * accidental modification at compile time.
 */
static const char basis_64[] =
   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Pending-character buffer used by the MIME output code.
 * Characters are appended with mimeout_buf[mimeout_buf_count++] = c and
 * the count is compared against MIMEOUT_BUF_LENGTH only after the
 * append, which is why the array has one extra slot.
 */
4207 #define MIMEOUT_BUF_LENGTH (60)
4208 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
/* Number of characters currently held in mimeout_buf. */
4209 int mimeout_buf_count = 0;
/* Flag tested by the whitespace handling that runs when an encoded word
 * is opened, and reset to FALSE there.
 * NOTE(review): where it is set is not visible in this chunk. */
4210 int mimeout_preserve_space = 0;
/*
 * itoh4(c): upper-case hexadecimal digit character for the 4-bit value
 * c (0-15): 0-9 -> '0'-'9', 10-15 -> 'A'-'F'.
 * The argument and the whole expansion are parenthesized so that calls
 * such as itoh4(x & 0xf) are not mis-parsed by operator precedence.
 * NOTE: c is evaluated more than once.
 */
#define itoh4(c)   ((c)>=10?(c)+'A'-10:(c)+'0')
4220 p = mime_pattern[0];
4221 for(i=0;mime_encode[i];i++) {
4222 if (mode == mime_encode[i]) {
4223 p = mime_pattern[i];
4227 mimeout_mode = mime_encode_method[i];
4230 if (base64_count>45) {
4231 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
4232 (*o_mputc)(mimeout_buf[i]);
4238 if (!mimeout_preserve_space && mimeout_buf_count>0
4239 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4240 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4244 if (!mimeout_preserve_space) {
4245 for (;i<mimeout_buf_count;i++) {
4246 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4247 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4248 (*o_mputc)(mimeout_buf[i]);
4255 mimeout_preserve_space = FALSE;
4261 j = mimeout_buf_count;
4262 mimeout_buf_count = 0;
4264 mime_putc(mimeout_buf[i]);
4280 switch(mimeout_mode) {
4285 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4291 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4297 if (mimeout_f!=FIXED_MIME) {
4299 } else if (mimeout_mode != 'Q')
4308 switch(mimeout_mode) {
4313 } else if (c==CR||c==NL) {
4316 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4318 (*o_mputc)(itoh4(((c>>4)&0xf)));
4319 (*o_mputc)(itoh4((c&0xf)));
4328 (*o_mputc)(basis_64[c>>2]);
4333 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4339 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4340 (*o_mputc)(basis_64[c & 0x3F]);
/* Previous (c2, c1) pair: assigned from the c2/c1 arguments and compared
 * against the next pair to decide whether a SPACE must be emitted.
 * NOTE(review): the assignments appear to sit at the end of
 * mime_prechar(); confirm against the full function. */
4351 int mime_lastchar2, mime_lastchar1;
4353 void mime_prechar(c2, c1)
4358 if (base64_count + mimeout_buf_count/3*4> 66){
4359 (*o_base64conv)(EOF,0);
4360 (*o_base64conv)(0,NL);
4361 (*o_base64conv)(0,SPACE);
4363 }/*else if (mime_lastchar2){
4364 if (c1 <=DEL && !nkf_isspace(c1)){
4365 (*o_base64conv)(0,SPACE);
4369 if (c2 && mime_lastchar2 == 0
4370 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
4371 (*o_base64conv)(0,SPACE);
4374 mime_lastchar2 = c2;
4375 mime_lastchar1 = c1;
4386 if (mimeout_f == FIXED_MIME){
4387 if (mimeout_mode == 'Q'){
4388 if (base64_count > 71){
4389 if (c!=CR && c!=NL) {
4396 if (base64_count > 71){
4401 if (c == EOF) { /* c==EOF */
4405 if (c != EOF) { /* c==EOF */
4411 /* mimeout_f != FIXED_MIME */
4413 if (c == EOF) { /* c==EOF */
4414 j = mimeout_buf_count;
4415 mimeout_buf_count = 0;
4418 /*if (nkf_isspace(mimeout_buf[i])){
4421 mimeout_addchar(mimeout_buf[i]);
4425 (*o_mputc)(mimeout_buf[i]);
4431 if (mimeout_mode=='Q') {
4432 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4444 if (mimeout_buf_count > 0){
4445 lastchar = mimeout_buf[mimeout_buf_count - 1];
4450 if (!mimeout_mode) {
4451 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
4452 if (nkf_isspace(c)) {
4453 if (c==CR || c==NL) {
4456 for (i=0;i<mimeout_buf_count;i++) {
4457 (*o_mputc)(mimeout_buf[i]);
4458 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
4465 mimeout_buf_count = 1;
4467 if (base64_count > 1
4468 && base64_count + mimeout_buf_count > 76){
4471 if (!nkf_isspace(mimeout_buf[0])){
4476 mimeout_buf[mimeout_buf_count++] = c;
4477 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4478 open_mime(output_mode);
4483 if (lastchar==CR || lastchar == NL){
4484 for (i=0;i<mimeout_buf_count;i++) {
4485 (*o_mputc)(mimeout_buf[i]);
4488 mimeout_buf_count = 0;
4490 if (lastchar==SPACE) {
4491 for (i=0;i<mimeout_buf_count-1;i++) {
4492 (*o_mputc)(mimeout_buf[i]);
4495 mimeout_buf[0] = SPACE;
4496 mimeout_buf_count = 1;
4498 open_mime(output_mode);
4501 /* mimeout_mode == 'B', 1, 2 */
4502 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4503 if (lastchar == CR || lastchar == NL){
4504 if (nkf_isblank(c)) {
4505 for (i=0;i<mimeout_buf_count;i++) {
4506 mimeout_addchar(mimeout_buf[i]);
4508 mimeout_buf_count = 0;
4509 } else if (SPACE<c && c<DEL) {
4511 for (i=0;i<mimeout_buf_count;i++) {
4512 (*o_mputc)(mimeout_buf[i]);
4515 mimeout_buf_count = 0;
4518 if (c==SPACE || c==TAB || c==CR || c==NL) {
4519 for (i=0;i<mimeout_buf_count;i++) {
4520 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4522 for (i=0;i<mimeout_buf_count;i++) {
4523 (*o_mputc)(mimeout_buf[i]);
4526 mimeout_buf_count = 0;
4529 mimeout_buf[mimeout_buf_count++] = c;
4530 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4532 for (i=0;i<mimeout_buf_count;i++) {
4533 (*o_mputc)(mimeout_buf[i]);
4536 mimeout_buf_count = 0;
4540 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
4541 mimeout_buf[mimeout_buf_count++] = c;
4542 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4543 j = mimeout_buf_count;
4544 mimeout_buf_count = 0;
4546 mimeout_addchar(mimeout_buf[i]);
4553 if (mimeout_buf_count>0) {
4554 j = mimeout_buf_count;
4555 mimeout_buf_count = 0;
4557 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4559 mimeout_addchar(mimeout_buf[i]);
4565 (*o_mputc)(mimeout_buf[i]);
4567 open_mime(output_mode);
4574 #if defined(PERL_XS) || defined(WIN32DLL)
4579 struct input_code *p = input_code_list;
4592 mime_f = STRICT_MIME;
4593 mime_decode_f = FALSE;
4598 #if defined(MSDOS) || defined(__OS2__)
4603 iso2022jp_f = FALSE;
4604 #ifdef UTF8_OUTPUT_ENABLE
4607 ms_ucs_map_f = FALSE;
4619 is_inputcode_mixed = FALSE;
4620 is_inputcode_set = FALSE;
4624 #ifdef SHIFTJIS_CP932
4630 for (i = 0; i < 256; i++){
4631 prefix_table[i] = 0;
4634 #ifdef UTF8_INPUT_ENABLE
4635 utf16_mode = UTF16BE_INPUT;
4637 mimeout_buf_count = 0;
4642 fold_preserve_f = FALSE;
4645 kanji_intro = DEFAULT_J;
4646 ascii_intro = DEFAULT_R;
4647 fold_margin = FOLD_MARGIN;
4648 output_conv = DEFAULT_CONV;
4649 oconv = DEFAULT_CONV;
4650 o_zconv = no_connection;
4651 o_fconv = no_connection;
4652 o_crconv = no_connection;
4653 o_rot_conv = no_connection;
4654 o_hira_conv = no_connection;
4655 o_base64conv = no_connection;
4656 o_iso2022jp_check_conv = no_connection;
4659 i_ungetc = std_ungetc;
4661 i_bungetc = std_ungetc;
4664 i_mungetc = std_ungetc;
4665 i_mgetc_buf = std_getc;
4666 i_mungetc_buf = std_ungetc;
4667 output_mode = ASCII;
4670 mime_decode_mode = FALSE;
4676 z_prev2=0,z_prev1=0;
4678 iconv_for_check = 0;
4680 input_codename = "";
4688 no_connection(c2,c1)
4691 no_connection2(c2,c1,0);
4695 no_connection2(c2,c1,c0)
4698 fprintf(stderr,"nkf internal module connection failure.\n");
4700 return 0; /* LINT */
4705 #define fprintf dllprintf
4710 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4711 fprintf(stderr,"Flags:\n");
4712 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4713 #ifdef DEFAULT_CODE_SJIS
4714 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4716 #ifdef DEFAULT_CODE_JIS
4717 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4719 #ifdef DEFAULT_CODE_EUC
4720 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4722 #ifdef DEFAULT_CODE_UTF8
4723 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4725 #ifdef UTF8_OUTPUT_ENABLE
4726 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4728 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4729 #ifdef UTF8_INPUT_ENABLE
4730 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4732 fprintf(stderr,"t no conversion\n");
4733 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4734 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4735 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
4736 fprintf(stderr,"v Show this usage. V: show version\n");
4737 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4738 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4739 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4740 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4741 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4742 fprintf(stderr," 3: Convert HTML Entity\n");
4743 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4744 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4746 fprintf(stderr,"T Text mode output\n");
4748 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4749 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4750 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
4751 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4752 fprintf(stderr,"long name options\n");
4753 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4754 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4755 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4756 fprintf(stderr," --x0212 Convert JISX0212\n");
4757 fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
4759 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4761 #ifdef NUMCHAR_OPTION
4762 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4764 #ifdef UTF8_OUTPUT_ENABLE
4765 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4768 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4770 fprintf(stderr," -g, --guess Guess the input code\n");
4771 fprintf(stderr," --help,--version\n");
4778 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4779 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4782 #if defined(MSDOS) && defined(__WIN16__)
4785 #if defined(MSDOS) && defined(__WIN32__)
4791 ,NKF_VERSION,NKF_RELEASE_DATE);
4792 fprintf(stderr,"\n%s\n",CopyRight);
4797 ** パッチ制作者 (patch authors)
4798 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4799 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4800 ** ohta@src.ricoh.co.jp (Junn Ohta)
4801 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4802 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4803 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4804 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4805 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4806 ** GHG00637@nifty-serve.or.jp (COW)