1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.69 2005/06/27 16:05:59 naruse Exp $ */
43 #define NKF_VERSION "2.0.5"
44 #define NKF_RELEASE_DATE "2005-06-28"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16BE_INPUT 14
212 #define UTF16LE_INPUT 15
232 #define is_alnum(c) \
233 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
235 #define HOLD_SIZE 1024
236 #define IOBUF_SIZE 16384
238 #define DEFAULT_J 'B'
239 #define DEFAULT_R 'B'
241 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
242 #define SJ6394 0x0161 /* 63 - 94 ku offset */
244 #define RANGE_NUM_MAX 18
249 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
250 #define sizeof_euc_utf8 94
251 #define sizeof_euc_to_utf8_1byte 94
252 #define sizeof_euc_to_utf8_2bytes 94
253 #define sizeof_utf8_to_euc_C2 64
254 #define sizeof_utf8_to_euc_E5B8 64
255 #define sizeof_utf8_to_euc_2bytes 112
256 #define sizeof_utf8_to_euc_3bytes 112
259 /* MIME preprocessor */
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
266 /* function prototype */
268 #ifdef ANSI_C_PROTOTYPE
270 #define STATIC static
282 void (*status_func)PROTO((struct input_code *, int));
283 int (*iconv_func)PROTO((int c2, int c1, int c0));
287 STATIC char *input_codename = "";
289 STATIC int noconvert PROTO((FILE *f));
290 STATIC int kanji_convert PROTO((FILE *f));
291 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
292 STATIC int push_hold_buf PROTO((int c2));
293 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
294 STATIC int s_iconv PROTO((int c2,int c1,int c0));
295 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
296 STATIC int e_iconv PROTO((int c2,int c1,int c0));
297 #ifdef UTF8_INPUT_ENABLE
298 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
299 STATIC int w_iconv PROTO((int c2,int c1,int c0));
300 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
301 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
302 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
304 #ifdef UTF8_OUTPUT_ENABLE
305 STATIC int e2w_conv PROTO((int c2,int c1));
306 STATIC void w_oconv PROTO((int c2,int c1));
307 STATIC void w_oconv16 PROTO((int c2,int c1));
309 STATIC void e_oconv PROTO((int c2,int c1));
310 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
311 STATIC void s_oconv PROTO((int c2,int c1));
312 STATIC void j_oconv PROTO((int c2,int c1));
313 STATIC void fold_conv PROTO((int c2,int c1));
314 STATIC void cr_conv PROTO((int c2,int c1));
315 STATIC void z_conv PROTO((int c2,int c1));
316 STATIC void rot_conv PROTO((int c2,int c1));
317 STATIC void hira_conv PROTO((int c2,int c1));
318 STATIC void base64_conv PROTO((int c2,int c1));
319 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
320 STATIC void no_connection PROTO((int c2,int c1));
321 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
323 STATIC void code_score PROTO((struct input_code *ptr));
324 STATIC void code_status PROTO((int c));
326 STATIC void std_putc PROTO((int c));
327 STATIC int std_getc PROTO((FILE *f));
328 STATIC int std_ungetc PROTO((int c,FILE *f));
330 STATIC int broken_getc PROTO((FILE *f));
331 STATIC int broken_ungetc PROTO((int c,FILE *f));
333 STATIC int mime_begin PROTO((FILE *f));
334 STATIC int mime_getc PROTO((FILE *f));
335 STATIC int mime_ungetc PROTO((int c,FILE *f));
337 STATIC int mime_begin_strict PROTO((FILE *f));
338 STATIC int mime_getc_buf PROTO((FILE *f));
339 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
340 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
342 STATIC int base64decode PROTO((int c));
343 STATIC void mime_prechar PROTO((int c2, int c1));
344 STATIC void mime_putc PROTO((int c));
345 STATIC void open_mime PROTO((int c));
346 STATIC void close_mime PROTO(());
347 STATIC void usage PROTO(());
348 STATIC void version PROTO(());
349 STATIC void options PROTO((unsigned char *c));
350 #if defined(PERL_XS) || defined(WIN32DLL)
351 STATIC void reinit PROTO(());
356 static unsigned char stdibuf[IOBUF_SIZE];
357 static unsigned char stdobuf[IOBUF_SIZE];
358 static unsigned char hold_buf[HOLD_SIZE*2];
359 static int hold_count;
361 /* MIME preprocessor fifo */
363 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
364 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
365 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
366 static unsigned char mime_buf[MIME_BUF_SIZE];
367 static unsigned int mime_top = 0;
368 static unsigned int mime_last = 0; /* decoded */
369 static unsigned int mime_input = 0; /* undecoded */
372 static int unbuf_f = FALSE;
373 static int estab_f = FALSE;
374 static int nop_f = FALSE;
375 static int binmode_f = TRUE; /* binary mode */
376 static int rot_f = FALSE; /* rot14/43 mode */
377 static int hira_f = FALSE; /* hira/kata henkan */
378 static int input_f = FALSE; /* non fixed input code */
379 static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
380 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
381 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
382 static int mimebuf_f = FALSE; /* MIME buffered input */
383 static int broken_f = FALSE; /* convert ESC-less broken JIS */
384 static int iso8859_f = FALSE; /* ISO8859 through */
385 static int mimeout_f = FALSE; /* base64 mode */
386 #if defined(MSDOS) || defined(__OS2__)
387 static int x0201_f = TRUE; /* Assume JISX0201 kana */
389 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
391 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
392 #ifdef UTF8_OUTPUT_ENABLE
393 static int unicode_bom_f= 0; /* Output Unicode BOM */
394 static int w_oconv16_LE = 0; /* utf-16 little endian */
395 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
399 #ifdef NUMCHAR_OPTION
401 #define CLASS_MASK 0x0f000000
402 #define CLASS_UTF16 0x01000000
406 static int cap_f = FALSE;
407 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
408 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
409 STATIC int cap_getc PROTO((FILE *f));
410 STATIC int cap_ungetc PROTO((int c,FILE *f));
412 static int url_f = FALSE;
413 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
414 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
415 STATIC int url_getc PROTO((FILE *f));
416 STATIC int url_ungetc PROTO((int c,FILE *f));
418 static int numchar_f = FALSE;
419 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
420 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
421 STATIC int numchar_getc PROTO((FILE *f));
422 STATIC int numchar_ungetc PROTO((int c,FILE *f));
426 static int noout_f = FALSE;
427 STATIC void no_putc PROTO((int c));
428 static int debug_f = FALSE;
429 STATIC void debug PROTO((char *str));
432 static int guess_f = FALSE;
433 STATIC void print_guessed_code PROTO((char *filename));
434 STATIC void set_input_codename PROTO((char *codename));
435 static int is_inputcode_mixed = FALSE;
436 static int is_inputcode_set = FALSE;
439 static int exec_f = 0;
442 #ifdef SHIFTJIS_CP932
443 STATIC int cp932_f = TRUE;
444 #define CP932_TABLE_BEGIN (0xfa)
445 #define CP932_TABLE_END (0xfc)
447 STATIC int cp932inv_f = TRUE;
448 #define CP932INV_TABLE_BEGIN (0xed)
449 #define CP932INV_TABLE_END (0xee)
451 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
452 #endif /* SHIFTJIS_CP932 */
455 STATIC int x0212_f = FALSE;
456 static int x0212_shift PROTO((int c));
457 static int x0212_unshift PROTO((int c));
460 STATIC unsigned char prefix_table[256];
462 STATIC void e_status PROTO((struct input_code *, int));
463 STATIC void s_status PROTO((struct input_code *, int));
465 #ifdef UTF8_INPUT_ENABLE
466 STATIC void w_status PROTO((struct input_code *, int));
467 STATIC void w16_status PROTO((struct input_code *, int));
468 static int utf16_mode = UTF16BE_INPUT;
471 struct input_code input_code_list[] = {
472 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
473 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
474 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
475 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
479 static int mimeout_mode = 0;
480 static int base64_count = 0;
482 /* X0208 -> ASCII converter */
485 static int f_line = 0; /* chars in line */
486 static int f_prev = 0;
487 static int fold_preserve_f = FALSE; /* preserve new lines */
488 static int fold_f = FALSE;
489 static int fold_len = 0;
492 static unsigned char kanji_intro = DEFAULT_J;
493 static unsigned char ascii_intro = DEFAULT_R;
497 #define FOLD_MARGIN 10
498 #define DEFAULT_FOLD 60
500 static int fold_margin = FOLD_MARGIN;
504 #ifdef DEFAULT_CODE_JIS
505 # define DEFAULT_CONV j_oconv
507 #ifdef DEFAULT_CODE_SJIS
508 # define DEFAULT_CONV s_oconv
510 #ifdef DEFAULT_CODE_EUC
511 # define DEFAULT_CONV e_oconv
513 #ifdef DEFAULT_CODE_UTF8
514 # define DEFAULT_CONV w_oconv
517 /* process default */
518 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
520 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
521 /* s_iconv or oconv */
522 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
524 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
525 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
526 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
527 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
528 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
529 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
530 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
532 /* static redirections */
534 static void (*o_putc)PROTO((int c)) = std_putc;
536 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
537 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
539 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
540 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
542 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
544 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
545 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
547 /* for strict mime */
548 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
549 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
552 static int output_mode = ASCII, /* output kanji mode */
553 input_mode = ASCII, /* input kanji mode */
554 shift_mode = FALSE; /* TRUE shift out, or X0201 */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
557 /* X0201 / X0208 conversion tables */
559 /* X0201 kana conversion table */
562 unsigned char cv[]= {
563 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
564 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
565 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
566 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
567 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
568 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
569 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
570 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
571 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
572 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
573 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
574 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
575 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
576 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
577 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
578 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
582 /* X0201 kana conversion table for dakuten */
585 unsigned char dv[]= {
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
591 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
592 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
593 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
594 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
595 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
597 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 /* X0201 kana conversion table for han-dakuten */
607 unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
630 unsigned char fv[] = {
632 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
633 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
634 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
636 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
637 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
638 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
640 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
649 static int file_out = FALSE;
651 static int overwrite = FALSE;
654 static int crmode_f = 0; /* CR, NL, CRLF */
655 #ifdef EASYWIN /*Easy Win */
656 static int end_check;
659 #define STD_GC_BUFSIZE (256)
660 int std_gc_buf[STD_GC_BUFSIZE];
664 #include "nkf32dll.c"
665 #elif defined(PERL_XS)
675 char *outfname = NULL;
678 #ifdef EASYWIN /*Easy Win */
679 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
682 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
683 cp = (unsigned char *)*argv;
688 if (pipe(fds) < 0 || (pid = fork()) < 0){
699 execvp(argv[1], &argv[1]);
713 if(x0201_f == WISH_TRUE)
714 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
716 if (binmode_f == TRUE)
718 if (freopen("","wb",stdout) == NULL)
725 setbuf(stdout, (char *) NULL);
727 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
730 if (binmode_f == TRUE)
732 if (freopen("","rb",stdin) == NULL) return (-1);
736 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
740 kanji_convert(stdin);
741 if (guess_f) print_guessed_code(NULL);
746 is_inputcode_mixed = FALSE;
747 is_inputcode_set = FALSE;
749 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
758 /* reopen file for stdout */
759 if (file_out == TRUE) {
762 outfname = malloc(strlen(origfname)
763 + strlen(".nkftmpXXXXXX")
769 strcpy(outfname, origfname);
773 for (i = strlen(outfname); i; --i){
774 if (outfname[i - 1] == '/'
775 || outfname[i - 1] == '\\'){
781 strcat(outfname, "ntXXXXXX");
783 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
786 strcat(outfname, ".nkftmpXXXXXX");
787 fd = mkstemp(outfname);
790 || (fd_backup = dup(fileno(stdout))) < 0
791 || dup2(fd, fileno(stdout)) < 0
802 outfname = "nkf.out";
805 if(freopen(outfname, "w", stdout) == NULL) {
809 if (binmode_f == TRUE) {
811 if (freopen("","wb",stdout) == NULL)
818 if (binmode_f == TRUE)
820 if (freopen("","rb",fin) == NULL)
825 setvbuffer(fin, stdibuf, IOBUF_SIZE);
829 char *filename = NULL;
831 if (nfiles > 1) filename = origfname;
832 if (guess_f) print_guessed_code(filename);
838 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
846 if (dup2(fd_backup, fileno(stdout)) < 0){
849 if (stat(origfname, &sb)) {
850 fprintf(stderr, "Can't stat %s\n", origfname);
852 /*
\e$B%Q!<%_%C%7%g%s$rI|85
\e(B */
853 if (chmod(outfname, sb.st_mode)) {
854 fprintf(stderr, "Can't set permission %s\n", outfname);
857 /*
\e$B%?%$%`%9%?%s%W$rI|85
\e(B */
858 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
859 tb[0] = tb[1] = sb.st_mtime;
860 if (utime(outfname, tb)) {
861 fprintf(stderr, "Can't set timestamp %s\n", outfname);
864 tb.actime = sb.st_atime;
865 tb.modtime = sb.st_mtime;
866 if (utime(outfname, &tb)) {
867 fprintf(stderr, "Can't set timestamp %s\n", outfname);
871 if (unlink(origfname)){
875 if (rename(outfname, origfname)) {
877 fprintf(stderr, "Can't rename %s to %s\n",
878 outfname, origfname);
886 #ifdef EASYWIN /*Easy Win */
887 if (file_out == FALSE)
888 scanf("%d",&end_check);
891 #else /* for Other OS */
892 if (file_out == TRUE)
897 #endif /* WIN32DLL */
922 {"katakana-hiragana","h3"},
929 #ifdef UTF8_OUTPUT_ENABLE
934 #ifdef UTF8_INPUT_ENABLE
936 {"utf16-input", "W16"},
945 #ifdef NUMCHAR_OPTION
946 {"numchar-input", ""},
952 #ifdef SHIFTJIS_CP932
962 static int option_mode = 0;
969 unsigned char *p = NULL;
973 while(*cp && *cp++!='-');
977 case '-': /* literal options */
978 if (!*cp) { /* ignore the rest of arguments */
982 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
984 p = (unsigned char *)long_option[i].name;
985 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
986 if (*p == cp[j] || cp[j] == ' '){
993 cp = (unsigned char *)long_option[i].alias;
997 if (strcmp(long_option[i].name, "overwrite") == 0){
1004 if (strcmp(long_option[i].name, "cap-input") == 0){
1008 if (strcmp(long_option[i].name, "url-input") == 0){
1013 #ifdef NUMCHAR_OPTION
1014 if (strcmp(long_option[i].name, "numchar-input") == 0){
1020 if (strcmp(long_option[i].name, "no-output") == 0){
1024 if (strcmp(long_option[i].name, "debug") == 0){
1029 if (strcmp(long_option[i].name, "cp932") == 0){
1030 #ifdef SHIFTJIS_CP932
1034 #ifdef UTF8_OUTPUT_ENABLE
1035 ms_ucs_map_f = TRUE;
1039 if (strcmp(long_option[i].name, "no-cp932") == 0){
1040 #ifdef SHIFTJIS_CP932
1044 #ifdef UTF8_OUTPUT_ENABLE
1045 ms_ucs_map_f = FALSE;
1049 #ifdef SHIFTJIS_CP932
1050 if (strcmp(long_option[i].name, "cp932inv") == 0){
1057 if (strcmp(long_option[i].name, "x0212") == 0){
1064 if (strcmp(long_option[i].name, "exec-in") == 0){
1068 if (strcmp(long_option[i].name, "exec-out") == 0){
1073 #ifdef UTF8_OUTPUT_ENABLE
1074 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1075 ms_ucs_map_f = TRUE;
1079 if (strcmp(long_option[i].name, "prefix=") == 0){
1080 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1081 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1082 prefix_table[p[i]] = p[1];
1089 case 'b': /* buffered mode */
1092 case 'u': /* non bufferd mode */
1095 case 't': /* transparent mode */
1098 case 'j': /* JIS output */
1100 output_conv = j_oconv;
1102 case 'e': /* AT&T EUC output */
1103 output_conv = e_oconv;
1105 case 's': /* SJIS output */
1106 output_conv = s_oconv;
1108 case 'l': /* ISO8859 Latin-1 support, no conversion */
1109 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1110 input_f = LATIN1_INPUT;
1112 case 'i': /* Kanji IN ESC-$-@/B */
1113 if (*cp=='@'||*cp=='B')
1114 kanji_intro = *cp++;
1116 case 'o': /* ASCII IN ESC-(-J/B */
1117 if (*cp=='J'||*cp=='B'||*cp=='H')
1118 ascii_intro = *cp++;
1122 bit:1 katakana->hiragana
1123 bit:2 hiragana->katakana
1125 if ('9'>= *cp && *cp>='0')
1126 hira_f |= (*cp++ -'0');
1133 #if defined(MSDOS) || defined(__OS2__)
1148 #ifdef UTF8_OUTPUT_ENABLE
1149 case 'w': /* UTF-8 output */
1150 if ('1'== cp[0] && '6'==cp[1]) {
1151 output_conv = w_oconv16; cp+=2;
1153 unicode_bom_f=2; cp++;
1156 unicode_bom_f=1; cp++;
1158 } else if (cp[0] == 'B') {
1159 unicode_bom_f=2; cp++;
1161 unicode_bom_f=1; cp++;
1164 } else if (cp[0] == '8') {
1165 output_conv = w_oconv; cp++;
1168 unicode_bom_f=1; cp++;
1171 output_conv = w_oconv;
1174 #ifdef UTF8_INPUT_ENABLE
1175 case 'W': /* UTF-8 input */
1176 if ('1'== cp[0] && '6'==cp[1]) {
1177 input_f = UTF16BE_INPUT;
1178 utf16_mode = UTF16BE_INPUT;
1182 input_f = UTF16LE_INPUT;
1183 utf16_mode = UTF16LE_INPUT;
1184 } else if (cp[0] == 'B') {
1186 input_f = UTF16BE_INPUT;
1187 utf16_mode = UTF16BE_INPUT;
1189 } else if (cp[0] == '8') {
1191 input_f = UTF8_INPUT;
1193 input_f = UTF8_INPUT;
1196 /* Input code assumption */
1197 case 'J': /* JIS input */
1198 case 'E': /* AT&T EUC input */
1199 input_f = JIS_INPUT;
1201 case 'S': /* MS Kanji input */
1202 input_f = SJIS_INPUT;
1203 if (x0201_f==NO_X0201) x0201_f=TRUE;
1205 case 'Z': /* Convert X0208 alphabet to asii */
1206 /* bit:0 Convert X0208
1207 bit:1 Convert Kankaku to one space
1208 bit:2 Convert Kankaku to two spaces
1209 bit:3 Convert HTML Entity
1211 if ('9'>= *cp && *cp>='0')
1212 alpha_f |= 1<<(*cp++ -'0');
1216 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1217 x0201_f = FALSE; /* No X0201->X0208 conversion */
1219 ESC-(-I in JIS, EUC, MS Kanji
1220 SI/SO in JIS, EUC, MS Kanji
1221 SSO in EUC, JIS, not in MS Kanji
1222 MS Kanji (0xa0-0xdf)
1224 ESC-(-I in JIS (0x20-0x5f)
1225 SSO in EUC (0xa0-0xdf)
1226 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1229 case 'X': /* Assume X0201 kana */
1230 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1233 case 'F': /* prserve new lines */
1234 fold_preserve_f = TRUE;
1235 case 'f': /* folding -f60 or -f */
1238 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1240 fold_len += *cp++ - '0';
1242 if (!(0<fold_len && fold_len<BUFSIZ))
1243 fold_len = DEFAULT_FOLD;
1247 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1249 fold_margin += *cp++ - '0';
1253 case 'm': /* MIME support */
1254 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1255 if (*cp=='B'||*cp=='Q') {
1256 mime_decode_mode = *cp++;
1257 mimebuf_f = FIXED_MIME;
1258 } else if (*cp=='N') {
1259 mime_f = TRUE; cp++;
1260 } else if (*cp=='S') {
1261 mime_f = STRICT_MIME; cp++;
1262 } else if (*cp=='0') {
1263 mime_decode_f = FALSE;
1264 mime_f = FALSE; cp++;
1267 case 'M': /* MIME output */
1270 mimeout_f = FIXED_MIME; cp++;
1271 } else if (*cp=='Q') {
1273 mimeout_f = FIXED_MIME; cp++;
1278 case 'B': /* Broken JIS support */
1280 bit:1 allow any x on ESC-(-x or ESC-$-x
1281 bit:2 reset to ascii on NL
1283 if ('9'>= *cp && *cp>='0')
1284 broken_f |= 1<<(*cp++ -'0');
1289 case 'O':/* for Output file */
1293 case 'c':/* add cr code */
1296 case 'd':/* delete cr code */
1299 case 'I': /* ISO-2022-JP output */
1302 case 'L': /* line mode */
1303 if (*cp=='u') { /* unix */
1304 crmode_f = NL; cp++;
1305 } else if (*cp=='m') { /* mac */
1306 crmode_f = CR; cp++;
1307 } else if (*cp=='w') { /* windows */
1308 crmode_f = CRLF; cp++;
1309 } else if (*cp=='0') { /* no conversion */
1319 /* multiple options in a string are allowed for the Perl module */
1320 while(*cp && *cp++!='-');
1323 /* bogus option but ignored */
1329 #ifdef ANSI_C_PROTOTYPE
1330 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1332 struct input_code * find_inputcode_byfunc(iconv_func)
1333 int (*iconv_func)();
1337 struct input_code *p = input_code_list;
1339 if (iconv_func == p->iconv_func){
1349 static int (*iconv_for_check)() = 0;
1352 #ifdef ANSI_C_PROTOTYPE
1353 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1355 void set_iconv(f, iconv_func)
1357 int (*iconv_func)();
1360 #ifdef INPUT_CODE_FIX
1368 #ifdef INPUT_CODE_FIX
1369 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1375 if (estab_f && iconv_for_check != iconv){
1376 struct input_code *p = find_inputcode_byfunc(iconv);
1378 set_input_codename(p->name);
1379 debug(input_codename);
1381 iconv_for_check = iconv;
1386 #define SCORE_L2 (1) /*
\e$BBh
\e(B2
\e$B?e=`4A;z
\e(B */
1387 #define SCORE_KANA (SCORE_L2 << 1) /*
\e$B$$$o$f$kH>3Q%+%J
\e(B */
1388 #define SCORE_DEPEND (SCORE_KANA << 1) /*
\e$B5!<o0MB8J8;z
\e(B */
1389 #ifdef SHIFTJIS_CP932
1390 #define SCORE_CP932 (SCORE_DEPEND << 1) /* CP932
\e$B$K$h$kFI$_49$(
\e(B */
1391 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /*
\e$BB8:_$7$J$$J8;z
\e(B */
1393 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /*
\e$BB8:_$7$J$$J8;z
\e(B */
1395 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME
\e$B$K$h$k;XDj
\e(B */
1396 #define SCORE_ERROR (SCORE_iMIME << 1) /*
\e$B%(%i!<
\e(B */
1398 #define SCORE_INIT (SCORE_iMIME)
1400 int score_table_A0[] = {
1403 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1404 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1407 int score_table_F0[] = {
1408 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1409 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1410 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1411 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1414 void set_code_score(ptr, score)
1415 struct input_code *ptr;
1419 ptr->score |= score;
1423 void clr_code_score(ptr, score)
1424 struct input_code *ptr;
1428 ptr->score &= ~score;
1432 void code_score(ptr)
1433 struct input_code *ptr;
1435 int c2 = ptr->buf[0];
1436 int c1 = ptr->buf[1];
1438 set_code_score(ptr, SCORE_ERROR);
1439 }else if (c2 == SSO){
1440 set_code_score(ptr, SCORE_KANA);
1441 #ifdef UTF8_OUTPUT_ENABLE
1442 }else if (!e2w_conv(c2, c1)){
1443 set_code_score(ptr, SCORE_NO_EXIST);
1445 }else if ((c2 & 0x70) == 0x20){
1446 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1447 }else if ((c2 & 0x70) == 0x70){
1448 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1449 }else if ((c2 & 0x70) >= 0x50){
1450 set_code_score(ptr, SCORE_L2);
1454 void status_disable(ptr)
1455 struct input_code *ptr;
1460 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1463 void status_push_ch(ptr, c)
1464 struct input_code *ptr;
1467 ptr->buf[ptr->index++] = c;
1470 void status_clear(ptr)
1471 struct input_code *ptr;
1477 void status_reset(ptr)
1478 struct input_code *ptr;
1481 ptr->score = SCORE_INIT;
1484 void status_reinit(ptr)
1485 struct input_code *ptr;
1488 ptr->_file_stat = 0;
1491 void status_check(ptr, c)
1492 struct input_code *ptr;
1495 if (c <= DEL && estab_f){
1500 void s_status(ptr, c)
1501 struct input_code *ptr;
1506 status_check(ptr, c);
1511 #ifdef NUMCHAR_OPTION
1512 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1515 }else if (0xa1 <= c && c <= 0xdf){
1516 status_push_ch(ptr, SSO);
1517 status_push_ch(ptr, c);
1520 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1522 status_push_ch(ptr, c);
1523 #ifdef SHIFTJIS_CP932
1525 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1527 status_push_ch(ptr, c);
1528 #endif /* SHIFTJIS_CP932 */
1530 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1532 status_push_ch(ptr, c);
1533 #endif /* X0212_ENABLE */
1535 status_disable(ptr);
1539 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1540 status_push_ch(ptr, c);
1541 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1545 status_disable(ptr);
1549 #ifdef SHIFTJIS_CP932
1550 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1551 status_push_ch(ptr, c);
1552 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1553 set_code_score(ptr, SCORE_CP932);
1558 #endif /* SHIFTJIS_CP932 */
1559 #ifndef X0212_ENABLE
1560 status_disable(ptr);
1566 void e_status(ptr, c)
1567 struct input_code *ptr;
1572 status_check(ptr, c);
1577 #ifdef NUMCHAR_OPTION
1578 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1581 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1583 status_push_ch(ptr, c);
1585 }else if (0x8f == c){
1587 status_push_ch(ptr, c);
1588 #endif /* X0212_ENABLE */
1590 status_disable(ptr);
1594 if (0xa1 <= c && c <= 0xfe){
1595 status_push_ch(ptr, c);
1599 status_disable(ptr);
1604 if (0xa1 <= c && c <= 0xfe){
1606 status_push_ch(ptr, c);
1608 status_disable(ptr);
1610 #endif /* X0212_ENABLE */
1614 #ifdef UTF8_INPUT_ENABLE
1615 void w16_status(ptr, c)
1616 struct input_code *ptr;
1623 if (ptr->_file_stat == 0){
1624 if (c == 0xfe || c == 0xff){
1626 status_push_ch(ptr, c);
1627 ptr->_file_stat = 1;
1629 status_disable(ptr);
1630 ptr->_file_stat = -1;
1632 }else if (ptr->_file_stat > 0){
1634 status_push_ch(ptr, c);
1635 }else if (ptr->_file_stat < 0){
1636 status_disable(ptr);
1642 status_disable(ptr);
1643 ptr->_file_stat = -1;
1645 status_push_ch(ptr, c);
1652 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1653 status_push_ch(ptr, c);
1656 status_disable(ptr);
1657 ptr->_file_stat = -1;
1663 void w_status(ptr, c)
1664 struct input_code *ptr;
1669 status_check(ptr, c);
1674 #ifdef NUMCHAR_OPTION
1675 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1678 }else if (0xc0 <= c && c <= 0xdf){
1680 status_push_ch(ptr, c);
1681 }else if (0xe0 <= c && c <= 0xef){
1683 status_push_ch(ptr, c);
1685 status_disable(ptr);
1690 if (0x80 <= c && c <= 0xbf){
1691 status_push_ch(ptr, c);
1692 if (ptr->index > ptr->stat){
1693 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1694 && ptr->buf[2] == 0xbf);
1695 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1696 &ptr->buf[0], &ptr->buf[1]);
1703 status_disable(ptr);
1714 int action_flag = 1;
1715 struct input_code *result = 0;
1716 struct input_code *p = input_code_list;
1718 (p->status_func)(p, c);
1721 }else if(p->stat == 0){
1732 if (result && !estab_f){
1733 set_iconv(TRUE, result->iconv_func);
1734 }else if (c <= DEL){
1735 struct input_code *ptr = input_code_list;
1750 return std_gc_buf[--std_gc_ndx];
1761 if (std_gc_ndx == STD_GC_BUFSIZE){
1764 std_gc_buf[std_gc_ndx++] = c;
1784 while ((c = (*i_getc)(f)) != EOF)
1793 oconv = output_conv;
1796 /* replace continuation module, from output side */
1798 /* output redirection */
1800 if (noout_f || guess_f){
1807 if (mimeout_f == TRUE) {
1808 o_base64conv = oconv; oconv = base64_conv;
1810 /* base64_count = 0; */
1814 o_crconv = oconv; oconv = cr_conv;
1817 o_rot_conv = oconv; oconv = rot_conv;
1820 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1823 o_hira_conv = oconv; oconv = hira_conv;
1826 o_fconv = oconv; oconv = fold_conv;
1829 if (alpha_f || x0201_f) {
1830 o_zconv = oconv; oconv = z_conv;
1834 i_ungetc = std_ungetc;
1835 /* input redirection */
1838 i_cgetc = i_getc; i_getc = cap_getc;
1839 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1842 i_ugetc = i_getc; i_getc = url_getc;
1843 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1846 #ifdef NUMCHAR_OPTION
1848 i_ngetc = i_getc; i_getc = numchar_getc;
1849 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1852 if (mime_f && mimebuf_f==FIXED_MIME) {
1853 i_mgetc = i_getc; i_getc = mime_getc;
1854 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1857 i_bgetc = i_getc; i_getc = broken_getc;
1858 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1860 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1861 set_iconv(-TRUE, e_iconv);
1862 } else if (input_f == SJIS_INPUT) {
1863 set_iconv(-TRUE, s_iconv);
1864 #ifdef UTF8_INPUT_ENABLE
1865 } else if (input_f == UTF8_INPUT) {
1866 set_iconv(-TRUE, w_iconv);
1867 } else if (input_f == UTF16BE_INPUT) {
1868 set_iconv(-TRUE, w_iconv16);
1869 } else if (input_f == UTF16LE_INPUT) {
1870 set_iconv(-TRUE, w_iconv16);
1873 set_iconv(FALSE, e_iconv);
1877 struct input_code *p = input_code_list;
1885 Conversion main loop. Code detection only.
1894 int is_8bit = FALSE;
1896 module_connection();
1901 output_mode = ASCII;
1904 #define NEXT continue /* no output, get next */
1905 #define SEND ; /* output c1 and c2, get next */
1906 #define LAST break /* end of loop, go closing */
1908 while ((c1 = (*i_getc)(f)) != EOF) {
1913 /* in case of 8th bit is on */
1914 if (!estab_f&&!mime_decode_mode) {
1915 /* in case of not established yet */
1916 /* It is still ambiguous */
1917 if (h_conv(f, c2, c1)==EOF)
1923 /* in case of already established */
1925 /* ignore bogus code */
1931 /* second byte, 7 bit code */
1932 /* it might be kanji shifted */
1933 if ((c1 == DEL) || (c1 <= SPACE)) {
1934 /* ignore bogus first code */
1942 #ifdef UTF8_INPUT_ENABLE
1951 #ifdef NUMCHAR_OPTION
1952 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1955 } else if (c1 > DEL) {
1957 if (!estab_f && !iso8859_f) {
1958 /* not established yet */
1959 if (!is_8bit) is_8bit = TRUE;
1962 } else { /* estab_f==TRUE */
1967 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1968 /* SJIS X0201 Case... */
1969 if(iso2022jp_f && x0201_f==NO_X0201) {
1970 (*oconv)(GETA1, GETA2);
1977 } else if (c1==SSO && iconv != s_iconv) {
1978 /* EUC X0201 Case */
1979 c1 = (*i_getc)(f); /* skip SSO */
1981 if (SSP<=c1 && c1<0xe0) {
1982 if(iso2022jp_f && x0201_f==NO_X0201) {
1983 (*oconv)(GETA1, GETA2);
1990 } else { /* bogus code, skip SSO and one byte */
1994 /* already established */
1999 } else if ((c1 > SPACE) && (c1 != DEL)) {
2000 /* in case of Roman characters */
2002 /* output 1 shifted byte */
2006 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2007 /* output 1 shifted byte */
2008 if(iso2022jp_f && x0201_f==NO_X0201) {
2009 (*oconv)(GETA1, GETA2);
2016 /* look like bogus code */
2019 } else if (input_mode == X0208) {
2020 /* in case of Kanji shifted */
2023 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2024 /* Check MIME code */
2025 if ((c1 = (*i_getc)(f)) == EOF) {
2028 } else if (c1 == '?') {
2029 /* =? is mime conversion start sequence */
2030 if(mime_f == STRICT_MIME) {
2031 /* check in real detail */
2032 if (mime_begin_strict(f) == EOF)
2036 } else if (mime_begin(f) == EOF)
2046 /* normal ASCII code */
2049 } else if (c1 == SI) {
2052 } else if (c1 == SO) {
2055 } else if (c1 == ESC ) {
2056 if ((c1 = (*i_getc)(f)) == EOF) {
2057 /* (*oconv)(0, ESC); don't send bogus code */
2059 } else if (c1 == '$') {
2060 if ((c1 = (*i_getc)(f)) == EOF) {
2062 (*oconv)(0, ESC); don't send bogus code
2063 (*oconv)(0, '$'); */
2065 } else if (c1 == '@'|| c1 == 'B') {
2066 /* This is kanji introduction */
2069 set_input_codename("ISO-2022-JP");
2070 debug(input_codename);
2072 } else if (c1 == '(') {
2073 if ((c1 = (*i_getc)(f)) == EOF) {
2074 /* don't send bogus code
2080 } else if (c1 == '@'|| c1 == 'B') {
2081 /* This is kanji introduction */
2086 } else if (c1 == 'D'){
2090 #endif /* X0212_ENABLE */
2092 /* could be some special code */
2099 } else if (broken_f&0x2) {
2100 /* accept any ESC-(-x as broken code ... */
2110 } else if (c1 == '(') {
2111 if ((c1 = (*i_getc)(f)) == EOF) {
2112 /* don't send bogus code
2114 (*oconv)(0, '('); */
2118 /* This is X0201 kana introduction */
2119 input_mode = X0201; shift_mode = X0201;
2121 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2122 /* This is ASCII / JIS X0201 Roman introduction (back to single-byte mode) */
2123 input_mode = ASCII; shift_mode = FALSE;
2125 } else if (broken_f&0x2) {
2126 input_mode = ASCII; shift_mode = FALSE;
2131 /* maintain various input_mode here */
2135 } else if ( c1 == 'N' || c1 == 'n' ){
2137 c3 = (*i_getc)(f); /* skip SS2 */
2138 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2153 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2154 input_mode = ASCII; set_iconv(FALSE, 0);
2156 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2157 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2165 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2166 if ((c1=(*i_getc)(f))!=EOF) {
2170 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2186 if (input_mode == X0208)
2187 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2189 else if (input_mode == X0212)
2190 (*oconv)((0x8f << 8) | c2, c1);
2191 #endif /* X0212_ENABLE */
2192 else if (input_mode)
2193 (*oconv)(input_mode, c1); /* other special case */
2194 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2195 int c0 = (*i_getc)(f);
2198 (*iconv)(c2, c1, c0);
2204 /* goto next_word */
2208 (*iconv)(EOF, 0, 0);
2209 if (!is_inputcode_set)
2212 struct input_code *p = input_code_list;
2213 struct input_code *result = p;
2215 if (p->score < result->score) result = p;
2218 set_input_codename(result->name);
2233 /** it must NOT be in the kanji shifted sequence */
2234 /** it must NOT be written in JIS7 */
2235 /** and it must be after 2 byte 8bit code */
2242 while ((c1 = (*i_getc)(f)) != EOF) {
2248 if (push_hold_buf(c1) == EOF || estab_f){
2254 struct input_code *p = input_code_list;
2255 struct input_code *result = p;
2260 if (p->score < result->score){
2265 set_iconv(FALSE, result->iconv_func);
2270 ** 1) EOF is detected, or
2271 ** 2) Code is established, or
2272 ** 3) Buffer is FULL (but last word is pushed)
2274 ** in 1) and 3) cases, we continue to use
2275 ** Kanji codes by oconv and leave estab_f unchanged.
2280 while (wc < hold_count){
2281 c2 = hold_buf[wc++];
2283 #ifdef NUMCHAR_OPTION
2284 || (c2 & CLASS_MASK) == CLASS_UTF16
2289 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2290 (*iconv)(X0201, c2, 0);
2293 if (wc < hold_count){
2294 c1 = hold_buf[wc++];
2303 if ((*iconv)(c2, c1, 0) < 0){
2305 if (wc < hold_count){
2306 c0 = hold_buf[wc++];
2315 (*iconv)(c2, c1, c0);
2328 if (hold_count >= HOLD_SIZE*2)
2330 hold_buf[hold_count++] = c2;
2331 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2334 int s2e_conv(c2, c1, p2, p1)
2339 #ifdef SHIFTJIS_CP932
2340 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2341 extern unsigned short shiftjis_cp932[3][189];
2342 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2348 #endif /* SHIFTJIS_CP932 */
2350 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2351 extern unsigned short shiftjis_x0212[3][189];
2352 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2355 c2 = (0x8f << 8) | (val >> 8);
2367 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2369 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2377 c2 = x0212_unshift(c2);
2392 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2395 int ret = s2e_conv(c2, c1, &c2, &c1);
2396 if (ret) return ret;
2410 }else if (c2 == 0x8f){
2414 c2 = (c2 << 8) | (c1 & 0x7f);
2416 #ifdef SHIFTJIS_CP932
2419 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2420 s2e_conv(s2, s1, &c2, &c1);
2421 if ((c2 & 0xff00) == 0){
2427 #endif /* SHIFTJIS_CP932 */
2428 #endif /* X0212_ENABLE */
2429 } else if (c2 == SSO){
2432 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2442 #ifdef UTF8_INPUT_ENABLE
2444 w2e_conv(c2, c1, c0, p2, p1)
2448 extern unsigned short * utf8_to_euc_2bytes[];
2449 extern unsigned short ** utf8_to_euc_3bytes[];
2452 if (0xc0 <= c2 && c2 <= 0xef) {
2453 unsigned short **pp;
2456 if (c0 == 0) return -1;
2457 pp = utf8_to_euc_3bytes[c2 - 0x80];
2458 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2460 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2462 #ifdef NUMCHAR_OPTION
2465 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2470 } else if (c2 == X0201) {
2483 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2491 w16w_conv(val, p2, p1, p0)
2499 }else if (val < 0x800){
2500 *p2 = 0xc0 | (val >> 6);
2501 *p1 = 0x80 | (val & 0x3f);
2504 *p2 = 0xe0 | (val >> 12);
2505 *p1 = 0x80 | ((val >> 6) & 0x3f);
2506 *p0 = 0x80 | (val & 0x3f);
2511 ww16_conv(c2, c1, c0)
2516 val = (c2 & 0x0f) << 12;
2517 val |= (c1 & 0x3f) << 6;
2519 }else if (c2 >= 0xc0){
2520 val = (c2 & 0x1f) << 6;
2529 w16e_conv(val, p2, p1)
2533 extern unsigned short * utf8_to_euc_2bytes[];
2534 extern unsigned short ** utf8_to_euc_3bytes[];
2536 unsigned short **pp;
2540 w16w_conv(val, &c2, &c1, &c0);
2543 pp = utf8_to_euc_3bytes[c2 - 0x80];
2544 psize = sizeof_utf8_to_euc_C2;
2545 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2547 pp = utf8_to_euc_2bytes;
2548 psize = sizeof_utf8_to_euc_2bytes;
2549 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2551 #ifdef NUMCHAR_OPTION
2554 *p1 = CLASS_UTF16 | val;
2566 w_iconv16(c2, c1, c0)
2571 if (c2==0376 && c1==0377){
2572 utf16_mode = UTF16BE_INPUT;
2574 } else if (c2==0377 && c1==0376){
2575 utf16_mode = UTF16LE_INPUT;
2578 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
2580 tmp=c1; c1=c2; c2=tmp;
2582 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2586 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2587 if (ret) return ret;
2593 w_iconv_common(c1, c0, pp, psize, p2, p1)
2595 unsigned short **pp;
2603 if (pp == 0) return 1;
2606 if (c1 < 0 || psize <= c1) return 1;
2608 if (p == 0) return 1;
2611 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2613 if (val == 0) return 1;
2620 if (c2 == SO) c2 = X0201;
2629 #ifdef UTF8_OUTPUT_ENABLE
2634 extern unsigned short euc_to_utf8_1byte[];
2635 extern unsigned short * euc_to_utf8_2bytes[];
2636 extern unsigned short * euc_to_utf8_2bytes_ms[];
2640 p = euc_to_utf8_1byte;
2642 } else if (c2 >> 8 == 0x8f){
2643 extern unsigned short * x0212_to_utf8_2bytes[];
2644 c2 = (c2&0x7f) - 0x21;
2645 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2646 p = x0212_to_utf8_2bytes[c2];
2652 c2 = (c2&0x7f) - 0x21;
2653 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2654 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2659 c1 = (c1 & 0x7f) - 0x21;
2660 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2676 if (unicode_bom_f==2) {
2683 #ifdef NUMCHAR_OPTION
2684 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2685 w16w_conv(c1, &c2, &c1, &c0);
2689 if (c0) (*o_putc)(c0);
2696 output_mode = ASCII;
2698 } else if (c2 == ISO8859_1) {
2699 output_mode = ISO8859_1;
2700 (*o_putc)(c1 | 0x080);
2704 val = e2w_conv(c2, c1);
2706 w16w_conv(val, &c2, &c1, &c0);
2710 if (c0) (*o_putc)(c0);
2726 if (unicode_bom_f==2) {
2728 (*o_putc)((unsigned char)'\377');
2732 (*o_putc)((unsigned char)'\377');
2737 if (c2 == ISO8859_1) {
2740 #ifdef NUMCHAR_OPTION
2741 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2742 c2 = (c1 >> 8) & 0xff;
2746 unsigned short val = e2w_conv(c2, c1);
2747 c2 = (val >> 8) & 0xff;
2766 #ifdef NUMCHAR_OPTION
2767 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2768 w16e_conv(c1, &c2, &c1);
2769 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2777 } else if (c2 == 0) {
2778 output_mode = ASCII;
2780 } else if (c2 == X0201) {
2781 output_mode = JAPANESE_EUC;
2782 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2783 } else if (c2 == ISO8859_1) {
2784 output_mode = ISO8859_1;
2785 (*o_putc)(c1 | 0x080);
2787 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2788 output_mode = JAPANESE_EUC;
2789 #ifdef SHIFTJIS_CP932
2792 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2793 s2e_conv(s2, s1, &c2, &c1);
2797 if ((c2 & 0xff00) >> 8 == 0x8f){
2800 (*o_putc)((c2 & 0x7f) | 0x080);
2801 (*o_putc)(c1 | 0x080);
2804 (*o_putc)((c2 & 0x7f) | 0x080);
2805 (*o_putc)(c1 | 0x080);
2809 if ((c1<0x21 || 0x7e<c1) ||
2810 (c2<0x21 || 0x7e<c2)) {
2811 set_iconv(FALSE, 0);
2812 return; /* too late to rescue this char */
2814 output_mode = JAPANESE_EUC;
2815 (*o_putc)(c2 | 0x080);
2816 (*o_putc)(c1 | 0x080);
2826 if ((ret & 0xff00) == 0x8f00){
2827 if (0x75 <= c && c <= 0x7f){
2828 ret = c + (0x109 - 0x75);
2831 if (0x75 <= c && c <= 0x7f){
2832 ret = c + (0x113 - 0x75);
2839 int x0212_unshift(c)
2843 if (0x7f <= c && c <= 0x88){
2844 ret = c + (0x75 - 0x7f);
2845 }else if (0x89 <= c && c <= 0x92){
2846 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2850 #endif /* X0212_ENABLE */
2853 e2s_conv(c2, c1, p2, p1)
2854 int c2, c1, *p2, *p1;
2857 unsigned short *ptr;
2859 extern unsigned short *x0212_shiftjis[];
2861 if ((c2 & 0xff00) == 0x8f00){
2863 if (0x21 <= ndx && ndx <= 0x7e){
2864 ptr = x0212_shiftjis[ndx - 0x21];
2866 val = ptr[(c1 & 0x7f) - 0x21];
2876 c2 = x0212_shift(c2);
2878 #endif /* X0212_ENABLE */
2879 if ((c2 & 0xff00) == 0x8f00){
2882 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2883 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2892 #ifdef NUMCHAR_OPTION
2893 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2894 w16e_conv(c1, &c2, &c1);
2900 } else if (c2 == 0) {
2901 output_mode = ASCII;
2903 } else if (c2 == X0201) {
2904 output_mode = SHIFT_JIS;
2906 } else if (c2 == ISO8859_1) {
2907 output_mode = ISO8859_1;
2908 (*o_putc)(c1 | 0x080);
2910 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2911 output_mode = SHIFT_JIS;
2912 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2918 if ((c1<0x20 || 0x7e<c1) ||
2919 (c2<0x20 || 0x7e<c2)) {
2920 set_iconv(FALSE, 0);
2921 return; /* too late to rescue this char */
2923 output_mode = SHIFT_JIS;
2924 e2s_conv(c2, c1, &c2, &c1);
2926 #ifdef SHIFTJIS_CP932
2928 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2929 extern unsigned short cp932inv[2][189];
2930 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2936 #endif /* SHIFTJIS_CP932 */
2939 if (prefix_table[(unsigned char)c1]){
2940 (*o_putc)(prefix_table[(unsigned char)c1]);
2951 #ifdef NUMCHAR_OPTION
2952 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2953 w16e_conv(c1, &c2, &c1);
2957 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2960 (*o_putc)(ascii_intro);
2961 output_mode = ASCII;
2965 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2966 if (output_mode!=X0212) {
2967 output_mode = X0212;
2973 (*o_putc)(c2 & 0x7f);
2976 } else if (c2==X0201) {
2977 if (output_mode!=X0201) {
2978 output_mode = X0201;
2984 } else if (c2==ISO8859_1) {
2985 /* iso8859 introduction, or 8th bit on */
2986 /* Can we convert in 7bit form using ESC-'-'-A ?
2988 output_mode = ISO8859_1;
2990 } else if (c2 == 0) {
2991 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2994 (*o_putc)(ascii_intro);
2995 output_mode = ASCII;
2999 if (output_mode != X0208) {
3000 output_mode = X0208;
3003 (*o_putc)(kanji_intro);
3005 if (c1<0x20 || 0x7e<c1)
3007 if (c2<0x20 || 0x7e<c2)
3019 mime_prechar(c2, c1);
3020 (*o_base64conv)(c2,c1);
3024 static int broken_buf[3];
3025 static int broken_counter = 0;
3026 static int broken_last = 0;
3033 if (broken_counter>0) {
3034 return broken_buf[--broken_counter];
3037 if (c=='$' && broken_last != ESC
3038 && (input_mode==ASCII || input_mode==X0201)) {
3041 if (c1=='@'|| c1=='B') {
3042 broken_buf[0]=c1; broken_buf[1]=c;
3049 } else if (c=='(' && broken_last != ESC
3050 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3053 if (c1=='J'|| c1=='B') {
3054 broken_buf[0]=c1; broken_buf[1]=c;
3072 if (broken_counter<2)
3073 broken_buf[broken_counter++]=c;
3077 static int prev_cr = 0;
3085 if (! (c2==0&&c1==NL) ) {
3091 } else if (c1=='\r') {
3093 } else if (c1=='\n') {
3094 if (crmode_f==CRLF) {
3095 (*o_crconv)(0,'\r');
3096 } else if (crmode_f==CR) {
3097 (*o_crconv)(0,'\r');
3101 } else if (c1!='\032' || crmode_f!=NL){
3107 Return value of fold_conv()
3109 \n add newline and output char
3110 \r add newline and output nothing
3113 1 (or else) normal output
3115 fold state in prev (previous character)
3117 >0x80 Japanese (X0208/X0201)
3122 This fold algorithm does not preserve leading spaces in a line.
3123 This is the main difference from fmt.
3126 #define char_size(c2,c1) (c2?2:1)
3135 if (c1== '\r' && !fold_preserve_f) {
3136 fold_state=0; /* ignore cr */
3137 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3139 fold_state=0; /* ignore cr */
3140 } else if (c1== BS) {
3141 if (f_line>0) f_line--;
3143 } else if (c2==EOF && f_line != 0) { /* close open last line */
3145 } else if ((c1=='\n' && !fold_preserve_f)
3146 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3147 && fold_preserve_f)) {
3149 if (fold_preserve_f) {
3153 } else if ((f_prev == c1 && !fold_preserve_f)
3154 || (f_prev == '\n' && fold_preserve_f)
3155 ) { /* duplicate newline */
3158 fold_state = '\n'; /* output two newline */
3164 if (f_prev&0x80) { /* Japanese? */
3166 fold_state = 0; /* ignore given single newline */
3167 } else if (f_prev==' ') {
3171 if (++f_line<=fold_len)
3175 fold_state = '\r'; /* fold and output nothing */
3179 } else if (c1=='\f') {
3184 fold_state = '\n'; /* output newline and clear */
3185 } else if ( (c2==0 && c1==' ')||
3186 (c2==0 && c1=='\t')||
3187 (c2=='!'&& c1=='!')) {
3188 /* X0208 kankaku or ascii space */
3189 if (f_prev == ' ') {
3190 fold_state = 0; /* remove duplicate spaces */
3193 if (++f_line<=fold_len)
3194 fold_state = ' '; /* output ASCII space only */
3196 f_prev = ' '; f_line = 0;
3197 fold_state = '\r'; /* fold and output nothing */
3201 prev0 = f_prev; /* we still need this one... , but almost done */
3203 if (c2 || c2==X0201)
3204 f_prev |= 0x80; /* this is Japanese */
3205 f_line += char_size(c2,c1);
3206 if (f_line<=fold_len) { /* normal case */
3209 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3210 f_line = char_size(c2,c1);
3211 fold_state = '\n'; /* We can't wait, do fold now */
3212 } else if (c2==X0201) {
3213 /* simple kinsoku rules return 1 means no folding */
3214 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3215 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3216 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3217 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3218 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3219 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3220 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3222 fold_state = '\n';/* add one new f_line before this character */
3225 fold_state = '\n';/* add one new f_line before this character */
3228 /* kinsoku point in ASCII */
3229 if ( c1==')'|| /* { [ ( */
3240 /* just after special */
3241 } else if (!is_alnum(prev0)) {
3242 f_line = char_size(c2,c1);
3244 } else if ((prev0==' ') || /* ignored new f_line */
3245 (prev0=='\n')|| /* ignored new f_line */
3246 (prev0&0x80)) { /* X0208 - ASCII */
3247 f_line = char_size(c2,c1);
3248 fold_state = '\n';/* add one new f_line before this character */
3250 fold_state = 1; /* default no fold in ASCII */
3254 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3255 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3256 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3257 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3258 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3259 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3260 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3261 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3262 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3263 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3264 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3265 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3266 /* default no fold in kinsoku */
3269 f_line = char_size(c2,c1);
3270 /* add one new f_line before this character */
3273 f_line = char_size(c2,c1);
3275 /* add one new f_line before this character */
3280 /* terminator process */
3281 switch(fold_state) {
3300 int z_prev2=0,z_prev1=0;
3307 /* if (c2) c1 &= 0x7f; assertion */
3309 if (x0201_f && z_prev2==X0201) { /* X0201 */
3310 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3312 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3314 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3316 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3320 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3329 if (x0201_f && c2==X0201) {
3330 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3331 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3332 z_prev1 = c1; z_prev2 = c2;
3335 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3340 /* JISX0208 Alphabet */
3341 if (alpha_f && c2 == 0x23 ) {
3343 } else if (alpha_f && c2 == 0x21 ) {
3344 /* JISX0208 Kigou */
3349 } else if (alpha_f&0x4) {
3354 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3360 case '>': entity = "&gt;"; break; /* HTML character references; sent byte-by-byte through o_zconv */
3361 case '<': entity = "&lt;"; break;
3362 case '\"': entity = "&quot;"; break;
3363 case '&': entity = "&amp;"; break;
3366 while (*entity) (*o_zconv)(0, *entity++);
3376 #define rot13(c) ( \
3378 (c <= 'M') ? (c + 13): \
3379 (c <= 'Z') ? (c - 13): \
3381 (c <= 'm') ? (c + 13): \
3382 (c <= 'z') ? (c - 13): \
3386 #define rot47(c) ( \
3388 ( c <= 'O' ) ? (c + 47) : \
3389 ( c <= '~' ) ? (c - 47) : \
3397 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3403 (*o_rot_conv)(c2,c1);
3410 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3412 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3415 (*o_hira_conv)(c2,c1);
3420 iso2022jp_check_conv(c2,c1)
3423 static int range[RANGE_NUM_MAX][2] = {
3446 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3450 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3455 for (i = 0; i < RANGE_NUM_MAX; i++) {
3456 start = range[i][0];
3459 if (c >= start && c <= end) {
3464 (*o_iso2022jp_check_conv)(c2,c1);
3468 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3470 unsigned char *mime_pattern[] = {
3471 (unsigned char *)"\075?EUC-JP?B?",
3472 (unsigned char *)"\075?SHIFT_JIS?B?",
3473 (unsigned char *)"\075?ISO-8859-1?Q?",
3474 (unsigned char *)"\075?ISO-8859-1?B?",
3475 (unsigned char *)"\075?ISO-2022-JP?B?",
3476 (unsigned char *)"\075?ISO-2022-JP?Q?",
3477 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3478 (unsigned char *)"\075?UTF-8?B?",
3479 (unsigned char *)"\075?UTF-8?Q?",
3481 (unsigned char *)"\075?US-ASCII?Q?",
3486 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3487 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3488 e_iconv, s_iconv, 0, 0, 0, 0,
3489 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3495 int mime_encode[] = {
3496 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3497 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3504 int mime_encode_method[] = {
3505 'B', 'B','Q', 'B', 'B', 'Q',
3506 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3514 #define MAXRECOVER 20
3516 /* I don't trust portability of toupper */
3517 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
3518 #define nkf_isdigit(c) ('0'<=c && c<='9')
3519 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
3520 #define nkf_isblank(c) (c == SPACE || c == TAB)
3521 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == NL)
3522 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
3523 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
3528 if (i_getc!=mime_getc) {
3529 i_mgetc = i_getc; i_getc = mime_getc;
3530 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3531 if(mime_f==STRICT_MIME) {
3532 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3533 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3539 unswitch_mime_getc()
3541 if(mime_f==STRICT_MIME) {
3542 i_mgetc = i_mgetc_buf;
3543 i_mungetc = i_mungetc_buf;
3546 i_ungetc = i_mungetc;
3550 mime_begin_strict(f)
3555 unsigned char *p,*q;
3556 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3558 mime_decode_mode = FALSE;
3559 /* =? has been checked */
3561 p = mime_pattern[j];
3564 for(i=2;p[i]>' ';i++) { /* start at =? */
3565 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3566 /* pattern fails, try next one */
3568 while ((p = mime_pattern[++j])) {
3569 for(k=2;k<i;k++) /* assume length(p) > i */
3570 if (p[k]!=q[k]) break;
3571 if (k==i && nkf_toupper(c1)==p[k]) break;
3573 if (p) continue; /* found next one, continue */
3574 /* all fails, output from recovery buffer */
3582 mime_decode_mode = p[i-2];
3584 set_iconv(FALSE, mime_priority_func[j]);
3585 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3587 if (mime_decode_mode=='B') {
3588 mimebuf_f = unbuf_f;
3590 /* do MIME integrity check */
3591 return mime_integrity(f,mime_pattern[j]);
3603 /* we don't keep eof of Fifo, because it contains ?= as
3604 a terminator. It was checked in mime_integrity. */
3605 return ((mimebuf_f)?
3606 (*i_mgetc_buf)(f):Fifo(mime_input++));
3610 mime_ungetc_buf(c,f)
3615 (*i_mungetc_buf)(c,f);
3617 Fifo(--mime_input)=c;
3628 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3629 /* re-read and convert again from mime_buffer. */
3631 /* =? has been checked */
3633 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3634 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3635 /* We accept any character type even if it is broken by new lines */
3636 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3637 if (c1=='\n'||c1==' '||c1=='\r'||
3638 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3640 /* Failed. But this could be another MIME preamble */
3648 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3649 if (!(++i<MAXRECOVER) || c1==EOF) break;
3650 if (c1=='b'||c1=='B') {
3651 mime_decode_mode = 'B';
3652 } else if (c1=='q'||c1=='Q') {
3653 mime_decode_mode = 'Q';
3657 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3658 if (!(++i<MAXRECOVER) || c1==EOF) break;
3660 mime_decode_mode = FALSE;
3666 if (!mime_decode_mode) {
3667 /* false MIME preamble, restart from mime_buffer */
3668 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3669 /* Since we are in MIME mode until buffer becomes empty, */
3670 /* we never go into mime_begin again for a while. */
3673 /* discard mime preamble, and goto MIME mode */
3675 /* do no MIME integrity check */
3676 return c1; /* used only for checking EOF */
3691 fprintf(stderr, "%s\n", str);
3697 set_input_codename (codename)
3702 strcmp(codename, "") != 0 &&
3703 strcmp(codename, input_codename) != 0)
3705 is_inputcode_mixed = TRUE;
3707 input_codename = codename;
3708 is_inputcode_set = TRUE;
3713 print_guessed_code (filename)
3716 char *codename = "BINARY";
3717 if (!is_inputcode_mixed) {
3718 if (strcmp(input_codename, "") == 0) {
3721 codename = input_codename;
3724 if (filename != NULL) printf("%s:", filename);
3725 printf("%s\n", codename);
3733 if (nkf_isdigit(x)) return x - '0';
3734 return nkf_toupper(x) - 'A' + 10;
3739 #ifdef ANSI_C_PROTOTYPE
3740 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3743 hex_getc(ch, f, g, u)
3756 if (!nkf_isxdigit(c2)){
3761 if (!nkf_isxdigit(c3)){
3766 return (hex2bin(c2) << 4) | hex2bin(c3);
3773 return hex_getc(':', f, i_cgetc, i_cungetc);
3781 return (*i_cungetc)(c, f);
3788 return hex_getc('%', f, i_ugetc, i_uungetc);
3796 return (*i_uungetc)(c, f);
3800 #ifdef NUMCHAR_OPTION
3805 int (*g)() = i_ngetc;
3806 int (*u)() = i_nungetc;
3817 if (buf[i] == 'x' || buf[i] == 'X'){
3818 for (j = 0; j < 5; j++){
3820 if (!nkf_isxdigit(buf[i])){
3827 c |= hex2bin(buf[i]);
3830 for (j = 0; j < 6; j++){
3834 if (!nkf_isdigit(buf[i])){
3841 c += hex2bin(buf[i]);
3847 return CLASS_UTF16 | c;
3857 numchar_ungetc(c, f)
3861 return (*i_nungetc)(c, f);
3870 int c1, c2, c3, c4, cc;
3871 int t1, t2, t3, t4, mode, exit_mode;
3875 int lwsp_size = 128;
3877 if (mime_top != mime_last) { /* Something is in FIFO */
3878 return Fifo(mime_top++);
3880 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3881 mime_decode_mode=FALSE;
3882 unswitch_mime_getc();
3883 return (*i_getc)(f);
3886 if (mimebuf_f == FIXED_MIME)
3887 exit_mode = mime_decode_mode;
3890 if (mime_decode_mode == 'Q') {
3891 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3893 if (c1=='_') return ' ';
3894 if (c1!='=' && c1!='?') {
3898 mime_decode_mode = exit_mode; /* prepare for quit */
3899 if (c1<=' ') return c1;
3900 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3901 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3902 /* end Q encoding */
3903 input_mode = exit_mode;
3905 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3906 if (lwsp_buf==NULL) {
3907 perror("can't malloc");
3910 while ((c1=(*i_getc)(f))!=EOF) {
3915 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3923 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3924 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3939 lwsp_buf[lwsp_count] = c1;
3940 if (lwsp_count++>lwsp_size){
3942 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3943 if (lwsp_buf_new==NULL) {
3946 perror("can't realloc");
3949 lwsp_buf = lwsp_buf_new;
3955 if (lwsp_count > 0) {
3956 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3960 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3961 i_ungetc(lwsp_buf[lwsp_count],f);
3969 if (c1=='='&&c2<' ') { /* this is soft wrap */
3970 while((c1 = (*i_mgetc)(f)) <=' ') {
3971 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3973 mime_decode_mode = 'Q'; /* still in MIME */
3974 goto restart_mime_q;
3977 mime_decode_mode = 'Q'; /* still in MIME */
3981 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3982 if (c2<=' ') return c2;
3983 mime_decode_mode = 'Q'; /* still in MIME */
3984 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3985 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3986 return ((hex(c2)<<4) + hex(c3));
3989 if (mime_decode_mode != 'B') {
3990 mime_decode_mode = FALSE;
3991 return (*i_mgetc)(f);
3995 /* Base64 encoding */
3997 MIME allows line breaks in the middle of
3998 Base64, but we are very pessimistic when decoding
3999 in unbuffered mode, because MIME-encoded text may be broken by
4000 control sequences from less or an editor (such as ESC-[-K) in unbuffered
4001 mode. Incomplete MIME is ignored.
4003 mode = mime_decode_mode;
4004 mime_decode_mode = exit_mode; /* prepare for quit */
4006 while ((c1 = (*i_mgetc)(f))<=' ') {
4011 if ((c2 = (*i_mgetc)(f))<=' ') {
4014 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4015 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4018 if ((c1 == '?') && (c2 == '=')) {
4021 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4022 if (lwsp_buf==NULL) {
4023 perror("can't malloc");
4026 while ((c1=(*i_getc)(f))!=EOF) {
4031 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4039 if ((c1=(*i_getc)(f))!=EOF) {
4043 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4058 lwsp_buf[lwsp_count] = c1;
4059 if (lwsp_count++>lwsp_size){
4061 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4062 if (lwsp_buf_new==NULL) {
4065 perror("can't realloc");
4068 lwsp_buf = lwsp_buf_new;
4074 if (lwsp_count > 0) {
4075 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4079 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4080 i_ungetc(lwsp_buf[lwsp_count],f);
4089 if ((c3 = (*i_mgetc)(f))<=' ') {
4092 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4093 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4097 if ((c4 = (*i_mgetc)(f))<=' ') {
4100 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4101 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4105 mime_decode_mode = mode; /* still in MIME sigh... */
4107 /* BASE 64 decoding */
4109 t1 = 0x3f & base64decode(c1);
4110 t2 = 0x3f & base64decode(c2);
4111 t3 = 0x3f & base64decode(c3);
4112 t4 = 0x3f & base64decode(c4);
4113 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4115 Fifo(mime_last++) = cc;
4116 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4118 Fifo(mime_last++) = cc;
4119 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4121 Fifo(mime_last++) = cc;
4126 return Fifo(mime_top++);
4134 Fifo(--mime_top) = c;
4145 /* In buffered mode, read until =? or NL or buffer full
4147 mime_input = mime_top;
4148 mime_last = mime_top;
4149 while(*p) Fifo(mime_input++) = *p++;
4152 while((c=(*i_getc)(f))!=EOF) {
4153 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4154 break; /* buffer full */
4156 if (c=='=' && d=='?') {
4157 /* checked. skip header, start decode */
4158 Fifo(mime_input++) = c;
4159 /* mime_last_input = mime_input; */
4164 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4166 /* Should we check length mod 4? */
4167 Fifo(mime_input++) = c;
4170 /* In case of Incomplete MIME, no MIME decode */
4171 Fifo(mime_input++) = c;
4172 mime_last = mime_input; /* point undecoded buffer */
4173 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4174 switch_mime_getc(); /* anyway we need buffered getc */
4185 i = c - 'A'; /* A..Z 0-25 */
4187 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4189 } else if (c > '/') {
4190 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4191 } else if (c == '+') {
4192 i = '>' /* 62 */ ; /* + 62 */
4194 i = '?' /* 63 */ ; /* / 63 */
4199 static char basis_64[] = /* Base64 alphabet: the character at index v encodes the 6-bit value v (0..63) */
4200 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4203 #define MIMEOUT_BUF_LENGTH (60) /* fill level above which the MIME output code flushes mimeout_buf */
4204 char mimeout_buf[MIMEOUT_BUF_LENGTH+1]; /* pending output characters held back by the MIME encoder */
4205 int mimeout_buf_count = 0; /* number of characters currently stored in mimeout_buf */
4206 int mimeout_preserve_space = 0; /* flag tested, then reset to FALSE, by the encoder code below */
4207 #define itoh4(c) (c>=10?c+'A'-10:c+'0') /* value 0..15 -> uppercase hex digit; c is not parenthesized, so pass a parenthesized expression */
4216 p = mime_pattern[0];
4217 for(i=0;mime_encode[i];i++) {
4218 if (mode == mime_encode[i]) {
4219 p = mime_pattern[i];
4223 mimeout_mode = mime_encode_method[i];
4226 if (base64_count>45) {
4227 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
4228 (*o_mputc)(mimeout_buf[i]);
4234 if (!mimeout_preserve_space && mimeout_buf_count>0
4235 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4236 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4240 if (!mimeout_preserve_space) {
4241 for (;i<mimeout_buf_count;i++) {
4242 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4243 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4244 (*o_mputc)(mimeout_buf[i]);
4251 mimeout_preserve_space = FALSE;
4257 j = mimeout_buf_count;
4258 mimeout_buf_count = 0;
4260 mime_putc(mimeout_buf[i]);
4276 switch(mimeout_mode) {
4281 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4287 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4293 if (mimeout_f!=FIXED_MIME) {
4295 } else if (mimeout_mode != 'Q')
4304 switch(mimeout_mode) {
4309 } else if (c==CR||c==NL) {
4312 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4314 (*o_mputc)(itoh4(((c>>4)&0xf)));
4315 (*o_mputc)(itoh4((c&0xf)));
4324 (*o_mputc)(basis_64[c>>2]);
4329 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4335 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4336 (*o_mputc)(basis_64[c & 0x3F]);
4347 int mime_lastchar2, mime_lastchar1; /* assigned from c2 and c1 in the mime_prechar code below; compared against the next character pair there */
4349 void mime_prechar(c2, c1)
4354 if (base64_count + mimeout_buf_count/3*4> 66){
4355 (*o_base64conv)(EOF,0);
4356 (*o_base64conv)(0,NL);
4357 (*o_base64conv)(0,SPACE);
4359 }/*else if (mime_lastchar2){
4360 if (c1 <=DEL && !nkf_isspace(c1)){
4361 (*o_base64conv)(0,SPACE);
4365 if (c2 && mime_lastchar2 == 0
4366 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
4367 (*o_base64conv)(0,SPACE);
4370 mime_lastchar2 = c2;
4371 mime_lastchar1 = c1;
4382 if (mimeout_f == FIXED_MIME){
4383 if (mimeout_mode == 'Q'){
4384 if (base64_count > 71){
4385 if (c!=CR && c!=NL) {
4392 if (base64_count > 71){
4397 if (c == EOF) { /* c==EOF */
4401 if (c != EOF) { /* c==EOF */
4407 /* mimeout_f != FIXED_MIME */
4409 if (c == EOF) { /* c==EOF */
4410 j = mimeout_buf_count;
4411 mimeout_buf_count = 0;
4414 /*if (nkf_isspace(mimeout_buf[i])){
4417 mimeout_addchar(mimeout_buf[i]);
4421 (*o_mputc)(mimeout_buf[i]);
4427 if (mimeout_mode=='Q') {
4428 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4440 if (mimeout_buf_count > 0){
4441 lastchar = mimeout_buf[mimeout_buf_count - 1];
4446 if (!mimeout_mode) {
4447 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
4448 if (nkf_isspace(c)) {
4449 if (c==CR || c==NL) {
4452 for (i=0;i<mimeout_buf_count;i++) {
4453 (*o_mputc)(mimeout_buf[i]);
4454 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
4461 mimeout_buf_count = 1;
4463 if (base64_count > 1
4464 && base64_count + mimeout_buf_count > 76){
4467 if (!nkf_isspace(mimeout_buf[0])){
4472 mimeout_buf[mimeout_buf_count++] = c;
4473 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4474 open_mime(output_mode);
4479 if (lastchar==CR || lastchar == NL){
4480 for (i=0;i<mimeout_buf_count;i++) {
4481 (*o_mputc)(mimeout_buf[i]);
4484 mimeout_buf_count = 0;
4486 if (lastchar==SPACE) {
4487 for (i=0;i<mimeout_buf_count-1;i++) {
4488 (*o_mputc)(mimeout_buf[i]);
4491 mimeout_buf[0] = SPACE;
4492 mimeout_buf_count = 1;
4494 open_mime(output_mode);
4497 /* mimeout_mode == 'B', 1, 2 */
4498 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4499 if (lastchar == CR || lastchar == NL){
4500 if (nkf_isblank(c)) {
4501 for (i=0;i<mimeout_buf_count;i++) {
4502 mimeout_addchar(mimeout_buf[i]);
4504 mimeout_buf_count = 0;
4505 } else if (SPACE<c && c<DEL) {
4507 for (i=0;i<mimeout_buf_count;i++) {
4508 (*o_mputc)(mimeout_buf[i]);
4511 mimeout_buf_count = 0;
4514 if (c==SPACE || c==TAB || c==CR || c==NL) {
4515 for (i=0;i<mimeout_buf_count;i++) {
4516 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4518 for (i=0;i<mimeout_buf_count;i++) {
4519 (*o_mputc)(mimeout_buf[i]);
4522 mimeout_buf_count = 0;
4525 mimeout_buf[mimeout_buf_count++] = c;
4526 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4528 for (i=0;i<mimeout_buf_count;i++) {
4529 (*o_mputc)(mimeout_buf[i]);
4532 mimeout_buf_count = 0;
4536 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
4537 mimeout_buf[mimeout_buf_count++] = c;
4538 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4539 j = mimeout_buf_count;
4540 mimeout_buf_count = 0;
4542 mimeout_addchar(mimeout_buf[i]);
4549 if (mimeout_buf_count>0) {
4550 j = mimeout_buf_count;
4551 mimeout_buf_count = 0;
4553 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4555 mimeout_addchar(mimeout_buf[i]);
4561 (*o_mputc)(mimeout_buf[i]);
4563 open_mime(output_mode);
4570 #if defined(PERL_XS) || defined(WIN32DLL)
4575 struct input_code *p = input_code_list;
4588 mime_f = STRICT_MIME;
4589 mime_decode_f = FALSE;
4594 #if defined(MSDOS) || defined(__OS2__)
4599 iso2022jp_f = FALSE;
4600 #ifdef UTF8_OUTPUT_ENABLE
4603 ms_ucs_map_f = FALSE;
4615 is_inputcode_mixed = FALSE;
4616 is_inputcode_set = FALSE;
4620 #ifdef SHIFTJIS_CP932
4626 for (i = 0; i < 256; i++){
4627 prefix_table[i] = 0;
4630 #ifdef UTF8_INPUT_ENABLE
4631 utf16_mode = UTF16BE_INPUT;
4633 mimeout_buf_count = 0;
4638 fold_preserve_f = FALSE;
4641 kanji_intro = DEFAULT_J;
4642 ascii_intro = DEFAULT_R;
4643 fold_margin = FOLD_MARGIN;
4644 output_conv = DEFAULT_CONV;
4645 oconv = DEFAULT_CONV;
4646 o_zconv = no_connection;
4647 o_fconv = no_connection;
4648 o_crconv = no_connection;
4649 o_rot_conv = no_connection;
4650 o_hira_conv = no_connection;
4651 o_base64conv = no_connection;
4652 o_iso2022jp_check_conv = no_connection;
4655 i_ungetc = std_ungetc;
4657 i_bungetc = std_ungetc;
4660 i_mungetc = std_ungetc;
4661 i_mgetc_buf = std_getc;
4662 i_mungetc_buf = std_ungetc;
4663 output_mode = ASCII;
4666 mime_decode_mode = FALSE;
4672 z_prev2=0,z_prev1=0;
4674 iconv_for_check = 0;
4676 input_codename = "";
4684 no_connection(c2,c1)
4687 no_connection2(c2,c1,0);
4691 no_connection2(c2,c1,c0)
4694 fprintf(stderr,"nkf internal module connection failure.\n");
4696 return 0; /* LINT */
4701 #define fprintf dllprintf
4706 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4707 fprintf(stderr,"Flags:\n");
4708 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4709 #ifdef DEFAULT_CODE_SJIS
4710 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4712 #ifdef DEFAULT_CODE_JIS
4713 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4715 #ifdef DEFAULT_CODE_EUC
4716 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4718 #ifdef DEFAULT_CODE_UTF8
4719 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4721 #ifdef UTF8_OUTPUT_ENABLE
4722 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4724 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4725 #ifdef UTF8_INPUT_ENABLE
4726 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4728 fprintf(stderr,"t no conversion\n");
4729 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4730 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4731 fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
4732 fprintf(stderr,"v Show this usage. V: show version\n");
4733 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4734 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4735 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4736 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4737 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4738 fprintf(stderr," 3: Convert HTML Entity\n");
4739 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4740 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4742 fprintf(stderr,"T Text mode output\n");
4744 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4745 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4746 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
4747 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4748 fprintf(stderr,"long name options\n");
4749 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4750 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4751 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4752 fprintf(stderr," --x0212 Convert JISX0212\n");
4753 fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
4754 fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
4756 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4758 #ifdef NUMCHAR_OPTION
4759 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4761 #ifdef UTF8_OUTPUT_ENABLE
4762 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4765 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4767 fprintf(stderr," -g, --guess Guess the input code\n");
4768 fprintf(stderr," --help,--version\n");
4775 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4776 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4779 #if defined(MSDOS) && defined(__WIN16__)
4782 #if defined(MSDOS) && defined(__WIN32__)
4788 ,NKF_VERSION,NKF_RELEASE_DATE);
4789 fprintf(stderr,"\n%s\n",CopyRight);
4794 ** パッチ制作者 (patch authors)
4795 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4796 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4797 ** ohta@src.ricoh.co.jp (Junn Ohta)
4798 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4799 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4800 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4801 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4802 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4803 ** GHG00637@nifty-serve.or.jp (COW)