1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be dropped in as a replacement for the previous nkf.
32 ** When started as, e.g., nkf -e, input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as it is.
35 ** It is still quite likely to contain bugs
36 ** (especially in auto-detection, mixed encodings and error handling).
38 ** If you find any problem, please report it to
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.64 2005/03/04 19:20:25 naruse Exp $ */
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2005-03-05"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
152 #else /* defined(MSDOS) */
154 #ifdef __BORLANDC__ /* BCC32 */
156 #else /* !defined(__BORLANDC__) */
157 #include <sys/utime.h>
158 #endif /* (__BORLANDC__) */
159 #else /* !defined(__WIN32__) */
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
161 #include <sys/utime.h>
162 #elif defined(__TURBOC__) /* BCC */
164 #elif defined(LSI_C) /* LSI C */
165 #endif /* (__WIN32__) */
177 /* state of output_mode and input_mode
195 /* Input Assumption */
199 #define LATIN1_INPUT 6
201 #define STRICT_MIME 8
206 #define JAPANESE_EUC 10
210 #define UTF8_INPUT 13
211 #define UTF16LE_INPUT 14
212 #define UTF16BE_INPUT 15
232 #define is_alnum(c) \
233 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
235 #define HOLD_SIZE 1024
236 #define IOBUF_SIZE 16384
238 #define DEFAULT_J 'B'
239 #define DEFAULT_R 'B'
241 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
242 #define SJ6394 0x0161 /* 63 - 94 ku offset */
244 #define RANGE_NUM_MAX 18
249 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
250 #define sizeof_euc_utf8 94
251 #define sizeof_euc_to_utf8_1byte 94
252 #define sizeof_euc_to_utf8_2bytes 94
253 #define sizeof_utf8_to_euc_C2 64
254 #define sizeof_utf8_to_euc_E5B8 64
255 #define sizeof_utf8_to_euc_2bytes 112
256 #define sizeof_utf8_to_euc_3bytes 112
259 /* MIME preprocessor */
262 #ifdef EASYWIN /*Easy Win */
263 extern POINT _BufferSize;
266 /* function prototype */
268 #ifdef ANSI_C_PROTOTYPE
270 #define STATIC static
282 void (*status_func)PROTO((struct input_code *, int));
283 int (*iconv_func)PROTO((int c2, int c1, int c0));
287 STATIC char *input_codename = "";
289 STATIC int noconvert PROTO((FILE *f));
290 STATIC int kanji_convert PROTO((FILE *f));
291 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
292 STATIC int push_hold_buf PROTO((int c2));
293 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
294 STATIC int s_iconv PROTO((int c2,int c1,int c0));
295 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
296 STATIC int e_iconv PROTO((int c2,int c1,int c0));
297 #ifdef UTF8_INPUT_ENABLE
298 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
299 STATIC int w_iconv PROTO((int c2,int c1,int c0));
300 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
301 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
302 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
304 #ifdef UTF8_OUTPUT_ENABLE
305 STATIC int e2w_conv PROTO((int c2,int c1));
306 STATIC void w_oconv PROTO((int c2,int c1));
307 STATIC void w_oconv16 PROTO((int c2,int c1));
309 STATIC void e_oconv PROTO((int c2,int c1));
310 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
311 STATIC void s_oconv PROTO((int c2,int c1));
312 STATIC void j_oconv PROTO((int c2,int c1));
313 STATIC void fold_conv PROTO((int c2,int c1));
314 STATIC void cr_conv PROTO((int c2,int c1));
315 STATIC void z_conv PROTO((int c2,int c1));
316 STATIC void rot_conv PROTO((int c2,int c1));
317 STATIC void hira_conv PROTO((int c2,int c1));
318 STATIC void base64_conv PROTO((int c2,int c1));
319 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
320 STATIC void no_connection PROTO((int c2,int c1));
321 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
323 STATIC void code_score PROTO((struct input_code *ptr));
324 STATIC void code_status PROTO((int c));
326 STATIC void std_putc PROTO((int c));
327 STATIC int std_getc PROTO((FILE *f));
328 STATIC int std_ungetc PROTO((int c,FILE *f));
330 STATIC int broken_getc PROTO((FILE *f));
331 STATIC int broken_ungetc PROTO((int c,FILE *f));
333 STATIC int mime_begin PROTO((FILE *f));
334 STATIC int mime_getc PROTO((FILE *f));
335 STATIC int mime_ungetc PROTO((int c,FILE *f));
337 STATIC int mime_begin_strict PROTO((FILE *f));
338 STATIC int mime_getc_buf PROTO((FILE *f));
339 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
340 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
342 STATIC int base64decode PROTO((int c));
343 STATIC void mime_prechar PROTO((int c2, int c1));
344 STATIC void mime_putc PROTO((int c));
345 STATIC void open_mime PROTO((int c));
346 STATIC void close_mime PROTO(());
347 STATIC void usage PROTO(());
348 STATIC void version PROTO(());
349 STATIC void options PROTO((unsigned char *c));
350 #if defined(PERL_XS) || defined(WIN32DLL)
351 STATIC void reinit PROTO(());
356 static unsigned char stdibuf[IOBUF_SIZE];
357 static unsigned char stdobuf[IOBUF_SIZE];
358 static unsigned char hold_buf[HOLD_SIZE*2];
359 static int hold_count;
361 /* MIME preprocessor fifo.
 * mime_buf is accessed through Fifo(), which masks the position with
 * MIME_BUF_MASK before indexing, so any unsigned position maps to a valid
 * slot. How the three positions below are advanced is not visible in this
 * excerpt. */
363 #define MIME_BUF_SIZE (1024) /* ring buffer size; must be 2^n so that SIZE-1 is a usable bit mask */
364 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) /* low bits that select a slot in mime_buf */
365 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] /* slot for position n, wrapped into the buffer */
366 static unsigned char mime_buf[MIME_BUF_SIZE];
367 static unsigned int mime_top = 0;
368 static unsigned int mime_last = 0; /* decoded */
369 static unsigned int mime_input = 0; /* undecoded */
/* Global mode flags and their start-up defaults.  Several of them are
 * overwritten by the option-letter cases further down in this file. */
372 static int unbuf_f = FALSE;
373 static int estab_f = FALSE;
374 static int nop_f = FALSE;
375 static int binmode_f = TRUE; /* binary mode */
376 static int rot_f = FALSE; /* ROT13/47 mode */
377 static int hira_f = FALSE; /* hiragana/katakana conversion; the digit after the option letter is OR-ed in */
378 static int input_f = FALSE; /* non fixed input code; otherwise one of the *_INPUT constants */
379 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII; bit mask built from the -Z digit */
380 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q; TRUE, STRICT_MIME or FALSE */
381 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
382 static int mimebuf_f = FALSE; /* MIME buffered input; FIXED_MIME when -mB / -mQ is given */
383 static int broken_f = FALSE; /* convert ESC-less broken JIS; bit mask built from the -B digit */
384 static int iso8859_f = FALSE; /* ISO8859 through */
385 static int mimeout_f = FALSE; /* base64 mode */
386 #if defined(MSDOS) || defined(__OS2__)
387 static int x0201_f = TRUE; /* Assume JISX0201 kana */
389 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
391 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
392 #ifdef UTF8_OUTPUT_ENABLE
393 static int unicode_bom_f= 0; /* Output Unicode BOM */
394 static int w_oconv16_LE = 0; /* utf-16 little endian */
395 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
399 #ifdef NUMCHAR_OPTION
401 #define CLASS_MASK 0x0f000000
402 #define CLASS_UTF16 0x01000000
406 static int cap_f = FALSE;
407 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
408 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
409 STATIC int cap_getc PROTO((FILE *f));
410 STATIC int cap_ungetc PROTO((int c,FILE *f));
412 static int url_f = FALSE;
413 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
414 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
415 STATIC int url_getc PROTO((FILE *f));
416 STATIC int url_ungetc PROTO((int c,FILE *f));
/* Numeric character reference input stage: enable flag, the upstream
 * getc/ungetc pair it reads from (std_getc/std_ungetc by default), and the
 * prototypes of the stage's own getc/ungetc. */
418 static int numchar_f = FALSE;
419 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
420 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
421 STATIC int numchar_getc PROTO((FILE *f));
422 STATIC int numchar_ungetc PROTO((int c,FILE *f));
426 static int noout_f = FALSE;
427 STATIC void no_putc PROTO((int c));
428 static int debug_f = FALSE;
429 STATIC void debug PROTO((char *str));
432 static int guess_f = FALSE;
433 STATIC void print_guessed_code PROTO((char *filename));
434 STATIC void set_input_codename PROTO((char *codename));
435 static int is_inputcode_mixed = FALSE;
436 static int is_inputcode_set = FALSE;
439 static int exec_f = 0;
442 #ifdef SHIFTJIS_CP932
443 STATIC int cp932_f = TRUE;
444 #define CP932_TABLE_BEGIN (0xfa)
445 #define CP932_TABLE_END (0xfc)
447 STATIC int cp932inv_f = TRUE;
448 #define CP932INV_TABLE_BEGIN (0xed)
449 #define CP932INV_TABLE_END (0xee)
451 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
452 #endif /* SHIFTJIS_CP932 */
455 STATIC int x0212_f = FALSE;
456 static int x0212_shift PROTO((int c));
457 static int x0212_unshift PROTO((int c));
460 STATIC unsigned char prefix_table[256];
462 STATIC void e_status PROTO((struct input_code *, int));
463 STATIC void s_status PROTO((struct input_code *, int));
465 #ifdef UTF8_INPUT_ENABLE
466 STATIC void w_status PROTO((struct input_code *, int));
467 STATIC void w16_status PROTO((struct input_code *, int));
468 static int utf16_mode = UTF16LE_INPUT;
471 struct input_code input_code_list[] = {
472 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
473 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
474 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
475 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
479 static int mimeout_mode = 0;
480 static int base64_count = 0;
482 /* X0208 -> ASCII converter */
485 static int f_line = 0; /* chars in line */
486 static int f_prev = 0;
487 static int fold_preserve_f = FALSE; /* preserve new lines */
488 static int fold_f = FALSE;
489 static int fold_len = 0;
492 static unsigned char kanji_intro = DEFAULT_J;
493 static unsigned char ascii_intro = DEFAULT_R;
497 #define FOLD_MARGIN 10
498 #define DEFAULT_FOLD 60
500 static int fold_margin = FOLD_MARGIN;
504 #ifdef DEFAULT_CODE_JIS
505 # define DEFAULT_CONV j_oconv
507 #ifdef DEFAULT_CODE_SJIS
508 # define DEFAULT_CONV s_oconv
510 #ifdef DEFAULT_CODE_EUC
511 # define DEFAULT_CONV e_oconv
513 #ifdef DEFAULT_CODE_UTF8
514 # define DEFAULT_CONV w_oconv
517 /* process default */
518 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
520 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
521 /* s_iconv or oconv */
522 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
524 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
525 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
526 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
527 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
528 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
529 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
530 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
532 /* static redirections: function pointers through which the I/O stages
 * call each other.  All of them start out pointing at the plain
 * std_putc / std_getc / std_ungetc routines. */
534 static void (*o_putc)PROTO((int c)) = std_putc;
536 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
537 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
539 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
540 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
542 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
544 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
545 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
547 /* for strict mime */
548 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
549 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
552 static int output_mode = ASCII, /* output kanji mode */
553 input_mode = ASCII, /* input kanji mode */
554 shift_mode = FALSE; /* TRUE shift out, or X0201 */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
557 /* X0201 / X0208 conversion tables */
559 /* X0201 kana conversion table */
562 unsigned char cv[]= {
563 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
564 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
565 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
566 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
567 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
568 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
569 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
570 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
571 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
572 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
573 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
574 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
575 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
576 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
577 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
578 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
582 /* X0201 kana conversion table for dakuten */
585 unsigned char dv[]= {
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
591 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
592 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
593 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
594 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
595 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
597 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 /* X0201 kana conversion table for han-dakuten */
607 unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
630 unsigned char fv[] = {
632 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
633 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
634 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
635 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
636 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
637 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
638 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
639 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
640 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
649 static int file_out = FALSE;
651 static int overwrite = FALSE;
654 static int crmode_f = 0; /* CR, NL, CRLF */
655 #ifdef EASYWIN /*Easy Win */
656 static int end_check;
659 #define STD_GC_BUFSIZE (256)
660 int std_gc_buf[STD_GC_BUFSIZE];
664 #include "nkf32dll.c"
665 #elif defined(PERL_XS)
675 char *outfname = NULL;
678 #ifdef EASYWIN /*Easy Win */
679 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
682 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
683 cp = (unsigned char *)*argv;
688 if (pipe(fds) < 0 || (pid = fork()) < 0){
699 execvp(argv[1], &argv[1]);
713 if(x0201_f == WISH_TRUE)
714 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
716 if (binmode_f == TRUE)
718 if (freopen("","wb",stdout) == NULL)
725 setbuf(stdout, (char *) NULL);
727 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
730 if (binmode_f == TRUE)
732 if (freopen("","rb",stdin) == NULL) return (-1);
736 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
740 kanji_convert(stdin);
741 if (guess_f) print_guessed_code(NULL);
746 is_inputcode_mixed = FALSE;
747 is_inputcode_set = FALSE;
749 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
758 /* reopen file for stdout */
759 if (file_out == TRUE) {
762 outfname = malloc(strlen(origfname)
763 + strlen(".nkftmpXXXXXX")
769 strcpy(outfname, origfname);
773 for (i = strlen(outfname); i; --i){
774 if (outfname[i - 1] == '/'
775 || outfname[i - 1] == '\\'){
781 strcat(outfname, "ntXXXXXX");
783 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
786 strcat(outfname, ".nkftmpXXXXXX");
787 fd = mkstemp(outfname);
790 || (fd_backup = dup(fileno(stdout))) < 0
791 || dup2(fd, fileno(stdout)) < 0
802 outfname = "nkf.out";
805 if(freopen(outfname, "w", stdout) == NULL) {
809 if (binmode_f == TRUE) {
811 if (freopen("","wb",stdout) == NULL)
818 if (binmode_f == TRUE)
820 if (freopen("","rb",fin) == NULL)
825 setvbuffer(fin, stdibuf, IOBUF_SIZE);
829 char *filename = NULL;
831 if (nfiles > 1) filename = origfname;
832 if (guess_f) print_guessed_code(filename);
838 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
846 if (dup2(fd_backup, fileno(stdout)) < 0){
849 if (stat(origfname, &sb)) {
850 fprintf(stderr, "Can't stat %s\n", origfname);
852 /* restore the permissions */
853 if (chmod(outfname, sb.st_mode)) {
854 fprintf(stderr, "Can't set permission %s\n", outfname);
857 /* restore the timestamps */
858 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
859 tb[0] = tb[1] = sb.st_mtime;
860 if (utime(outfname, tb)) {
861 fprintf(stderr, "Can't set timestamp %s\n", outfname);
864 tb.actime = sb.st_atime;
865 tb.modtime = sb.st_mtime;
866 if (utime(outfname, &tb)) {
867 fprintf(stderr, "Can't set timestamp %s\n", outfname);
871 if (unlink(origfname)){
875 if (rename(outfname, origfname)) {
877 fprintf(stderr, "Can't rename %s to %s\n",
878 outfname, origfname);
886 #ifdef EASYWIN /*Easy Win */
887 if (file_out == FALSE)
888 scanf("%d",&end_check);
891 #else /* for Other OS */
892 if (file_out == TRUE)
897 #endif /* WIN32DLL */
922 {"katakana-hiragana","h3"},
929 #ifdef UTF8_OUTPUT_ENABLE
934 #ifdef UTF8_INPUT_ENABLE
936 {"utf16-input", "W16"},
945 #ifdef NUMCHAR_OPTION
946 {"numchar-input", ""},
952 #ifdef SHIFTJIS_CP932
962 static int option_mode = 0;
969 unsigned char *p = NULL;
981 case '-': /* literal options */
982 if (!*cp) { /* ignore the rest of arguments */
986 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
988 p = (unsigned char *)long_option[i].name;
989 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
997 cp = (unsigned char *)long_option[i].alias;
1000 if (strcmp(long_option[i].name, "overwrite") == 0){
1007 if (strcmp(long_option[i].name, "cap-input") == 0){
1011 if (strcmp(long_option[i].name, "url-input") == 0){
1016 #ifdef NUMCHAR_OPTION
1017 if (strcmp(long_option[i].name, "numchar-input") == 0){
1023 if (strcmp(long_option[i].name, "no-output") == 0){
1027 if (strcmp(long_option[i].name, "debug") == 0){
1032 if (strcmp(long_option[i].name, "cp932") == 0){
1033 #ifdef SHIFTJIS_CP932
1037 #ifdef UTF8_OUTPUT_ENABLE
1038 ms_ucs_map_f = TRUE;
1042 if (strcmp(long_option[i].name, "no-cp932") == 0){
1043 #ifdef SHIFTJIS_CP932
1047 #ifdef UTF8_OUTPUT_ENABLE
1048 ms_ucs_map_f = FALSE;
1052 #ifdef SHIFTJIS_CP932
1053 if (strcmp(long_option[i].name, "cp932inv") == 0){
1060 if (strcmp(long_option[i].name, "x0212") == 0){
1067 if (strcmp(long_option[i].name, "exec-in") == 0){
1071 if (strcmp(long_option[i].name, "exec-out") == 0){
1076 #ifdef UTF8_OUTPUT_ENABLE
1077 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1078 ms_ucs_map_f = TRUE;
1082 if (strcmp(long_option[i].name, "prefix=") == 0){
1083 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1084 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1085 prefix_table[p[i]] = p[1];
1092 case 'b': /* buffered mode */
1095 case 'u': /* non bufferd mode */
1098 case 't': /* transparent mode */
1101 case 'j': /* JIS output */
1103 output_conv = j_oconv;
1105 case 'e': /* AT&T EUC output */
1106 output_conv = e_oconv;
1108 case 's': /* SJIS output */
1109 output_conv = s_oconv;
1111 case 'l': /* ISO8859 Latin-1 support, no conversion */
1112 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1113 input_f = LATIN1_INPUT;
1115 case 'i': /* Kanji IN ESC-$-@/B */
1116 if (*cp=='@'||*cp=='B')
1117 kanji_intro = *cp++;
1119 case 'o': /* ASCII IN ESC-(-J/B */
1120 if (*cp=='J'||*cp=='B'||*cp=='H')
1121 ascii_intro = *cp++;
1128 if ('9'>= *cp && *cp>='0')
1129 hira_f |= (*cp++ -'0');
1136 #if defined(MSDOS) || defined(__OS2__)
1151 #ifdef UTF8_OUTPUT_ENABLE
1152 case 'w': /* UTF-8 output */
1153 if ('1'== cp[0] && '6'==cp[1]) {
1154 output_conv = w_oconv16; cp+=2;
1156 unicode_bom_f=2; cp++;
1159 unicode_bom_f=1; cp++;
1161 } else if (cp[0] == 'B') {
1162 unicode_bom_f=2; cp++;
1164 unicode_bom_f=1; cp++;
1167 } else if (cp[0] == '8') {
1168 output_conv = w_oconv; cp++;
1171 unicode_bom_f=1; cp++;
1174 output_conv = w_oconv;
1177 #ifdef UTF8_INPUT_ENABLE
1178 case 'W': /* UTF-8 input */
1179 if ('1'== cp[0] && '6'==cp[1]) {
1180 input_f = UTF16LE_INPUT;
1183 } else if (cp[0] == 'B') {
1185 input_f = UTF16BE_INPUT;
1187 } else if (cp[0] == '8') {
1189 input_f = UTF8_INPUT;
1191 input_f = UTF8_INPUT;
1194 /* Input code assumption */
1195 case 'J': /* JIS input */
1196 case 'E': /* AT&T EUC input */
1197 input_f = JIS_INPUT;
1199 case 'S': /* MS Kanji input */
1200 input_f = SJIS_INPUT;
1201 if (x0201_f==NO_X0201) x0201_f=TRUE;
1203 case 'Z': /* Convert X0208 alphabet to asii */
1204 /* bit:0 Convert X0208
1205 bit:1 Convert Kankaku to one space
1206 bit:2 Convert Kankaku to two spaces
1207 bit:3 Convert HTML Entity
1209 if ('9'>= *cp && *cp>='0')
1210 alpha_f |= 1<<(*cp++ -'0');
1214 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1215 x0201_f = FALSE; /* No X0201->X0208 conversion */
1217 ESC-(-I in JIS, EUC, MS Kanji
1218 SI/SO in JIS, EUC, MS Kanji
1219 SSO in EUC, JIS, not in MS Kanji
1220 MS Kanji (0xa0-0xdf)
1222 ESC-(-I in JIS (0x20-0x5f)
1223 SSO in EUC (0xa0-0xdf)
1224 0xa0-0xd in MS Kanji (0xa0-0xdf)
1227 case 'X': /* Assume X0201 kana */
1228 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1231 case 'F': /* prserve new lines */
1232 fold_preserve_f = TRUE;
1233 case 'f': /* folding -f60 or -f */
1236 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1238 fold_len += *cp++ - '0';
1240 if (!(0<fold_len && fold_len<BUFSIZ))
1241 fold_len = DEFAULT_FOLD;
1245 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1247 fold_margin += *cp++ - '0';
1251 case 'm': /* MIME support */
1252 mime_decode_f = TRUE;
1253 if (*cp=='B'||*cp=='Q') {
1254 mime_decode_mode = *cp++;
1255 mimebuf_f = FIXED_MIME;
1256 } else if (*cp=='N') {
1257 mime_f = TRUE; cp++;
1258 } else if (*cp=='S') {
1259 mime_f = STRICT_MIME; cp++;
1260 } else if (*cp=='0') {
1261 mime_f = FALSE; cp++;
1264 case 'M': /* MIME output */
1267 mimeout_f = FIXED_MIME; cp++;
1268 } else if (*cp=='Q') {
1270 mimeout_f = FIXED_MIME; cp++;
1275 case 'B': /* Broken JIS support */
1277 bit:1 allow any x on ESC-(-x or ESC-$-x
1278 bit:2 reset to ascii on NL
1280 if ('9'>= *cp && *cp>='0')
1281 broken_f |= 1<<(*cp++ -'0');
1286 case 'O':/* for Output file */
1290 case 'c':/* add cr code */
1293 case 'd':/* delete cr code */
1296 case 'I': /* ISO-2022-JP output */
1299 case 'L': /* line mode */
1300 if (*cp=='u') { /* unix */
1301 crmode_f = NL; cp++;
1302 } else if (*cp=='m') { /* mac */
1303 crmode_f = CR; cp++;
1304 } else if (*cp=='w') { /* windows */
1305 crmode_f = CRLF; cp++;
1306 } else if (*cp=='0') { /* no conversion */
1316 /* multiple options in a string are allowed for the Perl module */
1317 while(*cp && *cp!='-') cp++;
1321 /* bogus option but ignored */
1327 #ifdef ANSI_C_PROTOTYPE
1328 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1330 struct input_code * find_inputcode_byfunc(iconv_func)
1331 int (*iconv_func)();
1335 struct input_code *p = input_code_list;
1337 if (iconv_func == p->iconv_func){
1347 static int (*iconv_for_check)() = 0;
1350 #ifdef ANSI_C_PROTOTYPE
1351 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1353 void set_iconv(f, iconv_func)
1355 int (*iconv_func)();
1358 #ifdef INPUT_CODE_FIX
1366 #ifdef INPUT_CODE_FIX
1367 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1373 if (estab_f && iconv_for_check != iconv){
1374 struct input_code *p = find_inputcode_byfunc(iconv);
1376 set_input_codename(p->name);
1377 debug(input_codename);
1379 iconv_for_check = iconv;
/* Score bits for an input-code candidate.  Each macro is the previous one
 * shifted left by one, so every value is a distinct single bit; they are
 * OR-ed into ptr->score by set_code_score() and masked out again by
 * clr_code_score(). */
1384 #define SCORE_L2 (1) /* JIS level-2 kanji */
1385 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1386 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1387 #ifdef SHIFTJIS_CP932
1388 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character was remapped via CP932 */
1389 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* character that does not exist */
1391 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* character that does not exist */
1393 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* code specified by MIME */
1394 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1396 #define SCORE_INIT (SCORE_iMIME)
1398 int score_table_A0[] = {
1401 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1402 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1405 int score_table_F0[] = {
1406 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1407 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1408 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1409 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1412 void set_code_score(ptr, score)
1413 struct input_code *ptr;
1417 ptr->score |= score;
1421 void clr_code_score(ptr, score)
1422 struct input_code *ptr;
1426 ptr->score &= ~score;
1430 void code_score(ptr)
1431 struct input_code *ptr;
1433 int c2 = ptr->buf[0];
1434 int c1 = ptr->buf[1];
1436 set_code_score(ptr, SCORE_ERROR);
1437 }else if (c2 == SSO){
1438 set_code_score(ptr, SCORE_KANA);
1439 #ifdef UTF8_OUTPUT_ENABLE
1440 }else if (!e2w_conv(c2, c1)){
1441 set_code_score(ptr, SCORE_NO_EXIST);
1443 }else if ((c2 & 0x70) == 0x20){
1444 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1445 }else if ((c2 & 0x70) == 0x70){
1446 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1447 }else if ((c2 & 0x70) >= 0x50){
1448 set_code_score(ptr, SCORE_L2);
1452 void status_disable(ptr)
1453 struct input_code *ptr;
1458 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1461 void status_push_ch(ptr, c)
1462 struct input_code *ptr;
1465 ptr->buf[ptr->index++] = c;
1468 void status_clear(ptr)
1469 struct input_code *ptr;
1475 void status_reset(ptr)
1476 struct input_code *ptr;
1479 ptr->score = SCORE_INIT;
1482 void status_reinit(ptr)
1483 struct input_code *ptr;
1486 ptr->_file_stat = 0;
1489 void status_check(ptr, c)
1490 struct input_code *ptr;
1493 if (c <= DEL && estab_f){
1498 void s_status(ptr, c)
1499 struct input_code *ptr;
1504 status_check(ptr, c);
1509 #ifdef NUMCHAR_OPTION
1510 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1513 }else if (0xa1 <= c && c <= 0xdf){
1514 status_push_ch(ptr, SSO);
1515 status_push_ch(ptr, c);
1518 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1520 status_push_ch(ptr, c);
1521 #ifdef SHIFTJIS_CP932
1523 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1525 status_push_ch(ptr, c);
1526 #endif /* SHIFTJIS_CP932 */
1528 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1530 status_push_ch(ptr, c);
1531 #endif /* X0212_ENABLE */
1533 status_disable(ptr);
1537 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1538 status_push_ch(ptr, c);
1539 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1543 status_disable(ptr);
1547 #ifdef SHIFTJIS_CP932
1548 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1549 status_push_ch(ptr, c);
1550 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1551 set_code_score(ptr, SCORE_CP932);
1556 #endif /* SHIFTJIS_CP932 */
1557 #ifndef X0212_ENABLE
1558 status_disable(ptr);
1564 void e_status(ptr, c)
1565 struct input_code *ptr;
1570 status_check(ptr, c);
1575 #ifdef NUMCHAR_OPTION
1576 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1579 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1581 status_push_ch(ptr, c);
1583 }else if (0x8f == c){
1585 status_push_ch(ptr, c);
1586 #endif /* X0212_ENABLE */
1588 status_disable(ptr);
1592 if (0xa1 <= c && c <= 0xfe){
1593 status_push_ch(ptr, c);
1597 status_disable(ptr);
1602 if (0xa1 <= c && c <= 0xfe){
1604 status_push_ch(ptr, c);
1606 status_disable(ptr);
1608 #endif /* X0212_ENABLE */
1612 #ifdef UTF8_INPUT_ENABLE
1613 void w16_status(ptr, c)
1614 struct input_code *ptr;
1621 if (ptr->_file_stat == 0){
1622 if (c == 0xfe || c == 0xff){
1624 status_push_ch(ptr, c);
1625 ptr->_file_stat = 1;
1627 status_disable(ptr);
1628 ptr->_file_stat = -1;
1630 }else if (ptr->_file_stat > 0){
1632 status_push_ch(ptr, c);
1633 }else if (ptr->_file_stat < 0){
1634 status_disable(ptr);
1640 status_disable(ptr);
1641 ptr->_file_stat = -1;
1643 status_push_ch(ptr, c);
1650 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1651 status_push_ch(ptr, c);
1654 status_disable(ptr);
1655 ptr->_file_stat = -1;
1661 void w_status(ptr, c)
1662 struct input_code *ptr;
1667 status_check(ptr, c);
1672 #ifdef NUMCHAR_OPTION
1673 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1676 }else if (0xc0 <= c && c <= 0xdf){
1678 status_push_ch(ptr, c);
1679 }else if (0xe0 <= c && c <= 0xef){
1681 status_push_ch(ptr, c);
1683 status_disable(ptr);
1688 if (0x80 <= c && c <= 0xbf){
1689 status_push_ch(ptr, c);
1690 if (ptr->index > ptr->stat){
1691 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1692 && ptr->buf[2] == 0xbf);
1693 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1694 &ptr->buf[0], &ptr->buf[1]);
1701 status_disable(ptr);
1712 int action_flag = 1;
1713 struct input_code *result = 0;
1714 struct input_code *p = input_code_list;
1716 (p->status_func)(p, c);
1719 }else if(p->stat == 0){
1730 if (result && !estab_f){
1731 set_iconv(TRUE, result->iconv_func);
1732 }else if (c <= DEL){
1733 struct input_code *ptr = input_code_list;
1748 return std_gc_buf[--std_gc_ndx];
1759 if (std_gc_ndx == STD_GC_BUFSIZE){
1762 std_gc_buf[std_gc_ndx++] = c;
1782 while ((c = (*i_getc)(f)) != EOF)
1791 oconv = output_conv;
1794 /* replace continuation module, from output side */
1796 /* output redirection */
1798 if (noout_f || guess_f){
1805 if (mimeout_f == TRUE) {
1806 o_base64conv = oconv; oconv = base64_conv;
1808 /* base64_count = 0; */
1812 o_crconv = oconv; oconv = cr_conv;
1815 o_rot_conv = oconv; oconv = rot_conv;
1818 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1821 o_hira_conv = oconv; oconv = hira_conv;
1824 o_fconv = oconv; oconv = fold_conv;
1827 if (alpha_f || x0201_f) {
1828 o_zconv = oconv; oconv = z_conv;
1832 i_ungetc = std_ungetc;
1833 /* input redirection */
1836 i_cgetc = i_getc; i_getc = cap_getc;
1837 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1840 i_ugetc = i_getc; i_getc = url_getc;
1841 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1844 #ifdef NUMCHAR_OPTION
1846 i_ngetc = i_getc; i_getc = numchar_getc;
1847 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1850 if (mime_f && mimebuf_f==FIXED_MIME) {
1851 i_mgetc = i_getc; i_getc = mime_getc;
1852 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1855 i_bgetc = i_getc; i_getc = broken_getc;
1856 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1858 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1859 set_iconv(-TRUE, e_iconv);
1860 } else if (input_f == SJIS_INPUT) {
1861 set_iconv(-TRUE, s_iconv);
1862 #ifdef UTF8_INPUT_ENABLE
1863 } else if (input_f == UTF8_INPUT) {
1864 set_iconv(-TRUE, w_iconv);
1865 } else if (input_f == UTF16LE_INPUT) {
1866 set_iconv(-TRUE, w_iconv16);
1869 set_iconv(FALSE, e_iconv);
1873 struct input_code *p = input_code_list;
1881 Conversion main loop. Code detection only.
1890 int is_8bit = FALSE;
1892 module_connection();
1897 output_mode = ASCII;
1900 #define NEXT continue /* no output, get next */
1901 #define SEND ; /* output c1 and c2, get next */
1902 #define LAST break /* end of loop, go closing */
1904 while ((c1 = (*i_getc)(f)) != EOF) {
1909 /* in case of 8th bit is on */
1910 if (!estab_f&&!mime_decode_mode) {
1911 /* in case of not established yet */
1912 /* It is still ambiguous */
1913 if (h_conv(f, c2, c1)==EOF)
1919 /* in case of already established */
1921 /* ignore bogus code */
1927 /* second byte, 7 bit code */
1928 /* it might be kanji shifted */
1929 if ((c1 == DEL) || (c1 <= SPACE)) {
1930 /* ignore bogus first code */
1938 #ifdef UTF8_INPUT_ENABLE
1947 #ifdef NUMCHAR_OPTION
1948 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1951 } else if (c1 > DEL) {
1953 if (!estab_f && !iso8859_f) {
1954 /* not established yet */
1955 if (!is_8bit) is_8bit = TRUE;
1958 } else { /* estab_f==TRUE */
1963 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1964 /* SJIS X0201 Case... */
1965 if(iso2022jp_f && x0201_f==NO_X0201) {
1966 (*oconv)(GETA1, GETA2);
1973 } else if (c1==SSO && iconv != s_iconv) {
1974 /* EUC X0201 Case */
1975 c1 = (*i_getc)(f); /* skip SSO */
1977 if (SSP<=c1 && c1<0xe0) {
1978 if(iso2022jp_f && x0201_f==NO_X0201) {
1979 (*oconv)(GETA1, GETA2);
1986 } else { /* bogus code, skip SSO and one byte */
1990 /* already established */
1995 } else if ((c1 > SPACE) && (c1 != DEL)) {
1996 /* in case of Roman characters */
1998 /* output 1 shifted byte */
2002 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2003 /* output 1 shifted byte */
2004 if(iso2022jp_f && x0201_f==NO_X0201) {
2005 (*oconv)(GETA1, GETA2);
2012 /* look like bogus code */
2015 } else if (input_mode == X0208) {
2016 /* in case of Kanji shifted */
2019 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2020 /* Check MIME code */
2021 if ((c1 = (*i_getc)(f)) == EOF) {
2024 } else if (c1 == '?') {
2025 /* =? is mime conversion start sequence */
2026 if(mime_f == STRICT_MIME) {
2027 /* check in real detail */
2028 if (mime_begin_strict(f) == EOF)
2032 } else if (mime_begin(f) == EOF)
2042 /* normal ASCII code */
2045 } else if (c1 == SI) {
2048 } else if (c1 == SO) {
2051 } else if (c1 == ESC ) {
2052 if ((c1 = (*i_getc)(f)) == EOF) {
2053 /* (*oconv)(0, ESC); don't send bogus code */
2055 } else if (c1 == '$') {
2056 if ((c1 = (*i_getc)(f)) == EOF) {
2058 (*oconv)(0, ESC); don't send bogus code
2059 (*oconv)(0, '$'); */
2061 } else if (c1 == '@'|| c1 == 'B') {
2062 /* This is kanji introduction */
2065 set_input_codename("ISO-2022-JP");
2066 debug(input_codename);
2068 } else if (c1 == '(') {
2069 if ((c1 = (*i_getc)(f)) == EOF) {
2070 /* don't send bogus code
2076 } else if (c1 == '@'|| c1 == 'B') {
2077 /* This is kanji introduction */
2082 } else if (c1 == 'D'){
2086 #endif /* X0212_ENABLE */
2088 /* could be some special code */
2095 } else if (broken_f&0x2) {
2096 /* accept any ESC-(-x as broken code ... */
2106 } else if (c1 == '(') {
2107 if ((c1 = (*i_getc)(f)) == EOF) {
2108 /* don't send bogus code
2110 (*oconv)(0, '('); */
2114 /* This is X0201 kana introduction */
2115 input_mode = X0201; shift_mode = X0201;
2117 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2118 /* This is ASCII/Roman introduction: switch back to ASCII input mode */
2119 input_mode = ASCII; shift_mode = FALSE;
2121 } else if (broken_f&0x2) {
2122 input_mode = ASCII; shift_mode = FALSE;
2127 /* maintain various input_mode here */
2131 } else if ( c1 == 'N' || c1 == 'n' ){
2133 c3 = (*i_getc)(f); /* skip SS2 */
2134 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2149 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2150 input_mode = ASCII; set_iconv(FALSE, 0);
2152 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2153 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2161 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2162 if ((c1=(*i_getc)(f))!=EOF) {
2166 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2182 if (input_mode == X0208)
2183 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2185 else if (input_mode == X0212)
2186 (*oconv)((0x8f << 8) | c2, c1);
2187 #endif /* X0212_ENABLE */
2188 else if (input_mode)
2189 (*oconv)(input_mode, c1); /* other special case */
2190 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2191 int c0 = (*i_getc)(f);
2194 (*iconv)(c2, c1, c0);
2200 /* goto next_word */
2204 (*iconv)(EOF, 0, 0);
2205 if (!is_inputcode_set)
2208 struct input_code *p = input_code_list;
2209 struct input_code *result = p;
2211 if (p->score < result->score) result = p;
2214 set_input_codename(result->name);
2216 set_input_codename("ASCII");
2230 /** it must NOT be in the kanji shifted sequence */
2231 /** it must NOT be written in JIS7 */
2232 /** and it must be after 2 byte 8bit code */
2239 while ((c1 = (*i_getc)(f)) != EOF) {
2245 if (push_hold_buf(c1) == EOF || estab_f){
2251 struct input_code *p = input_code_list;
2252 struct input_code *result = p;
2257 if (p->score < result->score){
2262 set_iconv(FALSE, result->iconv_func);
2267 ** 1) EOF is detected, or
2268 ** 2) Code is established, or
2269 ** 3) Buffer is FULL (but last word is pushed)
2271 ** in 1) and 3) cases, we continue to use
2272 ** Kanji codes by oconv and leave estab_f unchanged.
2277 while (wc < hold_count){
2278 c2 = hold_buf[wc++];
2280 #ifdef NUMCHAR_OPTION
2281 || (c2 & CLASS_MASK) == CLASS_UTF16
2286 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2287 (*iconv)(X0201, c2, 0);
2290 if (wc < hold_count){
2291 c1 = hold_buf[wc++];
2300 if ((*iconv)(c2, c1, 0) < 0){
2302 if (wc < hold_count){
2303 c0 = hold_buf[wc++];
2312 (*iconv)(c2, c1, c0);
2325 if (hold_count >= HOLD_SIZE*2)
2327 hold_buf[hold_count++] = c2;
2328 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2331 int s2e_conv(c2, c1, p2, p1)
2336 #ifdef SHIFTJIS_CP932
2337 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2338 extern unsigned short shiftjis_cp932[3][189];
2339 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2345 #endif /* SHIFTJIS_CP932 */
2347 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2348 extern unsigned short shiftjis_x0212[3][189];
2349 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2352 c2 = (0x8f << 8) | (val >> 8);
2364 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2366 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2374 c2 = x0212_unshift(c2);
2389 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2392 int ret = s2e_conv(c2, c1, &c2, &c1);
2393 if (ret) return ret;
2407 }else if (c2 == 0x8f){
2411 c2 = (c2 << 8) | (c1 & 0x7f);
2413 #ifdef SHIFTJIS_CP932
2416 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2417 s2e_conv(s2, s1, &c2, &c1);
2418 if ((c2 & 0xff00) == 0){
2424 #endif /* SHIFTJIS_CP932 */
2425 #endif /* X0212_ENABLE */
2426 } else if (c2 == SSO){
2429 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2439 #ifdef UTF8_INPUT_ENABLE
2441 w2e_conv(c2, c1, c0, p2, p1)
2445 extern unsigned short * utf8_to_euc_2bytes[];
2446 extern unsigned short ** utf8_to_euc_3bytes[];
2449 if (0xc0 <= c2 && c2 <= 0xef) {
2450 unsigned short **pp;
2453 if (c0 == 0) return -1;
2454 pp = utf8_to_euc_3bytes[c2 - 0x80];
2455 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2457 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2459 #ifdef NUMCHAR_OPTION
2462 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2467 } else if (c2 == X0201) {
2480 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2488 w16w_conv(val, p2, p1, p0)
2496 }else if (val < 0x800){
2497 *p2 = 0xc0 | (val >> 6);
2498 *p1 = 0x80 | (val & 0x3f);
2501 *p2 = 0xe0 | (val >> 12);
2502 *p1 = 0x80 | ((val >> 6) & 0x3f);
2503 *p0 = 0x80 | (val & 0x3f);
2508 ww16_conv(c2, c1, c0)
2513 val = (c2 & 0x0f) << 12;
2514 val |= (c1 & 0x3f) << 6;
2516 }else if (c2 >= 0xc0){
2517 val = (c2 & 0x1f) << 6;
2526 w16e_conv(val, p2, p1)
2530 extern unsigned short * utf8_to_euc_2bytes[];
2531 extern unsigned short ** utf8_to_euc_3bytes[];
2533 unsigned short **pp;
2537 w16w_conv(val, &c2, &c1, &c0);
2540 pp = utf8_to_euc_3bytes[c2 - 0x80];
2541 psize = sizeof_utf8_to_euc_C2;
2542 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2544 pp = utf8_to_euc_2bytes;
2545 psize = sizeof_utf8_to_euc_2bytes;
2546 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2548 #ifdef NUMCHAR_OPTION
2551 *p1 = CLASS_UTF16 | val;
2563 w_iconv16(c2, c1, c0)
2568 if (c2==0376 && c1==0377){
2569 utf16_mode = UTF16LE_INPUT;
2571 } else if (c2==0377 && c1==0376){
2572 utf16_mode = UTF16BE_INPUT;
2575 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2577 tmp=c1; c1=c2; c2=tmp;
2579 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2583 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2584 if (ret) return ret;
2590 w_iconv_common(c1, c0, pp, psize, p2, p1)
2592 unsigned short **pp;
2600 if (pp == 0) return 1;
2603 if (c1 < 0 || psize <= c1) return 1;
2605 if (p == 0) return 1;
2608 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2610 if (val == 0) return 1;
2617 if (c2 == SO) c2 = X0201;
2626 #ifdef UTF8_OUTPUT_ENABLE
2631 extern unsigned short euc_to_utf8_1byte[];
2632 extern unsigned short * euc_to_utf8_2bytes[];
2633 extern unsigned short * euc_to_utf8_2bytes_ms[];
2637 p = euc_to_utf8_1byte;
2639 } else if (c2 >> 8 == 0x8f){
2640 extern unsigned short * x0212_to_utf8_2bytes[];
2641 c2 = (c2&0x7f) - 0x21;
2642 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2643 p = x0212_to_utf8_2bytes[c2];
2649 c2 = (c2&0x7f) - 0x21;
2650 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2651 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2656 c1 = (c1 & 0x7f) - 0x21;
2657 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2673 if (unicode_bom_f==2) {
2680 #ifdef NUMCHAR_OPTION
2681 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2682 w16w_conv(c1, &c2, &c1, &c0);
2686 if (c0) (*o_putc)(c0);
2693 output_mode = ASCII;
2695 } else if (c2 == ISO8859_1) {
2696 output_mode = ISO8859_1;
2697 (*o_putc)(c1 | 0x080);
2701 val = e2w_conv(c2, c1);
2703 w16w_conv(val, &c2, &c1, &c0);
2707 if (c0) (*o_putc)(c0);
2723 if (unicode_bom_f==2) {
2725 (*o_putc)((unsigned char)'\377');
2729 (*o_putc)((unsigned char)'\377');
2734 if (c2 == ISO8859_1) {
2737 #ifdef NUMCHAR_OPTION
2738 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2739 c2 = (c1 >> 8) & 0xff;
2743 unsigned short val = e2w_conv(c2, c1);
2744 c2 = (val >> 8) & 0xff;
2763 #ifdef NUMCHAR_OPTION
2764 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2765 w16e_conv(c1, &c2, &c1);
2766 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2774 } else if (c2 == 0) {
2775 output_mode = ASCII;
2777 } else if (c2 == X0201) {
2778 output_mode = JAPANESE_EUC;
2779 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2780 } else if (c2 == ISO8859_1) {
2781 output_mode = ISO8859_1;
2782 (*o_putc)(c1 | 0x080);
2784 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2785 output_mode = JAPANESE_EUC;
2786 #ifdef SHIFTJIS_CP932
2789 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2790 s2e_conv(s2, s1, &c2, &c1);
2794 if ((c2 & 0xff00) >> 8 == 0x8f){
2797 (*o_putc)((c2 & 0x7f) | 0x080);
2798 (*o_putc)(c1 | 0x080);
2801 (*o_putc)((c2 & 0x7f) | 0x080);
2802 (*o_putc)(c1 | 0x080);
2806 if ((c1<0x21 || 0x7e<c1) ||
2807 (c2<0x21 || 0x7e<c2)) {
2808 set_iconv(FALSE, 0);
2809 return; /* too late to rescue this char */
2811 output_mode = JAPANESE_EUC;
2812 (*o_putc)(c2 | 0x080);
2813 (*o_putc)(c1 | 0x080);
2823 if ((ret & 0xff00) == 0x8f00){
2824 if (0x75 <= c && c <= 0x7f){
2825 ret = c + (0x109 - 0x75);
2828 if (0x75 <= c && c <= 0x7f){
2829 ret = c + (0x113 - 0x75);
2836 int x0212_unshift(c)
2840 if (0x7f <= c && c <= 0x88){
2841 ret = c + (0x75 - 0x7f);
2842 }else if (0x89 <= c && c <= 0x92){
2843 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2847 #endif /* X0212_ENABLE */
2850 e2s_conv(c2, c1, p2, p1)
2851 int c2, c1, *p2, *p1;
2854 unsigned short *ptr;
2856 extern unsigned short *x0212_shiftjis[];
2858 if ((c2 & 0xff00) == 0x8f00){
2860 if (0x21 <= ndx && ndx <= 0x7e){
2861 ptr = x0212_shiftjis[ndx - 0x21];
2863 val = ptr[(c1 & 0x7f) - 0x21];
2873 c2 = x0212_shift(c2);
2875 #endif /* X0212_ENABLE */
2876 if ((c2 & 0xff00) == 0x8f00){
2879 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2880 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2889 #ifdef NUMCHAR_OPTION
2890 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2891 w16e_conv(c1, &c2, &c1);
2897 } else if (c2 == 0) {
2898 output_mode = ASCII;
2900 } else if (c2 == X0201) {
2901 output_mode = SHIFT_JIS;
2903 } else if (c2 == ISO8859_1) {
2904 output_mode = ISO8859_1;
2905 (*o_putc)(c1 | 0x080);
2907 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2908 output_mode = SHIFT_JIS;
2909 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2915 if ((c1<0x20 || 0x7e<c1) ||
2916 (c2<0x20 || 0x7e<c2)) {
2917 set_iconv(FALSE, 0);
2918 return; /* too late to rescue this char */
2920 output_mode = SHIFT_JIS;
2921 e2s_conv(c2, c1, &c2, &c1);
2923 #ifdef SHIFTJIS_CP932
2925 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2926 extern unsigned short cp932inv[2][189];
2927 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2933 #endif /* SHIFTJIS_CP932 */
2936 if (prefix_table[(unsigned char)c1]){
2937 (*o_putc)(prefix_table[(unsigned char)c1]);
2948 #ifdef NUMCHAR_OPTION
2949 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2950 w16e_conv(c1, &c2, &c1);
2954 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2957 (*o_putc)(ascii_intro);
2958 output_mode = ASCII;
2962 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2963 if (output_mode!=X0212) {
2964 output_mode = X0212;
2970 (*o_putc)(c2 & 0x7f);
2973 } else if (c2==X0201) {
2974 if (output_mode!=X0201) {
2975 output_mode = X0201;
2981 } else if (c2==ISO8859_1) {
2982 /* iso8859 introduction, or 8th bit on */
2983 /* Can we convert in 7bit form using ESC-'-'-A ?
2985 output_mode = ISO8859_1;
2987 } else if (c2 == 0) {
2988 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2991 (*o_putc)(ascii_intro);
2992 output_mode = ASCII;
2996 if (output_mode != X0208) {
2997 output_mode = X0208;
3000 (*o_putc)(kanji_intro);
3002 if (c1<0x20 || 0x7e<c1)
3004 if (c2<0x20 || 0x7e<c2)
3016 mime_prechar(c2, c1);
3017 (*o_base64conv)(c2,c1);
3021 static int broken_buf[3];
3022 static int broken_counter = 0;
3023 static int broken_last = 0;
3030 if (broken_counter>0) {
3031 return broken_buf[--broken_counter];
3034 if (c=='$' && broken_last != ESC
3035 && (input_mode==ASCII || input_mode==X0201)) {
3038 if (c1=='@'|| c1=='B') {
3039 broken_buf[0]=c1; broken_buf[1]=c;
3046 } else if (c=='(' && broken_last != ESC
3047 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3050 if (c1=='J'|| c1=='B') {
3051 broken_buf[0]=c1; broken_buf[1]=c;
3069 if (broken_counter<2)
3070 broken_buf[broken_counter++]=c;
3074 static int prev_cr = 0;
3082 if (! (c2==0&&c1==NL) ) {
3088 } else if (c1=='\r') {
3090 } else if (c1=='\n') {
3091 if (crmode_f==CRLF) {
3092 (*o_crconv)(0,'\r');
3093 } else if (crmode_f==CR) {
3094 (*o_crconv)(0,'\r');
3098 } else if (c1!='\032' || crmode_f!=NL){
3104 Return value of fold_conv()
3106 \n add newline and output char
3107 \r add newline and output nothing
3110 1 (or else) normal output
3112 fold state in prev (previous character)
3114 >0x80 Japanese (X0208/X0201)
3119 This fold algorithm does not preserve leading space in a line.
3120 This is the main difference from fmt.
/*
 * Number of columns one character adds to the fold line counter (f_line):
 * 2 when c2 is nonzero, 1 otherwise.  c1 is accepted so call sites can pass
 * the (c2, c1) pair unchanged, but it is not used.
 * The c2 argument is parenthesized so that expression arguments (for
 * example a conditional expression) expand with the intended grouping.
 */
#define char_size(c2,c1) ((c2)?2:1)
3132 if (c1== '\r' && !fold_preserve_f) {
3133 fold_state=0; /* ignore cr */
3134 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3136 fold_state=0; /* ignore cr */
3137 } else if (c1== BS) {
3138 if (f_line>0) f_line--;
3140 } else if (c2==EOF && f_line != 0) { /* close open last line */
3142 } else if ((c1=='\n' && !fold_preserve_f)
3143 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3144 && fold_preserve_f)) {
3146 if (fold_preserve_f) {
3150 } else if ((f_prev == c1 && !fold_preserve_f)
3151 || (f_prev == '\n' && fold_preserve_f)
3152 ) { /* duplicate newline */
3155 fold_state = '\n'; /* output two newline */
3161 if (f_prev&0x80) { /* Japanese? */
3163 fold_state = 0; /* ignore given single newline */
3164 } else if (f_prev==' ') {
3168 if (++f_line<=fold_len)
3172 fold_state = '\r'; /* fold and output nothing */
3176 } else if (c1=='\f') {
3181 fold_state = '\n'; /* output newline and clear */
3182 } else if ( (c2==0 && c1==' ')||
3183 (c2==0 && c1=='\t')||
3184 (c2=='!'&& c1=='!')) {
3185 /* X0208 kankaku or ascii space */
3186 if (f_prev == ' ') {
3187 fold_state = 0; /* remove duplicate spaces */
3190 if (++f_line<=fold_len)
3191 fold_state = ' '; /* output ASCII space only */
3193 f_prev = ' '; f_line = 0;
3194 fold_state = '\r'; /* fold and output nothing */
3198 prev0 = f_prev; /* we still need this one... , but almost done */
3200 if (c2 || c2==X0201)
3201 f_prev |= 0x80; /* this is Japanese */
3202 f_line += char_size(c2,c1);
3203 if (f_line<=fold_len) { /* normal case */
3206 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3207 f_line = char_size(c2,c1);
3208 fold_state = '\n'; /* We can't wait, do fold now */
3209 } else if (c2==X0201) {
3210 /* simple kinsoku rules return 1 means no folding */
3211 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3212 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3213 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3214 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3215 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3216 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3217 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3219 fold_state = '\n';/* add one new f_line before this character */
3222 fold_state = '\n';/* add one new f_line before this character */
3225 /* kinsoku point in ASCII */
3226 if ( c1==')'|| /* { [ ( */
3237 /* just after special */
3238 } else if (!is_alnum(prev0)) {
3239 f_line = char_size(c2,c1);
3241 } else if ((prev0==' ') || /* ignored new f_line */
3242 (prev0=='\n')|| /* ignored new f_line */
3243 (prev0&0x80)) { /* X0208 - ASCII */
3244 f_line = char_size(c2,c1);
3245 fold_state = '\n';/* add one new f_line before this character */
3247 fold_state = 1; /* default no fold in ASCII */
3251 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3252 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3253 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3254 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3255 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3256 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3257 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3258 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3259 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3260 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3261 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3262 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3263 /* default no fold in kinsoku */
3266 f_line = char_size(c2,c1);
3267 /* add one new f_line before this character */
3270 f_line = char_size(c2,c1);
3272 /* add one new f_line before this character */
3277 /* terminator process */
3278 switch(fold_state) {
3297 int z_prev2=0,z_prev1=0;
3304 /* if (c2) c1 &= 0x7f; assertion */
3306 if (x0201_f && z_prev2==X0201) { /* X0201 */
3307 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3309 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3311 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3313 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3317 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3326 if (x0201_f && c2==X0201) {
3327 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3328 /* wait for dakuten (voiced sound mark) or handakuten (semi-voiced sound mark) */
3329 z_prev1 = c1; z_prev2 = c2;
3332 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3337 /* JISX0208 Alphabet */
3338 if (alpha_f && c2 == 0x23 ) {
3340 } else if (alpha_f && c2 == 0x21 ) {
3341 /* JISX0208 Kigou */
3346 } else if (alpha_f&0x4) {
3351 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3357 case '>': entity = ">"; break;
3358 case '<': entity = "<"; break;
3359 case '\"': entity = """; break;
3360 case '&': entity = "&"; break;
3363 while (*entity) (*o_zconv)(0, *entity++);
3373 #define rot13(c) ( \
3375 (c <= 'M') ? (c + 13): \
3376 (c <= 'Z') ? (c - 13): \
3378 (c <= 'm') ? (c + 13): \
3379 (c <= 'z') ? (c - 13): \
3383 #define rot47(c) ( \
3385 ( c <= 'O' ) ? (c + 47) : \
3386 ( c <= '~' ) ? (c - 47) : \
3394 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3400 (*o_rot_conv)(c2,c1);
3407 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3409 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3412 (*o_hira_conv)(c2,c1);
3417 iso2022jp_check_conv(c2,c1)
3420 static int range[RANGE_NUM_MAX][2] = {
3443 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3447 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3452 for (i = 0; i < RANGE_NUM_MAX; i++) {
3453 start = range[i][0];
3456 if (c >= start && c <= end) {
3461 (*o_iso2022jp_check_conv)(c2,c1);
3465 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3467 unsigned char *mime_pattern[] = {
3468 (unsigned char *)"\075?EUC-JP?B?",
3469 (unsigned char *)"\075?SHIFT_JIS?B?",
3470 (unsigned char *)"\075?ISO-8859-1?Q?",
3471 (unsigned char *)"\075?ISO-8859-1?B?",
3472 (unsigned char *)"\075?ISO-2022-JP?B?",
3473 (unsigned char *)"\075?ISO-2022-JP?Q?",
3474 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3475 (unsigned char *)"\075?UTF-8?B?",
3476 (unsigned char *)"\075?UTF-8?Q?",
3478 (unsigned char *)"\075?US-ASCII?Q?",
3483 /* Marker used to raise the priority of the corresponding input code */
3484 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3485 e_iconv, s_iconv, 0, 0, 0, 0,
3486 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3492 int mime_encode[] = {
3493 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3494 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3501 int mime_encode_method[] = {
3502 'B', 'B','Q', 'B', 'B', 'Q',
3503 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3511 #define MAXRECOVER 20
/*
 * Hand-rolled ASCII character classification helpers.
 * I don't trust portability of toupper, so these compare against character
 * literals directly instead of calling <ctype.h>.
 *
 * Every use of the argument is parenthesized so that expression arguments
 * (e.g. a conditional or bitwise expression) expand with the intended
 * grouping.  The argument may still be evaluated more than once, so do not
 * pass expressions with side effects.
 *
 * nkf_isblank / nkf_isspace rely on the SPACE, TAB, CR and NL constants
 * defined elsewhere in this file.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c)  ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
3525 if (i_getc!=mime_getc) {
3526 i_mgetc = i_getc; i_getc = mime_getc;
3527 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3528 if(mime_f==STRICT_MIME) {
3529 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3530 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3536 unswitch_mime_getc()
3538 if(mime_f==STRICT_MIME) {
3539 i_mgetc = i_mgetc_buf;
3540 i_mungetc = i_mungetc_buf;
3543 i_ungetc = i_mungetc;
3547 mime_begin_strict(f)
3552 unsigned char *p,*q;
3553 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3555 mime_decode_mode = FALSE;
3556 /* =? has been checked */
3558 p = mime_pattern[j];
3561 for(i=2;p[i]>' ';i++) { /* start at =? */
3562 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3563 /* pattern fails, try next one */
3565 while ((p = mime_pattern[++j])) {
3566 for(k=2;k<i;k++) /* assume length(p) > i */
3567 if (p[k]!=q[k]) break;
3568 if (k==i && nkf_toupper(c1)==p[k]) break;
3570 if (p) continue; /* found next one, continue */
3571 /* all fails, output from recovery buffer */
3579 mime_decode_mode = p[i-2];
3581 set_iconv(FALSE, mime_priority_func[j]);
3582 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3584 if (mime_decode_mode=='B') {
3585 mimebuf_f = unbuf_f;
3587 /* do MIME integrity check */
3588 return mime_integrity(f,mime_pattern[j]);
3600 /* we don't keep eof of Fifo, because it contains ?= as
3601 a terminator. It was checked in mime_integrity. */
3602 return ((mimebuf_f)?
3603 (*i_mgetc_buf)(f):Fifo(mime_input++));
3607 mime_ungetc_buf(c,f)
3612 (*i_mungetc_buf)(c,f);
3614 Fifo(--mime_input)=c;
3625 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3626 /* re-read and convert again from mime_buffer. */
3628 /* =? has been checked */
3630 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3631 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3632 /* We accept any character type even if it is broken by new lines */
3633 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3634 if (c1=='\n'||c1==' '||c1=='\r'||
3635 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3637 /* Failed. But this could be another MIME preamble */
3645 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3646 if (!(++i<MAXRECOVER) || c1==EOF) break;
3647 if (c1=='b'||c1=='B') {
3648 mime_decode_mode = 'B';
3649 } else if (c1=='q'||c1=='Q') {
3650 mime_decode_mode = 'Q';
3654 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3655 if (!(++i<MAXRECOVER) || c1==EOF) break;
3657 mime_decode_mode = FALSE;
3663 if (!mime_decode_mode) {
3664 /* false MIME preamble, restart from mime_buffer */
3665 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3666 /* Since we are in MIME mode until buffer becomes empty, */
3667 /* we never go into mime_begin again for a while. */
3670 /* discard mime preamble, and goto MIME mode */
3672 /* do no MIME integrity check */
3673 return c1; /* used only for checking EOF */
3688 fprintf(stderr, "%s\n", str);
3694 set_input_codename (codename)
3699 strcmp(codename, "") != 0 &&
3700 strcmp(codename, input_codename) != 0)
3702 is_inputcode_mixed = TRUE;
3704 input_codename = codename;
3705 is_inputcode_set = TRUE;
3710 print_guessed_code (filename)
3713 char *codename = "BINARY";
3714 if (!is_inputcode_mixed) {
3715 if (strcmp(input_codename, "") == 0) {
3718 codename = input_codename;
3721 if (filename != NULL) printf("%s:", filename);
3722 printf("%s\n", codename);
3730 if (nkf_isdigit(x)) return x - '0';
3731 return nkf_toupper(x) - 'A' + 10;
3736 #ifdef ANSI_C_PROTOTYPE
3737 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3740 hex_getc(ch, f, g, u)
3753 if (!nkf_isxdigit(c2)){
3758 if (!nkf_isxdigit(c3)){
3763 return (hex2bin(c2) << 4) | hex2bin(c3);
3770 return hex_getc(':', f, i_cgetc, i_cungetc);
3778 return (*i_cungetc)(c, f);