1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
113 #if defined(MSDOS) || defined(__OS2__)
120 #define setbinmode(fp) fsetbin(fp)
121 #else /* Microsoft C, Turbo C */
122 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
124 #else /* UNIX,OS/2 */
125 #define setbinmode(fp)
128 #ifdef _IOFBF /* SysV and MSDOS, Windows */
129 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
131 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
134 /*Borland C++ 4.5 EasyWin*/
135 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
144 /* added by satoru@isoternet.org */
147 #include <sys/stat.h>
148 #ifndef MSDOS /* UNIX, OS/2 */
152 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
153 #include <sys/utime.h>
154 #elif defined(__TURBOC__) /* BCC */
156 #elif defined(LSI_C) /* LSI C */
168 /* state of output_mode and input_mode
185 /* Input Assumption */
189 #define LATIN1_INPUT 6
191 #define STRICT_MIME 8
196 #define JAPANESE_EUC 10
200 #define UTF8_INPUT 13
201 #define UTF16_INPUT 14
202 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): nonzero when c lies in 'a'..'z', 'A'..'Z' or '0'..'9'.
 * The argument is parenthesized so that an expression argument (for
 * example a conditional expression) is compared as a whole instead of
 * being torn apart by operator precedence.  c is still evaluated several
 * times, so do not pass an expression with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
223 #define HOLD_SIZE 1024
224 #define IOBUF_SIZE 16384
226 #define DEFAULT_J 'B'
227 #define DEFAULT_R 'B'
229 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
230 #define SJ6394 0x0161 /* 63 - 94 ku offset */
232 #define RANGE_NUM_MAX 18
237 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
238 #define sizeof_euc_utf8 94
239 #define sizeof_euc_to_utf8_1byte 94
240 #define sizeof_euc_to_utf8_2bytes 94
241 #define sizeof_utf8_to_euc_C2 64
242 #define sizeof_utf8_to_euc_E5B8 64
243 #define sizeof_utf8_to_euc_2bytes 112
244 #define sizeof_utf8_to_euc_3bytes 112
247 /* MIME preprocessor */
250 #ifdef EASYWIN /*Easy Win */
251 extern POINT _BufferSize;
254 /* function prototype */
256 #ifdef ANSI_C_PROTOTYPE
258 #define STATIC static
270 void (*status_func)PROTO((struct input_code *, int));
271 int (*iconv_func)PROTO((int c2, int c1, int c0));
275 STATIC char *input_codename = "";
277 STATIC int noconvert PROTO((FILE *f));
278 STATIC int kanji_convert PROTO((FILE *f));
279 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
280 STATIC int push_hold_buf PROTO((int c2));
281 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
282 STATIC int s_iconv PROTO((int c2,int c1,int c0));
283 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
284 STATIC int e_iconv PROTO((int c2,int c1,int c0));
285 #ifdef UTF8_INPUT_ENABLE
286 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
287 STATIC int w_iconv PROTO((int c2,int c1,int c0));
288 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
289 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
290 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
292 #ifdef UTF8_OUTPUT_ENABLE
293 STATIC int e2w_conv PROTO((int c2,int c1));
294 STATIC void w_oconv PROTO((int c2,int c1));
295 STATIC void w_oconv16 PROTO((int c2,int c1));
297 STATIC void e_oconv PROTO((int c2,int c1));
298 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
299 STATIC void s_oconv PROTO((int c2,int c1));
300 STATIC void j_oconv PROTO((int c2,int c1));
301 STATIC void fold_conv PROTO((int c2,int c1));
302 STATIC void cr_conv PROTO((int c2,int c1));
303 STATIC void z_conv PROTO((int c2,int c1));
304 STATIC void rot_conv PROTO((int c2,int c1));
305 STATIC void hira_conv PROTO((int c2,int c1));
306 STATIC void base64_conv PROTO((int c2,int c1));
307 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
308 STATIC void no_connection PROTO((int c2,int c1));
309 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
311 STATIC void code_score PROTO((struct input_code *ptr));
312 STATIC void code_status PROTO((int c));
314 STATIC void std_putc PROTO((int c));
315 STATIC int std_getc PROTO((FILE *f));
316 STATIC int std_ungetc PROTO((int c,FILE *f));
318 STATIC int broken_getc PROTO((FILE *f));
319 STATIC int broken_ungetc PROTO((int c,FILE *f));
321 STATIC int mime_begin PROTO((FILE *f));
322 STATIC int mime_getc PROTO((FILE *f));
323 STATIC int mime_ungetc PROTO((int c,FILE *f));
325 STATIC int mime_begin_strict PROTO((FILE *f));
326 STATIC int mime_getc_buf PROTO((FILE *f));
327 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
328 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
330 STATIC int base64decode PROTO((int c));
331 STATIC void mime_putc PROTO((int c));
332 STATIC void open_mime PROTO((int c));
333 STATIC void close_mime PROTO(());
334 STATIC void usage PROTO(());
335 STATIC void version PROTO(());
336 STATIC void options PROTO((unsigned char *c));
338 STATIC void reinit PROTO(());
343 static unsigned char stdibuf[IOBUF_SIZE];
344 static unsigned char stdobuf[IOBUF_SIZE];
345 static unsigned char hold_buf[HOLD_SIZE*2];
346 static int hold_count;
348 /* MIME preprocessor fifo */
350 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
351 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
352 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
353 static unsigned char mime_buf[MIME_BUF_SIZE];
354 static unsigned int mime_top = 0;
355 static unsigned int mime_last = 0; /* decoded */
356 static unsigned int mime_input = 0; /* undecoded */
359 static int unbuf_f = FALSE;
360 static int estab_f = FALSE;
361 static int nop_f = FALSE;
362 static int binmode_f = TRUE; /* binary mode */
363 static int rot_f = FALSE; /* ROT13/47 mode */
364 static int hira_f = FALSE; /* hiragana/katakana conversion */
365 static int input_f = FALSE; /* non fixed input code */
366 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
367 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
368 static int mimebuf_f = FALSE; /* MIME buffered input */
369 static int broken_f = FALSE; /* convert ESC-less broken JIS */
370 static int iso8859_f = FALSE; /* ISO8859 through */
371 static int mimeout_f = FALSE; /* base64 mode */
372 #if defined(MSDOS) || defined(__OS2__)
373 static int x0201_f = TRUE; /* Assume JISX0201 kana */
375 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
377 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
378 #ifdef UTF8_OUTPUT_ENABLE
379 static int w_oconv16_begin_f= 0; /* utf-16 header */
380 static int w_oconv16_LE = 0; /* utf-16 little endian */
384 #ifdef NUMCHAR_OPTION
386 #define CLASS_MASK 0x0f000000
387 #define CLASS_UTF16 0x01000000
391 static int cap_f = FALSE;
392 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
393 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
394 STATIC int cap_getc PROTO((FILE *f));
395 STATIC int cap_ungetc PROTO((int c,FILE *f));
397 static int url_f = FALSE;
398 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
399 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
400 STATIC int url_getc PROTO((FILE *f));
401 STATIC int url_ungetc PROTO((int c,FILE *f));
403 static int numchar_f = FALSE;
404 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input source for numchar_getc */
405 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
406 STATIC int numchar_getc PROTO((FILE *f));
407 STATIC int numchar_ungetc PROTO((int c,FILE *f));
411 static int noout_f = FALSE;
412 STATIC void no_putc PROTO((int c));
413 static int debug_f = FALSE;
414 STATIC void debug PROTO((char *str));
417 static int guess_f = FALSE;
418 STATIC void print_guessed_code PROTO((char *filename));
419 STATIC void set_input_codename PROTO((char *codename));
420 static int is_inputcode_mixed = FALSE;
421 static int is_inputcode_set = FALSE;
424 static int exec_f = 0;
427 #ifdef SHIFTJIS_CP932
428 STATIC int cp932_f = TRUE;
429 #define CP932_TABLE_BEGIN (0xfa)
430 #define CP932_TABLE_END (0xfc)
432 STATIC int cp932inv_f = FALSE;
433 #define CP932INV_TABLE_BEGIN (0xed)
434 #define CP932INV_TABLE_END (0xee)
436 #endif /* SHIFTJIS_CP932 */
438 STATIC unsigned char prefix_table[256];
440 STATIC void e_status PROTO((struct input_code *, int));
441 STATIC void s_status PROTO((struct input_code *, int));
443 #ifdef UTF8_INPUT_ENABLE
444 STATIC void w_status PROTO((struct input_code *, int));
445 STATIC void w16_status PROTO((struct input_code *, int));
446 static int utf16_mode = UTF16_INPUT;
449 struct input_code input_code_list[] = {
450 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
451 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
452 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
453 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
457 static int mimeout_mode = 0;
458 static int base64_count = 0;
460 /* X0208 -> ASCII converter */
463 static int f_line = 0; /* chars in line */
464 static int f_prev = 0;
465 static int fold_preserve_f = FALSE; /* preserve new lines */
466 static int fold_f = FALSE;
467 static int fold_len = 0;
470 static unsigned char kanji_intro = DEFAULT_J,
471 ascii_intro = DEFAULT_R;
475 #define FOLD_MARGIN 10
476 #define DEFAULT_FOLD 60
478 static int fold_margin = FOLD_MARGIN;
482 #ifdef DEFAULT_CODE_JIS
483 # define DEFAULT_CONV j_oconv
485 #ifdef DEFAULT_CODE_SJIS
486 # define DEFAULT_CONV s_oconv
488 #ifdef DEFAULT_CODE_EUC
489 # define DEFAULT_CONV e_oconv
491 #ifdef DEFAULT_CODE_UTF8
492 # define DEFAULT_CONV w_oconv
495 /* process default */
496 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
498 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
499 /* s_iconv or oconv */
500 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
502 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
503 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
504 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
505 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
506 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
507 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
508 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
510 /* static redirections */
512 static void (*o_putc)PROTO((int c)) = std_putc;
514 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
515 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
517 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input source for broken_getc */
518 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
520 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
522 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
523 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
525 /* for strict mime */
526 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
527 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
530 static int output_mode = ASCII, /* output kanji mode */
531 input_mode = ASCII, /* input kanji mode */
532 shift_mode = FALSE; /* TRUE shift out, or X0201 */
533 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
535 /* X0201 / X0208 conversion tables */
537 /* X0201 kana conversion table */
540 unsigned char cv[]= {
541 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
542 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
543 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
544 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
545 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
546 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
547 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
548 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
549 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
550 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
551 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
552 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
553 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
554 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
555 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
556 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
560 /* X0201 kana conversion table for dakuten (voiced sound mark) */
563 unsigned char dv[]= {
564 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
567 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
568 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
569 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
570 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
571 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
572 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
573 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
574 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
575 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
577 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
579 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
585 unsigned char ev[]= {
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
597 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 /* X0208 kigou conversion table */
606 /* 0x8140 - 0x819e */
608 unsigned char fv[] = {
610 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
611 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
612 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
614 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
615 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
616 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
618 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
627 static int file_out = FALSE;
629 static int overwrite = FALSE;
632 static int crmode_f = 0; /* CR, NL, CRLF */
633 #ifdef EASYWIN /*Easy Win */
634 static int end_check;
649 #ifdef EASYWIN /*Easy Win */
650 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
653 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
654 cp = (unsigned char *)*argv;
659 if (pipe(fds) < 0 || (pid = fork()) < 0){
670 execvp(argv[1], &argv[1]);
684 if(x0201_f == WISH_TRUE)
685 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
687 if (binmode_f == TRUE)
689 if (freopen("","wb",stdout) == NULL)
696 setbuf(stdout, (char *) NULL);
698 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
701 if (binmode_f == TRUE)
703 if (freopen("","rb",stdin) == NULL) return (-1);
707 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
711 kanji_convert(stdin);
712 if (guess_f) print_guessed_code(NULL);
717 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
726 /* reopen file for stdout */
727 if (file_out == TRUE) {
730 outfname = malloc(strlen(origfname)
731 + strlen(".nkftmpXXXXXX")
737 strcpy(outfname, origfname);
741 for (i = strlen(outfname); i; --i){
742 if (outfname[i - 1] == '/'
743 || outfname[i - 1] == '\\'){
749 strcat(outfname, "ntXXXXXX");
751 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
754 strcat(outfname, ".nkftmpXXXXXX");
755 fd = mkstemp(outfname);
758 || (fd_backup = dup(fileno(stdout))) < 0
759 || dup2(fd, fileno(stdout)) < 0
770 outfname = "nkf.out";
773 if(freopen(outfname, "w", stdout) == NULL) {
777 if (binmode_f == TRUE) {
779 if (freopen("","wb",stdout) == NULL)
786 if (binmode_f == TRUE)
788 if (freopen("","rb",fin) == NULL)
793 setvbuffer(fin, stdibuf, IOBUF_SIZE);
797 char *filename = NULL;
799 if (nfiles > 1) filename = origfname;
800 if (guess_f) print_guessed_code(filename);
806 #if defined(MSDOS) && !defined(__MINGW32__)
814 if (dup2(fd_backup, fileno(stdout)) < 0){
817 if (stat(origfname, &sb)) {
818 fprintf(stderr, "Can't stat %s\n", origfname);
820 /* Restore the permissions */
821 if (chmod(outfname, sb.st_mode)) {
822 fprintf(stderr, "Can't set permission %s\n", outfname);
825 /* Restore the timestamp */
826 #if defined(MSDOS) && !defined(__MINGW32__)
827 tb[0] = tb[1] = sb.st_mtime;
828 if (utime(outfname, tb)) {
829 fprintf(stderr, "Can't set timestamp %s\n", outfname);
832 tb.actime = sb.st_atime;
833 tb.modtime = sb.st_mtime;
834 if (utime(outfname, &tb)) {
835 fprintf(stderr, "Can't set timestamp %s\n", outfname);
839 if (unlink(origfname)){
843 if (rename(outfname, origfname)) {
845 fprintf(stderr, "Can't rename %s to %s\n",
846 outfname, origfname);
854 #ifdef EASYWIN /*Easy Win */
855 if (file_out == FALSE)
856 scanf("%d",&end_check);
859 #else /* for Other OS */
860 if (file_out == TRUE)
890 {"katakana-hiragana","h3"},
892 #ifdef UTF8_OUTPUT_ENABLE
896 #ifdef UTF8_INPUT_ENABLE
898 {"utf16-input", "W16"},
907 #ifdef NUMCHAR_OPTION
908 {"numchar-input", ""},
914 #ifdef SHIFTJIS_CP932
925 static int option_mode;
940 case '-': /* literal options */
941 if (!*cp) { /* ignore the rest of arguments */
945 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
947 p = (unsigned char *)long_option[i].name;
948 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
956 cp = (unsigned char *)long_option[i].alias;
959 if (strcmp(long_option[i].name, "overwrite") == 0){
966 if (strcmp(long_option[i].name, "cap-input") == 0){
970 if (strcmp(long_option[i].name, "url-input") == 0){
975 #ifdef NUMCHAR_OPTION
976 if (strcmp(long_option[i].name, "numchar-input") == 0){
982 if (strcmp(long_option[i].name, "no-output") == 0){
986 if (strcmp(long_option[i].name, "debug") == 0){
991 #ifdef SHIFTJIS_CP932
992 if (strcmp(long_option[i].name, "no-cp932") == 0){
996 if (strcmp(long_option[i].name, "cp932inv") == 0){
1002 if (strcmp(long_option[i].name, "exec-in") == 0){
1006 if (strcmp(long_option[i].name, "exec-out") == 0){
1011 if (strcmp(long_option[i].name, "prefix=") == 0){
1012 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1013 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1014 prefix_table[p[i]] = p[1];
1021 case 'b': /* buffered mode */
1024 case 'u': /* non bufferd mode */
1027 case 't': /* transparent mode */
1030 case 'j': /* JIS output */
1032 output_conv = j_oconv;
1034 case 'e': /* AT&T EUC output */
1035 output_conv = e_oconv;
1037 case 's': /* SJIS output */
1038 output_conv = s_oconv;
1040 case 'l': /* ISO8859 Latin-1 support, no conversion */
1041 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1042 input_f = LATIN1_INPUT;
1044 case 'i': /* Kanji IN ESC-$-@/B */
1045 if (*cp=='@'||*cp=='B')
1046 kanji_intro = *cp++;
1048 case 'o': /* ASCII IN ESC-(-J/B */
1049 if (*cp=='J'||*cp=='B'||*cp=='H')
1050 ascii_intro = *cp++;
1057 if ('9'>= *cp && *cp>='0')
1058 hira_f |= (*cp++ -'0');
1065 #if defined(MSDOS) || defined(__OS2__)
1080 #ifdef UTF8_OUTPUT_ENABLE
1081 case 'w': /* UTF-8 output */
1082 if ('1'== cp[0] && '6'==cp[1]) {
1083 output_conv = w_oconv16; cp+=2;
1085 w_oconv16_begin_f=2; cp++;
1088 w_oconv16_begin_f=1; cp++;
1090 } else if (cp[0] == 'B') {
1091 w_oconv16_begin_f=2; cp++;
1093 w_oconv16_begin_f=1; cp++;
1097 output_conv = w_oconv;
1100 #ifdef UTF8_INPUT_ENABLE
1101 case 'W': /* UTF-8 input */
1102 if ('1'== cp[0] && '6'==cp[1]) {
1103 input_f = UTF16_INPUT;
1105 input_f = UTF8_INPUT;
1108 /* Input code assumption */
1109 case 'J': /* JIS input */
1110 case 'E': /* AT&T EUC input */
1111 input_f = JIS_INPUT;
1113 case 'S': /* MS Kanji input */
1114 input_f = SJIS_INPUT;
1115 if (x0201_f==NO_X0201) x0201_f=TRUE;
1117 case 'Z': /* Convert X0208 alphabet to asii */
1118 /* bit:0 Convert X0208
1119 bit:1 Convert Kankaku to one space
1120 bit:2 Convert Kankaku to two spaces
1121 bit:3 Convert HTML Entity
1123 if ('9'>= *cp && *cp>='0')
1124 alpha_f |= 1<<(*cp++ -'0');
1128 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1129 x0201_f = FALSE; /* No X0201->X0208 conversion */
1131 ESC-(-I in JIS, EUC, MS Kanji
1132 SI/SO in JIS, EUC, MS Kanji
1133 SSO in EUC, JIS, not in MS Kanji
1134 MS Kanji (0xa0-0xdf)
1136 ESC-(-I in JIS (0x20-0x5f)
1137 SSO in EUC (0xa0-0xdf)
1138 0xa0-0xd in MS Kanji (0xa0-0xdf)
1141 case 'X': /* Assume X0201 kana */
1142 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1145 case 'F': /* prserve new lines */
1146 fold_preserve_f = TRUE;
1147 case 'f': /* folding -f60 or -f */
1150 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1152 fold_len += *cp++ - '0';
1154 if (!(0<fold_len && fold_len<BUFSIZ))
1155 fold_len = DEFAULT_FOLD;
1159 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1161 fold_margin += *cp++ - '0';
1165 case 'm': /* MIME support */
1166 if (*cp=='B'||*cp=='Q') {
1167 mime_decode_mode = *cp++;
1168 mimebuf_f = FIXED_MIME;
1169 } else if (*cp=='N') {
1170 mime_f = TRUE; cp++;
1171 } else if (*cp=='S') {
1172 mime_f = STRICT_MIME; cp++;
1173 } else if (*cp=='0') {
1174 mime_f = FALSE; cp++;
1177 case 'M': /* MIME output */
1180 mimeout_f = FIXED_MIME; cp++;
1181 } else if (*cp=='Q') {
1183 mimeout_f = FIXED_MIME; cp++;
1188 case 'B': /* Broken JIS support */
1190 bit:1 allow any x on ESC-(-x or ESC-$-x
1191 bit:2 reset to ascii on NL
1193 if ('9'>= *cp && *cp>='0')
1194 broken_f |= 1<<(*cp++ -'0');
1199 case 'O':/* for Output file */
1203 case 'c':/* add cr code */
1206 case 'd':/* delete cr code */
1209 case 'I': /* ISO-2022-JP output */
1212 case 'L': /* line mode */
1213 if (*cp=='u') { /* unix */
1214 crmode_f = NL; cp++;
1215 } else if (*cp=='m') { /* mac */
1216 crmode_f = CR; cp++;
1217 } else if (*cp=='w') { /* windows */
1218 crmode_f = CRLF; cp++;
1219 } else if (*cp=='0') { /* no conversion */
1227 /* multiple options in a single string are allowed (for the Perl module) */
1228 while(*cp && *cp!='-') cp++;
1232 /* bogus option but ignored */
1238 #ifdef ANSI_C_PROTOTYPE
1239 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1241 struct input_code * find_inputcode_byfunc(iconv_func)
1242 int (*iconv_func)();
1246 struct input_code *p = input_code_list;
1248 if (iconv_func == p->iconv_func){
1257 #ifdef ANSI_C_PROTOTYPE
1258 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1260 void set_iconv(f, iconv_func)
1262 int (*iconv_func)();
1266 static int (*iconv_for_check)() = 0;
1268 #ifdef INPUT_CODE_FIX
1276 #ifdef INPUT_CODE_FIX
1277 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1283 if (estab_f && iconv_for_check != iconv){
1284 struct input_code *p = find_inputcode_byfunc(iconv);
1286 set_input_codename(p->name);
1287 debug(input_codename);
1289 iconv_for_check = iconv;
1294 #define SCORE_L2 (1) /* JIS level-2 kanji */
1295 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1296 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1297 #ifdef SHIFTJIS_CP932
1298 #define SCORE_CP932 (SCORE_DEPEND << 1) /* CP932 reinterpretation was applied */
1299 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1301 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1303 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified via MIME */
1304 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1306 #define SCORE_INIT (SCORE_iMIME)
1308 int score_table_A0[] = {
1311 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1312 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1315 int score_table_F0[] = {
1316 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1317 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1318 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1319 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1322 void set_code_score(ptr, score)
1323 struct input_code *ptr;
1327 ptr->score |= score;
1331 void clr_code_score(ptr, score)
1332 struct input_code *ptr;
1336 ptr->score &= ~score;
1340 void code_score(ptr)
1341 struct input_code *ptr;
1343 int c2 = ptr->buf[0];
1344 int c1 = ptr->buf[1];
1346 set_code_score(ptr, SCORE_ERROR);
1347 }else if (c2 == SSO){
1348 set_code_score(ptr, SCORE_KANA);
1349 #ifdef UTF8_OUTPUT_ENABLE
1350 }else if (!e2w_conv(c2, c1)){
1351 set_code_score(ptr, SCORE_NO_EXIST);
1353 }else if ((c2 & 0x70) == 0x20){
1354 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1355 }else if ((c2 & 0x70) == 0x70){
1356 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1357 }else if ((c2 & 0x70) >= 0x50){
1358 set_code_score(ptr, SCORE_L2);
1362 void status_disable(ptr)
1363 struct input_code *ptr;
1368 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1371 void status_push_ch(ptr, c)
1372 struct input_code *ptr;
1375 ptr->buf[ptr->index++] = c;
1378 void status_clear(ptr)
1379 struct input_code *ptr;
1385 void status_reset(ptr)
1386 struct input_code *ptr;
1389 ptr->score = SCORE_INIT;
1392 void status_reinit(ptr)
1393 struct input_code *ptr;
1396 ptr->_file_stat = 0;
1399 void status_check(ptr, c)
1400 struct input_code *ptr;
1403 if (c <= DEL && estab_f){
1408 void s_status(ptr, c)
1409 struct input_code *ptr;
1414 status_check(ptr, c);
1419 #ifdef NUMCHAR_OPTION
1420 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1423 }else if (0xa1 <= c && c <= 0xdf){
1424 status_push_ch(ptr, SSO);
1425 status_push_ch(ptr, c);
1428 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1430 status_push_ch(ptr, c);
1431 #ifdef SHIFTJIS_CP932
1433 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1435 status_push_ch(ptr, c);
1436 #endif /* SHIFTJIS_CP932 */
1438 status_disable(ptr);
1442 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1443 status_push_ch(ptr, c);
1444 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1448 status_disable(ptr);
1451 #ifdef SHIFTJIS_CP932
1453 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1454 status_push_ch(ptr, c);
1455 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1456 set_code_score(ptr, SCORE_CP932);
1461 status_disable(ptr);
1463 #endif /* SHIFTJIS_CP932 */
1467 void e_status(ptr, c)
1468 struct input_code *ptr;
1473 status_check(ptr, c);
1478 #ifdef NUMCHAR_OPTION
1479 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1482 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1484 status_push_ch(ptr, c);
1486 status_disable(ptr);
1490 if (0xa1 <= c && c <= 0xfe){
1491 status_push_ch(ptr, c);
1495 status_disable(ptr);
1501 #ifdef UTF8_INPUT_ENABLE
1502 void w16_status(ptr, c)
1503 struct input_code *ptr;
1510 if (ptr->_file_stat == 0){
1511 if (c == 0xfe || c == 0xff){
1513 status_push_ch(ptr, c);
1514 ptr->_file_stat = 1;
1516 status_disable(ptr);
1517 ptr->_file_stat = -1;
1519 }else if (ptr->_file_stat > 0){
1521 status_push_ch(ptr, c);
1522 }else if (ptr->_file_stat < 0){
1523 status_disable(ptr);
1529 status_disable(ptr);
1530 ptr->_file_stat = -1;
1532 status_push_ch(ptr, c);
1539 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1540 status_push_ch(ptr, c);
1543 status_disable(ptr);
1544 ptr->_file_stat = -1;
1550 void w_status(ptr, c)
1551 struct input_code *ptr;
1556 status_check(ptr, c);
1561 #ifdef NUMCHAR_OPTION
1562 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1565 }else if (0xc0 <= c && c <= 0xdf){
1567 status_push_ch(ptr, c);
1568 }else if (0xe0 <= c && c <= 0xef){
1570 status_push_ch(ptr, c);
1572 status_disable(ptr);
1577 if (0x80 <= c && c <= 0xbf){
1578 status_push_ch(ptr, c);
1579 if (ptr->index > ptr->stat){
1580 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1581 && ptr->buf[2] == 0xbf);
1582 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1583 &ptr->buf[0], &ptr->buf[1]);
1590 status_disable(ptr);
1601 int action_flag = 1;
1602 struct input_code *result = 0;
1603 struct input_code *p = input_code_list;
1605 (p->status_func)(p, c);
1608 }else if(p->stat == 0){
1619 if (result && !estab_f){
1620 set_iconv(TRUE, result->iconv_func);
1621 }else if (c <= DEL){
1622 struct input_code *ptr = input_code_list;
1632 #define STD_GC_BUFSIZE (256)
1633 int std_gc_buf[STD_GC_BUFSIZE];
1643 return std_gc_buf[--std_gc_ndx];
1655 if (std_gc_ndx == STD_GC_BUFSIZE){
1658 std_gc_buf[std_gc_ndx++] = c;
1678 while ((c = (*i_getc)(f)) != EOF)
1687 oconv = output_conv;
1690 /* replace continuation module, from output side */
1692 /* output redirection */
1694 if (noout_f || guess_f){
1701 if (mimeout_f == TRUE) {
1702 o_base64conv = oconv; oconv = base64_conv;
1704 /* base64_count = 0; */
1708 o_crconv = oconv; oconv = cr_conv;
1711 o_rot_conv = oconv; oconv = rot_conv;
1714 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1717 o_hira_conv = oconv; oconv = hira_conv;
1720 o_fconv = oconv; oconv = fold_conv;
1723 if (alpha_f || x0201_f) {
1724 o_zconv = oconv; oconv = z_conv;
1728 i_ungetc = std_ungetc;
1729 /* input redirection */
1732 i_cgetc = i_getc; i_getc = cap_getc;
1733 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1736 i_ugetc = i_getc; i_getc = url_getc;
1737 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1740 #ifdef NUMCHAR_OPTION
1742 i_ngetc = i_getc; i_getc = numchar_getc;
1743 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1746 if (mime_f && mimebuf_f==FIXED_MIME) {
1747 i_mgetc = i_getc; i_getc = mime_getc;
1748 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1751 i_bgetc = i_getc; i_getc = broken_getc;
1752 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1754 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1755 set_iconv(-TRUE, e_iconv);
1756 } else if (input_f == SJIS_INPUT) {
1757 set_iconv(-TRUE, s_iconv);
1758 #ifdef UTF8_INPUT_ENABLE
1759 } else if (input_f == UTF8_INPUT) {
1760 set_iconv(-TRUE, w_iconv);
1761 } else if (input_f == UTF16_INPUT) {
1762 set_iconv(-TRUE, w_iconv16);
1765 set_iconv(FALSE, e_iconv);
1769 struct input_code *p = input_code_list;
1777 Conversion main loop. Code detection only.
1787 module_connection();
1792 output_mode = ASCII;
1795 #define NEXT continue /* no output, get next */
1796 #define SEND ; /* output c1 and c2, get next */
1797 #define LAST break /* end of loop, go closing */
1799 while ((c1 = (*i_getc)(f)) != EOF) {
1804 /* in case of 8th bit is on */
1806 /* in case of not established yet */
1807 /* It is still ambiguous */
1808 if (h_conv(f, c2, c1)==EOF)
1814 /* in case of already established */
1816 /* ignore bogus code */
1822 /* second byte, 7 bit code */
1823 /* it might be kanji shifted */
1824 if ((c1 == DEL) || (c1 <= SPACE)) {
1825 /* ignore bogus first code */
1833 #ifdef UTF8_INPUT_ENABLE
1842 #ifdef NUMCHAR_OPTION
1843 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1846 } else if (c1 > DEL) {
1848 if (!estab_f && !iso8859_f) {
1849 /* not established yet */
1852 } else { /* estab_f==TRUE */
1857 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1858 /* SJIS X0201 Case... */
1859 if(iso2022jp_f && x0201_f==NO_X0201) {
1860 (*oconv)(GETA1, GETA2);
1867 } else if (c1==SSO && iconv != s_iconv) {
1868 /* EUC X0201 Case */
1869 c1 = (*i_getc)(f); /* skip SSO */
1871 if (SSP<=c1 && c1<0xe0) {
1872 if(iso2022jp_f && x0201_f==NO_X0201) {
1873 (*oconv)(GETA1, GETA2);
1880 } else { /* bogus code, skip SSO and one byte */
1884 /* already established */
1889 } else if ((c1 > SPACE) && (c1 != DEL)) {
1890 /* in case of Roman characters */
1892 /* output 1 shifted byte */
1896 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1897 /* output 1 shifted byte */
1898 if(iso2022jp_f && x0201_f==NO_X0201) {
1899 (*oconv)(GETA1, GETA2);
1906 /* look like bogus code */
1909 } else if (input_mode == X0208) {
1910 /* in case of Kanji shifted */
1913 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1914 /* Check MIME code */
1915 if ((c1 = (*i_getc)(f)) == EOF) {
1918 } else if (c1 == '?') {
1919 /* =? is mime conversion start sequence */
1920 if(mime_f == STRICT_MIME) {
1921 /* check in real detail */
1922 if (mime_begin_strict(f) == EOF)
1926 } else if (mime_begin(f) == EOF)
1936 /* normal ASCII code */
1939 } else if (c1 == SI) {
1942 } else if (c1 == SO) {
1945 } else if (c1 == ESC ) {
1946 if ((c1 = (*i_getc)(f)) == EOF) {
1947 /* (*oconv)(0, ESC); don't send bogus code */
1949 } else if (c1 == '$') {
1950 if ((c1 = (*i_getc)(f)) == EOF) {
1952 (*oconv)(0, ESC); don't send bogus code
1953 (*oconv)(0, '$'); */
1955 } else if (c1 == '@'|| c1 == 'B') {
1956 /* This is kanji introduction */
1959 set_input_codename("ISO-2022-JP");
1960 debug(input_codename);
1962 } else if (c1 == '(') {
1963 if ((c1 = (*i_getc)(f)) == EOF) {
1964 /* don't send bogus code
1970 } else if (c1 == '@'|| c1 == 'B') {
1971 /* This is kanji introduction */
1976 /* could be some special code */
1983 } else if (broken_f&0x2) {
1984 /* accept any ESC-(-x as broken code ... */
1994 } else if (c1 == '(') {
1995 if ((c1 = (*i_getc)(f)) == EOF) {
1996 /* don't send bogus code
1998 (*oconv)(0, '('); */
2002 /* This is X0201 kana introduction */
2003 input_mode = X0201; shift_mode = X0201;
2005 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2006 /* This is X0208 kanji introduction */
2007 input_mode = ASCII; shift_mode = FALSE;
2009 } else if (broken_f&0x2) {
2010 input_mode = ASCII; shift_mode = FALSE;
2015 /* maintain various input_mode here */
2019 } else if ( c1 == 'N' || c1 == 'n' ){
2021 c3 = (*i_getc)(f); /* skip SS2 */
2022 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2037 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2038 input_mode = ASCII; set_iconv(FALSE, 0);
2044 if (input_mode == X0208)
2045 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2046 else if (input_mode)
2047 (*oconv)(input_mode, c1); /* other special case */
2048 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2049 int c0 = (*i_getc)(f);
2052 (*iconv)(c2, c1, c0);
2058 /* goto next_word */
2062 (*iconv)(EOF, 0, 0);
2075 /** it must NOT be in the kanji shifted sequence */
2076 /** it must NOT be written in JIS7 */
2077 /** and it must be after 2 byte 8bit code */
2084 while ((c1 = (*i_getc)(f)) != EOF) {
2090 if (push_hold_buf(c1) == EOF || estab_f){
2096 struct input_code *p = input_code_list;
2097 struct input_code *result = p;
2102 if (p->score < result->score){
2107 set_iconv(FALSE, result->iconv_func);
2112 ** 1) EOF is detected, or
2113 ** 2) Code is established, or
2114 ** 3) Buffer is FULL (but last word is pushed)
2116 ** in 1) and 3) cases, we continue to use
2117 ** Kanji codes by oconv and leave estab_f unchanged.
2122 while (wc < hold_count){
2123 c2 = hold_buf[wc++];
2125 #ifdef NUMCHAR_OPTION
2126 || (c2 & CLASS_MASK) == CLASS_UTF16
2131 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2132 (*iconv)(X0201, c2, 0);
2135 if (wc < hold_count){
2136 c1 = hold_buf[wc++];
2145 if ((*iconv)(c2, c1, 0) < 0){
2147 if (wc < hold_count){
2148 c0 = hold_buf[wc++];
2157 (*iconv)(c2, c1, c0);
2170 if (hold_count >= HOLD_SIZE*2)
2172 hold_buf[hold_count++] = c2;
2173 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2176 int s2e_conv(c2, c1, p2, p1)
2180 #ifdef SHIFTJIS_CP932
2181 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2182 extern unsigned short shiftjis_cp932[3][189];
2183 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2184 if (c1 == 0) return 1;
2188 #endif /* SHIFTJIS_CP932 */
2189 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2191 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2208 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2211 int ret = s2e_conv(c2, c1, &c2, &c1);
2212 if (ret) return ret;
2225 } else if (c2 == SSO){
2228 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2238 #ifdef UTF8_INPUT_ENABLE
2240 w2e_conv(c2, c1, c0, p2, p1)
2244 extern unsigned short * utf8_to_euc_2bytes[];
2245 extern unsigned short ** utf8_to_euc_3bytes[];
2248 if (0xc0 <= c2 && c2 <= 0xef) {
2249 unsigned short **pp;
2252 if (c0 == 0) return -1;
2253 pp = utf8_to_euc_3bytes[c2 - 0x80];
2254 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2256 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2258 #ifdef NUMCHAR_OPTION
2261 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2266 } else if (c2 == X0201) {
2279 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2287 w16w_conv(val, p2, p1, p0)
2295 }else if (val < 0x800){
2296 *p2 = 0xc0 | (val >> 6);
2297 *p1 = 0x80 | (val & 0x3f);
2300 *p2 = 0xe0 | (val >> 12);
2301 *p1 = 0x80 | ((val >> 6) & 0x3f);
2302 *p0 = 0x80 | (val & 0x3f);
2307 ww16_conv(c2, c1, c0)
2312 val = (c2 & 0x0f) << 12;
2313 val |= (c1 & 0x3f) << 6;
2315 }else if (c2 >= 0xc0){
2316 val = (c2 & 0x1f) << 6;
2317 val |= (c1 & 0x3f) << 6;
2325 w16e_conv(val, p2, p1)
2329 extern unsigned short * utf8_to_euc_2bytes[];
2330 extern unsigned short ** utf8_to_euc_3bytes[];
2332 unsigned short **pp;
2336 w16w_conv(val, &c2, &c1, &c0);
2339 pp = utf8_to_euc_3bytes[c2 - 0x80];
2340 psize = sizeof_utf8_to_euc_C2;
2341 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2343 pp = utf8_to_euc_2bytes;
2344 psize = sizeof_utf8_to_euc_2bytes;
2345 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2347 #ifdef NUMCHAR_OPTION
2350 *p1 = CLASS_UTF16 | val;
2359 w_iconv16(c2, c1, c0)
2364 if (c2==0376 && c1==0377){
2365 utf16_mode = UTF16_INPUT;
2367 } else if (c2==0377 && c1==0376){
2368 utf16_mode = UTF16BE_INPUT;
2371 if (utf16_mode == UTF16BE_INPUT) {
2373 tmp=c1; c1=c2; c2=tmp;
2375 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2379 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2380 if (ret) return ret;
2386 w_iconv_common(c1, c0, pp, psize, p2, p1)
2388 unsigned short **pp;
2396 if (pp == 0) return 1;
2399 if (c1 < 0 || psize <= c1) return 1;
2401 if (p == 0) return 1;
2404 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2406 if (val == 0) return 1;
2409 if (c2 == SO) c2 = X0201;
2418 #ifdef UTF8_OUTPUT_ENABLE
2423 extern unsigned short euc_to_utf8_1byte[];
2424 extern unsigned short * euc_to_utf8_2bytes[];
2428 p = euc_to_utf8_1byte;
2431 c2 = (c2&0x7f) - 0x21;
2432 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2433 p = euc_to_utf8_2bytes[c2];
2438 c1 = (c1 & 0x7f) - 0x21;
2439 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2450 #ifdef NUMCHAR_OPTION
2451 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2452 w16w_conv(c1, &c2, &c1, &c0);
2456 if (c0) (*o_putc)(c0);
2463 } else if (c2 == 0) {
2464 output_mode = ASCII;
2466 } else if (c2 == ISO8859_1) {
2467 output_mode = ISO8859_1;
2468 (*o_putc)(c1 | 0x080);
2471 w16w_conv((unsigned short)e2w_conv(c2, c1), &c2, &c1, &c0);
2475 if (c0) (*o_putc)(c0);
2490 if (w_oconv16_begin_f==2) {
2492 (*o_putc)((unsigned char)'\377');
2496 (*o_putc)((unsigned char)'\377');
2498 w_oconv16_begin_f=1;
2501 if (c2 == ISO8859_1) {
2504 #ifdef NUMCHAR_OPTION
2505 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2506 c2 = (c1 >> 8) & 0xff;
2510 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2511 c2 = (val >> 8) & 0xff;
2530 #ifdef NUMCHAR_OPTION
2531 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2532 w16e_conv(c1, &c2, &c1);
2538 } else if (c2 == 0) {
2539 output_mode = ASCII;
2541 } else if (c2 == X0201) {
2542 output_mode = JAPANESE_EUC;
2543 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2544 } else if (c2 == ISO8859_1) {
2545 output_mode = ISO8859_1;
2546 (*o_putc)(c1 | 0x080);
2548 if ((c1<0x21 || 0x7e<c1) ||
2549 (c2<0x21 || 0x7e<c2)) {
2550 set_iconv(FALSE, 0);
2551 return; /* too late to rescue this char */
2553 output_mode = JAPANESE_EUC;
2554 (*o_putc)(c2 | 0x080);
2555 (*o_putc)(c1 | 0x080);
2560 e2s_conv(c2, c1, p2, p1)
2561 int c2, c1, *p2, *p1;
2563 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2564 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2572 #ifdef NUMCHAR_OPTION
2573 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2574 w16e_conv(c1, &c2, &c1);
2580 } else if (c2 == 0) {
2581 output_mode = ASCII;
2583 } else if (c2 == X0201) {
2584 output_mode = SHIFT_JIS;
2586 } else if (c2 == ISO8859_1) {
2587 output_mode = ISO8859_1;
2588 (*o_putc)(c1 | 0x080);
2590 if ((c1<0x20 || 0x7e<c1) ||
2591 (c2<0x20 || 0x7e<c2)) {
2592 set_iconv(FALSE, 0);
2593 return; /* too late to rescue this char */
2595 output_mode = SHIFT_JIS;
2596 e2s_conv(c2, c1, &c2, &c1);
2598 #ifdef SHIFTJIS_CP932
2600 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2601 extern unsigned short cp932inv[2][189];
2602 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2608 #endif /* SHIFTJIS_CP932 */
2611 if (prefix_table[(unsigned char)c1]){
2612 (*o_putc)(prefix_table[(unsigned char)c1]);
2623 #ifdef NUMCHAR_OPTION
2624 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2625 w16e_conv(c1, &c2, &c1);
2629 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2632 (*o_putc)(ascii_intro);
2633 output_mode = ASCII;
2636 } else if (c2==X0201) {
2637 if (output_mode!=X0201) {
2638 output_mode = X0201;
2644 } else if (c2==ISO8859_1) {
2645 /* iso8859 introduction, or 8th bit on */
2646 /* Can we convert in 7bit form using ESC-'-'-A ?
2648 output_mode = ISO8859_1;
2650 } else if (c2 == 0) {
2651 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2654 (*o_putc)(ascii_intro);
2655 output_mode = ASCII;
2659 if (output_mode != X0208) {
2660 output_mode = X0208;
2663 (*o_putc)(kanji_intro);
2665 if (c1<0x20 || 0x7e<c1)
2667 if (c2<0x20 || 0x7e<c2)
2679 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2681 } else if (base64_count>66 && mimeout_mode) {
2682 (*o_base64conv)(EOF,0);
2684 (*o_putc)('\t'); base64_count += 7;
2686 (*o_base64conv)(c2,c1);
2690 static int broken_buf[3];
2691 static int broken_counter = 0;
2692 static int broken_last = 0;
2699 if (broken_counter>0) {
2700 return broken_buf[--broken_counter];
2703 if (c=='$' && broken_last != ESC
2704 && (input_mode==ASCII || input_mode==X0201)) {
2707 if (c1=='@'|| c1=='B') {
2708 broken_buf[0]=c1; broken_buf[1]=c;
2715 } else if (c=='(' && broken_last != ESC
2716 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2719 if (c1=='J'|| c1=='B') {
2720 broken_buf[0]=c1; broken_buf[1]=c;
2738 if (broken_counter<2)
2739 broken_buf[broken_counter++]=c;
2743 static int prev_cr = 0;
2751 if (! (c2==0&&c1==NL) ) {
2757 } else if (c1=='\r') {
2759 } else if (c1=='\n') {
2760 if (crmode_f==CRLF) {
2761 (*o_crconv)(0,'\r');
2762 } else if (crmode_f==CR) {
2763 (*o_crconv)(0,'\r');
2767 } else if (c1!='\032' || crmode_f!=NL){
2773 Return value of fold_conv()
2775 \n add newline and output char
2776 \r add newline and output nothing
2779 1 (or else) normal output
2781 fold state in prev (previous character)
2783 >0x80 Japanese (X0208/X0201)
2788 This fold algorithm does not preserve leading space in a line.
2789 This is the main difference from fmt.
2792 #define char_size(c2,c1) (c2?2:1)
2801 if (c1== '\r' && !fold_preserve_f) {
2802 fold_state=0; /* ignore cr */
2803 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2805 fold_state=0; /* ignore cr */
2806 } else if (c1== BS) {
2807 if (f_line>0) f_line--;
2809 } else if (c2==EOF && f_line != 0) { /* close open last line */
2811 } else if ((c1=='\n' && !fold_preserve_f)
2812 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2813 && fold_preserve_f)) {
2815 if (fold_preserve_f) {
2819 } else if ((f_prev == c1 && !fold_preserve_f)
2820 || (f_prev == '\n' && fold_preserve_f)
2821 ) { /* duplicate newline */
2824 fold_state = '\n'; /* output two newline */
2830 if (f_prev&0x80) { /* Japanese? */
2832 fold_state = 0; /* ignore given single newline */
2833 } else if (f_prev==' ') {
2837 if (++f_line<=fold_len)
2841 fold_state = '\r'; /* fold and output nothing */
2845 } else if (c1=='\f') {
2850 fold_state = '\n'; /* output newline and clear */
2851 } else if ( (c2==0 && c1==' ')||
2852 (c2==0 && c1=='\t')||
2853 (c2=='!'&& c1=='!')) {
2854 /* X0208 kankaku or ascii space */
2855 if (f_prev == ' ') {
2856 fold_state = 0; /* remove duplicate spaces */
2859 if (++f_line<=fold_len)
2860 fold_state = ' '; /* output ASCII space only */
2862 f_prev = ' '; f_line = 0;
2863 fold_state = '\r'; /* fold and output nothing */
2867 prev0 = f_prev; /* we still need this one... , but almost done */
2869 if (c2 || c2==X0201)
2870 f_prev |= 0x80; /* this is Japanese */
2871 f_line += char_size(c2,c1);
2872 if (f_line<=fold_len) { /* normal case */
2875 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2876 f_line = char_size(c2,c1);
2877 fold_state = '\n'; /* We can't wait, do fold now */
2878 } else if (c2==X0201) {
2879 /* simple kinsoku rules return 1 means no folding */
2880 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2881 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2882 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2883 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2884 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2885 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2886 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2888 fold_state = '\n';/* add one new f_line before this character */
2891 fold_state = '\n';/* add one new f_line before this character */
2894 /* kinsoku point in ASCII */
2895 if ( c1==')'|| /* { [ ( */
2906 /* just after special */
2907 } else if (!is_alnum(prev0)) {
2908 f_line = char_size(c2,c1);
2910 } else if ((prev0==' ') || /* ignored new f_line */
2911 (prev0=='\n')|| /* ignored new f_line */
2912 (prev0&0x80)) { /* X0208 - ASCII */
2913 f_line = char_size(c2,c1);
2914 fold_state = '\n';/* add one new f_line before this character */
2916 fold_state = 1; /* default no fold in ASCII */
2920 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2921 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2922 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2923 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2924 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2925 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2926 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2927 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2928 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2929 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2930 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2931 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2932 /* default no fold in kinsoku */
2935 f_line = char_size(c2,c1);
2936 /* add one new f_line before this character */
2939 f_line = char_size(c2,c1);
2941 /* add one new f_line before this character */
2946 /* terminator process */
2947 switch(fold_state) {
2966 int z_prev2=0,z_prev1=0;
2973 /* if (c2) c1 &= 0x7f; assertion */
2975 if (x0201_f && z_prev2==X0201) { /* X0201 */
2976 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
2978 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2980 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
2982 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2986 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2995 if (x0201_f && c2==X0201) {
2996 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2997 /* wait for a following dakuten (voiced mark) or handakuten (semi-voiced mark) */
2998 z_prev1 = c1; z_prev2 = c2;
3001 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3006 /* JISX0208 Alphabet */
3007 if (alpha_f && c2 == 0x23 ) {
3009 } else if (alpha_f && c2 == 0x21 ) {
3010 /* JISX0208 Kigou */
3015 } else if (alpha_f&0x4) {
3020 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3026 case '>': entity = ">"; break;
3027 case '<': entity = "<"; break;
3028 case '\"': entity = """; break;
3029 case '&': entity = "&"; break;
3032 while (*entity) (*o_zconv)(0, *entity++);
3042 #define rot13(c) ( \
3044 (c <= 'M') ? (c + 13): \
3045 (c <= 'Z') ? (c - 13): \
3047 (c <= 'm') ? (c + 13): \
3048 (c <= 'z') ? (c - 13): \
3052 #define rot47(c) ( \
3054 ( c <= 'O' ) ? (c + 47) : \
3055 ( c <= '~' ) ? (c - 47) : \
3063 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3069 (*o_rot_conv)(c2,c1);
3076 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3078 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3081 (*o_hira_conv)(c2,c1);
3086 iso2022jp_check_conv(c2,c1)
3089 static int range[RANGE_NUM_MAX][2] = {
3112 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3116 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3121 for (i = 0; i < RANGE_NUM_MAX; i++) {
3122 start = range[i][0];
3125 if (c >= start && c <= end) {
3130 (*o_iso2022jp_check_conv)(c2,c1);
3134 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3136 unsigned char *mime_pattern[] = {
3137 (unsigned char *)"\075?EUC-JP?B?",
3138 (unsigned char *)"\075?SHIFT_JIS?B?",
3139 (unsigned char *)"\075?ISO-8859-1?Q?",
3140 (unsigned char *)"\075?ISO-8859-1?B?",
3141 (unsigned char *)"\075?ISO-2022-JP?B?",
3142 (unsigned char *)"\075?ISO-2022-JP?Q?",
3143 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3144 (unsigned char *)"\075?UTF-8?B?",
3146 (unsigned char *)"\075?US-ASCII?Q?",
3151 /* Marker used to raise the priority of the corresponding input code */
3152 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3153 e_iconv, s_iconv, 0, 0, 0, 0,
3154 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3160 int mime_encode[] = {
3161 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3162 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3169 int mime_encode_method[] = {
3170 'B', 'B','Q', 'B', 'B', 'Q',
3171 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3179 #define MAXRECOVER 20
3181 /* I don't trust portability of toupper */
3182 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
3183 #define nkf_isdigit(c) ('0'<=c && c<='9')
3184 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
3189 if (i_getc!=mime_getc) {
3190 i_mgetc = i_getc; i_getc = mime_getc;
3191 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3192 if(mime_f==STRICT_MIME) {
3193 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3194 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3200 unswitch_mime_getc()
3202 if(mime_f==STRICT_MIME) {
3203 i_mgetc = i_mgetc_buf;
3204 i_mungetc = i_mungetc_buf;
3207 i_ungetc = i_mungetc;
3211 mime_begin_strict(f)
3216 unsigned char *p,*q;
3217 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3219 mime_decode_mode = FALSE;
3220 /* =? has been checked */
3222 p = mime_pattern[j];
3225 for(i=2;p[i]>' ';i++) { /* start at =? */
3226 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3227 /* pattern fails, try next one */
3229 while ((p = mime_pattern[++j])) {
3230 for(k=2;k<i;k++) /* assume length(p) > i */
3231 if (p[k]!=q[k]) break;
3232 if (k==i && nkf_toupper(c1)==p[k]) break;
3234 if (p) continue; /* found next one, continue */
3235 /* all fails, output from recovery buffer */
3243 mime_decode_mode = p[i-2];
3245 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3247 if (mime_decode_mode=='B') {
3248 mimebuf_f = unbuf_f;
3250 /* do MIME integrity check */
3251 return mime_integrity(f,mime_pattern[j]);
3263 /* we don't keep eof of Fifo, because it contains ?= as
3264 a terminator. It was checked in mime_integrity. */
3265 return ((mimebuf_f)?
3266 (*i_mgetc_buf)(f):Fifo(mime_input++));
3270 mime_ungetc_buf(c,f)
3275 (*i_mungetc_buf)(c,f);
3277 Fifo(--mime_input)=c;
3288 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3289 /* re-read and convert again from mime_buffer. */
3291 /* =? has been checked */
3293 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3294 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3295 /* We accept any character type even if it is broken by new lines */
3296 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3297 if (c1=='\n'||c1==' '||c1=='\r'||
3298 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3300 /* Failed. But this could be another MIME preamble */
3308 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3309 if (!(++i<MAXRECOVER) || c1==EOF) break;
3310 if (c1=='b'||c1=='B') {
3311 mime_decode_mode = 'B';
3312 } else if (c1=='q'||c1=='Q') {
3313 mime_decode_mode = 'Q';
3317 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3318 if (!(++i<MAXRECOVER) || c1==EOF) break;
3320 mime_decode_mode = FALSE;
3326 if (!mime_decode_mode) {
3327 /* false MIME preamble, restart from mime_buffer */
3328 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3329 /* Since we are in MIME mode until buffer becomes empty, */
3330 /* we never go into mime_begin again for a while. */
3333 /* discard mime preamble, and goto MIME mode */
3335 /* do no MIME integrity check */
3336 return c1; /* used only for checking EOF */
3351 fprintf(stderr, "%s\n", str);
3357 set_input_codename (codename)
3362 strcmp(codename, "") != 0 &&
3363 strcmp(codename, input_codename) != 0)
3365 is_inputcode_mixed = TRUE;
3367 input_codename = codename;
3368 is_inputcode_set = TRUE;
3372 print_guessed_code (filename)
3375 char *codename = "BINARY";
3376 if (!is_inputcode_mixed) {
3377 if (strcmp(input_codename, "") == 0) {
3380 codename = input_codename;
3383 if (filename != NULL) printf("%s:", filename);
3384 printf("%s\n", codename);
3391 if (nkf_isdigit(x)) return x - '0';
3392 return nkf_toupper(x) - 'A' + 10;
3397 #ifdef ANSI_C_PROTOTYPE
3398 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3401 hex_getc(ch, f, g, u)
3414 if (!nkf_isxdigit(c2)){
3419 if (!nkf_isxdigit(c3)){
3424 return (hex2bin(c2) << 4) | hex2bin(c3);
3431 return hex_getc(':', f, i_cgetc, i_cungetc);
3439 return (*i_cungetc)(c, f);
3446 return hex_getc('%', f, i_ugetc, i_uungetc);
3454 return (*i_uungetc)(c, f);
3458 #ifdef NUMCHAR_OPTION
3463 int (*g)() = i_ngetc;
3464 int (*u)() = i_nungetc;
3475 if (buf[i] == 'x' || buf[i] == 'X'){
3476 for (j = 0; j < 5; j++){
3478 if (!nkf_isxdigit(buf[i])){
3485 c |= hex2bin(buf[i]);
3488 for (j = 0; j < 6; j++){
3492 if (!nkf_isdigit(buf[i])){
3499 c += hex2bin(buf[i]);
3505 return CLASS_UTF16 | c;
3515 numchar_ungetc(c, f)
3519 return (*i_nungetc)(c, f);
3528 int c1, c2, c3, c4, cc;
3529 int t1, t2, t3, t4, mode, exit_mode;
3531 if (mime_top != mime_last) { /* Something is in FIFO */
3532 return Fifo(mime_top++);
3534 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3535 mime_decode_mode=FALSE;
3536 unswitch_mime_getc();
3537 return (*i_getc)(f);
3540 if (mimebuf_f == FIXED_MIME)
3541 exit_mode = mime_decode_mode;
3544 if (mime_decode_mode == 'Q') {
3545 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3547 if (c1=='_') return ' ';
3548 if (c1!='=' && c1!='?') {
3552 mime_decode_mode = exit_mode; /* prepare for quit */
3553 if (c1<=' ') return c1;
3554 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3555 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3556 /* end Q encoding */
3557 input_mode = exit_mode;
3558 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3559 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3562 if (c1=='='&&c2<' ') { /* this is soft wrap */
3563 while((c1 = (*i_mgetc)(f)) <=' ') {
3564 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3566 mime_decode_mode = 'Q'; /* still in MIME */
3567 goto restart_mime_q;
3570 mime_decode_mode = 'Q'; /* still in MIME */
3574 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3575 if (c2<=' ') return c2;
3576 mime_decode_mode = 'Q'; /* still in MIME */
3577 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3578 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3579 return ((hex(c2)<<4) + hex(c3));
3582 if (mime_decode_mode != 'B') {
3583 mime_decode_mode = FALSE;
3584 return (*i_mgetc)(f);
3588 /* Base64 encoding */
3590 MIME allows line break in the middle of
3591 Base64, but we are very pessimistic in decoding
3592 in unbuf mode because MIME encoded code may be broken by
3593 less or editor's control sequence (such as ESC-[-K in unbuffered
3594 mode. ignore incomplete MIME.
3596 mode = mime_decode_mode;
3597 mime_decode_mode = exit_mode; /* prepare for quit */
3599 while ((c1 = (*i_mgetc)(f))<=' ') {
3604 if ((c2 = (*i_mgetc)(f))<=' ') {
3607 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3608 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3611 if ((c1 == '?') && (c2 == '=')) {
3613 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3614 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3618 if ((c3 = (*i_mgetc)(f))<=' ') {
3621 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3622 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3626 if ((c4 = (*i_mgetc)(f))<=' ') {
3629 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3630 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3634 mime_decode_mode = mode; /* still in MIME sigh... */
3636 /* BASE 64 decoding */
3638 t1 = 0x3f & base64decode(c1);
3639 t2 = 0x3f & base64decode(c2);
3640 t3 = 0x3f & base64decode(c3);
3641 t4 = 0x3f & base64decode(c4);
3642 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3644 Fifo(mime_last++) = cc;
3645 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3647 Fifo(mime_last++) = cc;
3648 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3650 Fifo(mime_last++) = cc;
3655 return Fifo(mime_top++);
3663 Fifo(--mime_top) = c;
3674 /* In buffered mode, read until =? or NL or buffer full
3676 mime_input = mime_top;
3677 mime_last = mime_top;
3678 while(*p) Fifo(mime_input++) = *p++;
3681 while((c=(*i_getc)(f))!=EOF) {
3682 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3683 break; /* buffer full */
3685 if (c=='=' && d=='?') {
3686 /* checked. skip header, start decode */
3687 Fifo(mime_input++) = c;
3688 /* mime_last_input = mime_input; */
3693 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3695 /* Should we check length mod 4? */
3696 Fifo(mime_input++) = c;
3699 /* In case of Incomplete MIME, no MIME decode */
3700 Fifo(mime_input++) = c;
3701 mime_last = mime_input; /* point undecoded buffer */
3702 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3703 switch_mime_getc(); /* anyway we need buffered getc */
3714 i = c - 'A'; /* A..Z 0-25 */
3716 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3718 } else if (c > '/') {
3719 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3720 } else if (c == '+') {
3721 i = '>' /* 62 */ ; /* + 62 */
3723 i = '?' /* 63 */ ; /* / 63 */
3728 static char basis_64[] =
3729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3739 p = mime_pattern[0];
3740 for(i=0;mime_encode[i];i++) {
3741 if (mode == mime_encode[i]) {
3742 p = mime_pattern[i];
3746 mimeout_mode = mime_encode_method[i];
3748 /* (*o_mputc)(' '); */
3765 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
3771 if (mimeout_f==FIXED_MIME) {
3772 if (base64_count>71) {
3780 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3781 && mimeout_f!=FIXED_MIME) {
3782 if (mimeout_mode=='Q') {
3789 if (mimeout_mode!='B' || c!=SPACE) {
3798 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3799 open_mime(output_mode);
3801 } else { /* c==EOF */
3802 switch(mimeout_mode) {
3807 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3813 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3819 if (mimeout_f!=FIXED_MIME) {
3821 } else if (mimeout_mode != 'Q')
3826 switch(mimeout_mode) {
3830 (*o_mputc)(itoh4(((c>>4)&0xf)));
3831 (*o_mputc)(itoh4((c&0xf)));
3838 (*o_mputc)(basis_64[c>>2]);
3843 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3849 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3850 (*o_mputc)(basis_64[c & 0x3F]);
3870 mime_f = STRICT_MIME;
3874 #if defined(MSDOS) || defined(__OS2__)
3879 iso2022jp_f = FALSE;
3881 kanji_intro = DEFAULT_J;
3882 ascii_intro = DEFAULT_R;
3884 output_conv = DEFAULT_CONV;
3885 oconv = DEFAULT_CONV;
3888 i_mungetc = std_ungetc;
3889 i_mgetc_buf = std_getc;
3890 i_mungetc_buf = std_ungetc;
3893 i_ungetc=std_ungetc;
3896 i_bungetc= std_ungetc;
3900 o_crconv = no_connection;
3901 o_rot_conv = no_connection;
3902 o_iso2022jp_check_conv = no_connection;
3903 o_hira_conv = no_connection;
3904 o_fconv = no_connection;
3905 o_zconv = no_connection;
3908 i_ungetc = std_ungetc;
3910 i_mungetc = std_ungetc;
3912 output_mode = ASCII;
3915 mime_decode_mode = FALSE;
3924 struct input_code *p = input_code_list;
3929 #ifdef UTF8_OUTPUT_ENABLE
3930 if (w_oconv16_begin_f) {
3931 w_oconv16_begin_f = 2;
3936 fold_preserve_f = FALSE;
3939 fold_margin = FOLD_MARGIN;
3942 z_prev2=0,z_prev1=0;
3946 for (i = 0; i < 256; i++){
3947 prefix_table[i] = 0;
3950 input_codename = "";
3951 is_inputcode_mixed = FALSE;
3952 is_inputcode_set = FALSE;
3957 no_connection(c2,c1)
3960 no_connection2(c2,c1,0);
3964 no_connection2(c2,c1,c0)
3967 fprintf(stderr,"nkf internal module connection failure.\n");
/*
 * Usage text, written line by line to stderr.  (The enclosing function
 * header and the #endif lines closing the conditionals below are not part
 * of this extract.)
 *
 * Exactly one of the DEFAULT_CODE_* conditionals is expected to select the
 * "j,s,e,w" line that marks the compiled-in default output code.
 *
 * Spelling fixes in the user-visible text: "Outout" -> "Output",
 * "charactor" -> "character", "hirakana" -> "hiragana".
 * NOTE(review): only the spelling of the -h line was changed; confirm that
 * the stated direction for -h1 / -h2 matches the converter implementation.
 */
3975 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3976 fprintf(stderr,"Flags:\n");
3977 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
3978 #ifdef DEFAULT_CODE_SJIS
3979 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3981 #ifdef DEFAULT_CODE_JIS
3982 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3984 #ifdef DEFAULT_CODE_EUC
3985 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3987 #ifdef DEFAULT_CODE_UTF8
3988 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3990 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3991 fprintf(stderr,"t no conversion\n");
3992 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3993 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3994 fprintf(stderr,"h 1 hiragana->katakana, 2 katakana->hiragana,3 both\n");
3995 fprintf(stderr,"v Show this usage. V: show version\n");
3996 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3997 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3998 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3999 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4000 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4001 fprintf(stderr," 3: Convert HTML Entity\n");
4002 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4003 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4005 fprintf(stderr,"T Text mode output\n");
4007 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4008 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4009 fprintf(stderr,"I Convert non ISO-2022-JP character to GETA\n");
4010 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4011 fprintf(stderr,"long name options\n");
4012 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4013 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4015 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4017 fprintf(stderr," -g, --guess Guess the input code\n");
4018 fprintf(stderr," --help,--version\n");
/*
 * Version banner, written to stderr.  The format string begins with
 * "Network Kanji Filter Version %s (%s) " and is filled from Version and
 * Patchlevel.  The three MSDOS / __WIN16__ / __WIN32__ conditionals sit
 * inside the fprintf call; the text they contribute is not part of this
 * extract (presumably a platform tag -- confirm against the full file).
 */
4025 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4026 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4029 #if defined(MSDOS) && defined(__WIN16__)
4032 #if defined(MSDOS) && defined(__WIN32__)
4038 ,Version,Patchlevel);
/* Follow the banner with the CopyRight string on a line of its own. */
4039 fprintf(stderr,"\n%s\n",CopyRight);
4044 ** パッチ制作者 (patch authors)
4045 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4046 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4047 ** ohta@src.ricoh.co.jp (Junn Ohta)
4048 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4049 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4050 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4051 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4052 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4053 ** GHG00637@nifty-serve.or.jp (COW)