1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
113 #if defined(MSDOS) || defined(__OS2__)
120 #define setbinmode(fp) fsetbin(fp)
121 #else /* Microsoft C, Turbo C */
122 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
124 #else /* UNIX,OS/2 */
125 #define setbinmode(fp)
128 #ifdef _IOFBF /* SysV and MSDOS, Windows */
129 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
131 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
134 /*Borland C++ 4.5 EasyWin*/
135 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
144 /* added by satoru@isoternet.org */
147 #include <sys/stat.h>
148 #ifndef MSDOS /* UNIX, OS/2 */
152 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
153 #include <sys/utime.h>
154 #elif defined(__TURBOC__) /* BCC */
156 #elif defined(LSI_C) /* LSI C */
168 /* state of output_mode and input_mode
185 /* Input Assumption */
189 #define LATIN1_INPUT 6
191 #define STRICT_MIME 8
196 #define JAPANESE_EUC 10
200 #define UTF8_INPUT 13
201 #define UTF16_INPUT 14
202 #define UTF16BE_INPUT 15
220 #define is_alnum(c) \
221 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
223 #define HOLD_SIZE 1024
224 #define IOBUF_SIZE 16384
226 #define DEFAULT_J 'B'
227 #define DEFAULT_R 'B'
229 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
230 #define SJ6394 0x0161 /* 63 - 94 ku offset */
232 #define RANGE_NUM_MAX 18
237 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
238 #define sizeof_euc_utf8 94
239 #define sizeof_euc_to_utf8_1byte 94
240 #define sizeof_euc_to_utf8_2bytes 94
241 #define sizeof_utf8_to_euc_C2 64
242 #define sizeof_utf8_to_euc_E5B8 64
243 #define sizeof_utf8_to_euc_2bytes 112
244 #define sizeof_utf8_to_euc_3bytes 112
247 /* MIME preprocessor */
250 #ifdef EASYWIN /*Easy Win */
251 extern POINT _BufferSize;
254 /* function prototype */
256 #ifdef ANSI_C_PROTOTYPE
258 #define STATIC static
270 void (*status_func)PROTO((struct input_code *, int));
271 int (*iconv_func)PROTO((int c2, int c1, int c0));
275 STATIC char *input_codename = "";
277 STATIC int noconvert PROTO((FILE *f));
278 STATIC int kanji_convert PROTO((FILE *f));
279 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
280 STATIC int push_hold_buf PROTO((int c2));
281 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
282 STATIC int s_iconv PROTO((int c2,int c1,int c0));
283 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
284 STATIC int e_iconv PROTO((int c2,int c1,int c0));
285 #ifdef UTF8_INPUT_ENABLE
286 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
287 STATIC int w_iconv PROTO((int c2,int c1,int c0));
288 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
289 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
290 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
292 #ifdef UTF8_OUTPUT_ENABLE
293 STATIC int e2w_conv PROTO((int c2,int c1));
294 STATIC void w_oconv PROTO((int c2,int c1));
295 STATIC void w_oconv16 PROTO((int c2,int c1));
297 STATIC void e_oconv PROTO((int c2,int c1));
298 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
299 STATIC void s_oconv PROTO((int c2,int c1));
300 STATIC void j_oconv PROTO((int c2,int c1));
301 STATIC void fold_conv PROTO((int c2,int c1));
302 STATIC void cr_conv PROTO((int c2,int c1));
303 STATIC void z_conv PROTO((int c2,int c1));
304 STATIC void rot_conv PROTO((int c2,int c1));
305 STATIC void hira_conv PROTO((int c2,int c1));
306 STATIC void base64_conv PROTO((int c2,int c1));
307 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
308 STATIC void no_connection PROTO((int c2,int c1));
309 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
311 STATIC void code_score PROTO((struct input_code *ptr));
312 STATIC void code_status PROTO((int c));
314 STATIC void std_putc PROTO((int c));
315 STATIC int std_getc PROTO((FILE *f));
316 STATIC int std_ungetc PROTO((int c,FILE *f));
318 STATIC int broken_getc PROTO((FILE *f));
319 STATIC int broken_ungetc PROTO((int c,FILE *f));
321 STATIC int mime_begin PROTO((FILE *f));
322 STATIC int mime_getc PROTO((FILE *f));
323 STATIC int mime_ungetc PROTO((int c,FILE *f));
325 STATIC int mime_begin_strict PROTO((FILE *f));
326 STATIC int mime_getc_buf PROTO((FILE *f));
327 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
328 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
330 STATIC int base64decode PROTO((int c));
331 STATIC void mime_putc PROTO((int c));
332 STATIC void open_mime PROTO((int c));
333 STATIC void close_mime PROTO(());
334 STATIC void usage PROTO(());
335 STATIC void version PROTO(());
336 STATIC void options PROTO((unsigned char *c));
338 STATIC void reinit PROTO(());
343 static unsigned char stdibuf[IOBUF_SIZE];
344 static unsigned char stdobuf[IOBUF_SIZE];
345 static unsigned char hold_buf[HOLD_SIZE*2];
346 static int hold_count;
348 /* MIME preprocessor fifo */
350 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
351 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
352 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
353 static unsigned char mime_buf[MIME_BUF_SIZE];
354 static unsigned int mime_top = 0;
355 static unsigned int mime_last = 0; /* decoded */
356 static unsigned int mime_input = 0; /* undecoded */
359 static int unbuf_f = FALSE;
360 static int estab_f = FALSE; /* input code has been established */
361 static int nop_f = FALSE;
362 static int binmode_f = TRUE; /* binary mode */
363 static int rot_f = FALSE; /* rot13/47 mode */
364 static int hira_f = FALSE; /* hiragana/katakana conversion */
365 static int input_f = FALSE; /* non fixed input code */
366 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
367 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
368 static int mimebuf_f = FALSE; /* MIME buffered input */
369 static int broken_f = FALSE; /* convert ESC-less broken JIS */
370 static int iso8859_f = FALSE; /* ISO8859 through */
371 static int mimeout_f = FALSE; /* base64 mode */
372 #if defined(MSDOS) || defined(__OS2__)
373 static int x0201_f = TRUE; /* Assume JISX0201 kana */
375 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
377 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
378 #ifdef UTF8_OUTPUT_ENABLE
379 static int w_oconv16_begin_f= 0; /* utf-16 header */
380 static int w_oconv16_LE = 0; /* utf-16 little endian */
384 #ifdef NUMCHAR_OPTION
386 #define CLASS_MASK 0x0f000000
387 #define CLASS_UTF16 0x01000000
391 static int cap_f = FALSE;
392 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
393 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
394 STATIC int cap_getc PROTO((FILE *f));
395 STATIC int cap_ungetc PROTO((int c,FILE *f));
397 static int url_f = FALSE;
398 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
399 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
400 STATIC int url_getc PROTO((FILE *f));
401 STATIC int url_ungetc PROTO((int c,FILE *f));
403 static int numchar_f = FALSE;
404 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
405 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
406 STATIC int numchar_getc PROTO((FILE *f));
407 STATIC int numchar_ungetc PROTO((int c,FILE *f));
411 static int noout_f = FALSE;
412 STATIC void no_putc PROTO((int c));
413 static int debug_f = FALSE;
414 STATIC void debug PROTO((char *str));
418 static int exec_f = 0;
421 #ifdef SHIFTJIS_CP932
422 STATIC int cp932_f = TRUE;
423 #define CP932_TABLE_BEGIN (0xfa)
424 #define CP932_TABLE_END (0xfc)
426 #endif /* SHIFTJIS_CP932 */
428 STATIC void e_status PROTO((struct input_code *, int));
429 STATIC void s_status PROTO((struct input_code *, int));
431 #ifdef UTF8_INPUT_ENABLE
432 STATIC void w_status PROTO((struct input_code *, int));
433 STATIC void w16_status PROTO((struct input_code *, int));
434 static int utf16_mode = UTF16_INPUT;
437 struct input_code input_code_list[] = {
438 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
439 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
440 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
441 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
445 static int mimeout_mode = 0;
446 static int base64_count = 0;
448 /* X0208 -> ASCII converter */
451 static int f_line = 0; /* chars in line */
452 static int f_prev = 0;
453 static int fold_preserve_f = FALSE; /* preserve new lines */
454 static int fold_f = FALSE;
455 static int fold_len = 0;
458 static unsigned char kanji_intro = DEFAULT_J,
459 ascii_intro = DEFAULT_R;
463 #define FOLD_MARGIN 10
464 #define DEFAULT_FOLD 60
466 static int fold_margin = FOLD_MARGIN;
470 #ifdef DEFAULT_CODE_JIS
471 # define DEFAULT_CONV j_oconv
473 #ifdef DEFAULT_CODE_SJIS
474 # define DEFAULT_CONV s_oconv
476 #ifdef DEFAULT_CODE_EUC
477 # define DEFAULT_CONV e_oconv
479 #ifdef DEFAULT_CODE_UTF8
480 # define DEFAULT_CONV w_oconv
483 /* process default */
484 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
486 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
487 /* s_iconv or oconv */
488 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
490 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
491 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
492 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
493 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
494 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
495 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
496 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
498 /* static redirections */
500 static void (*o_putc)PROTO((int c)) = std_putc;
502 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
503 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
505 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
506 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
508 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
510 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
511 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
513 /* for strict mime */
514 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
515 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
518 static int output_mode = ASCII, /* output kanji mode */
519 input_mode = ASCII, /* input kanji mode */
520 shift_mode = FALSE; /* TRUE shift out, or X0201 */
521 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
523 /* X0201 / X0208 conversion tables */
525 /* X0201 kana conversion table */
528 unsigned char cv[]= {
529 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
530 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
531 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
532 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
533 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
534 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
535 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
536 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
537 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
538 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
539 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
540 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
541 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
542 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
543 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
544 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
548 /* X0201 kana conversion table for dakuten */
551 unsigned char dv[]= {
552 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
553 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
554 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
555 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
557 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
558 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
559 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
560 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
561 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
562 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
563 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
564 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
567 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
570 /* X0201 kana conversion table for han-dakuten */
573 unsigned char ev[]= {
574 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
575 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
577 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
579 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
583 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
584 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
585 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593 /* X0208 kigou conversion table */
594 /* 0x8140 - 0x819e */
596 unsigned char fv[] = {
598 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
599 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
600 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
602 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
603 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
604 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
606 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
615 static int file_out = FALSE;
617 static int overwrite = FALSE;
620 static int crmode_f = 0; /* CR, NL, CRLF */
621 #ifdef EASYWIN /*Easy Win */
622 static int end_check;
637 #ifdef EASYWIN /*Easy Win */
638 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
641 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
642 cp = (unsigned char *)*argv;
647 if (pipe(fds) < 0 || (pid = fork()) < 0){
658 execvp(argv[1], &argv[1]);
672 if(x0201_f == WISH_TRUE)
673 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
675 if (binmode_f == TRUE)
677 if (freopen("","wb",stdout) == NULL)
684 setbuf(stdout, (char *) NULL);
686 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
689 if (binmode_f == TRUE)
691 if (freopen("","rb",stdin) == NULL) return (-1);
695 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
699 kanji_convert(stdin);
702 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
711 /* reopen file for stdout */
712 if (file_out == TRUE) {
715 outfname = malloc(strlen(origfname)
716 + strlen(".nkftmpXXXXXX")
722 strcpy(outfname, origfname);
726 for (i = strlen(outfname); i; --i){
727 if (outfname[i - 1] == '/'
728 || outfname[i - 1] == '\\'){
734 strcat(outfname, "ntXXXXXX");
736 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
739 strcat(outfname, ".nkftmpXXXXXX");
740 fd = mkstemp(outfname);
743 || (fd_backup = dup(fileno(stdout))) < 0
744 || dup2(fd, fileno(stdout)) < 0
755 outfname = "nkf.out";
758 if(freopen(outfname, "w", stdout) == NULL) {
762 if (binmode_f == TRUE) {
764 if (freopen("","wb",stdout) == NULL)
771 if (binmode_f == TRUE)
773 if (freopen("","rb",fin) == NULL)
778 setvbuffer(fin, stdibuf, IOBUF_SIZE);
787 #if defined(MSDOS) && !defined(__MINGW32__)
795 if (dup2(fd_backup, fileno(stdout)) < 0){
798 if (stat(origfname, &sb)) {
799 fprintf(stderr, "Can't stat %s\n", origfname);
801 /* restore the original file's permissions */
802 if (chmod(outfname, sb.st_mode)) {
803 fprintf(stderr, "Can't set permission %s\n", outfname);
806 /* restore the original file's timestamps */
807 #if defined(MSDOS) && !defined(__MINGW32__)
808 tb[0] = tb[1] = sb.st_mtime;
809 if (utime(outfname, tb)) {
810 fprintf(stderr, "Can't set timestamp %s\n", outfname);
813 tb.actime = sb.st_atime;
814 tb.modtime = sb.st_mtime;
815 if (utime(outfname, &tb)) {
816 fprintf(stderr, "Can't set timestamp %s\n", outfname);
820 if (unlink(origfname)){
824 if (rename(outfname, origfname)) {
826 fprintf(stderr, "Can't rename %s to %s\n",
827 outfname, origfname);
835 #ifdef EASYWIN /*Easy Win */
836 if (file_out == FALSE)
837 scanf("%d",&end_check);
840 #else /* for Other OS */
841 if (file_out == TRUE)
871 {"katakana-hiragana","h3"},
872 #ifdef UTF8_OUTPUT_ENABLE
876 #ifdef UTF8_INPUT_ENABLE
878 {"utf16-input", "W16"},
887 #ifdef NUMCHAR_OPTION
888 {"numchar-input", ""},
894 #ifdef SHIFTJIS_CP932
903 static int option_mode;
918 case '-': /* literal options */
919 if (!*cp) { /* ignore the rest of arguments */
923 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
925 p = (unsigned char *)long_option[i].name;
926 for (j=0;*p && *p++ == cp[j];j++);
927 if (! *p && !cp[j]) break;
930 cp = (unsigned char *)long_option[i].alias;
933 if (strcmp(long_option[i].name, "overwrite") == 0){
940 if (strcmp(long_option[i].name, "cap-input") == 0){
944 if (strcmp(long_option[i].name, "url-input") == 0){
949 #ifdef NUMCHAR_OPTION
950 if (strcmp(long_option[i].name, "numchar-input") == 0){
956 if (strcmp(long_option[i].name, "no-output") == 0){
960 if (strcmp(long_option[i].name, "debug") == 0){
965 #ifdef SHIFTJIS_CP932
966 if (strcmp(long_option[i].name, "no-cp932") == 0){
972 if (strcmp(long_option[i].name, "exec-in") == 0){
976 if (strcmp(long_option[i].name, "exec-out") == 0){
983 case 'b': /* buffered mode */
986 case 'u': /* unbuffered mode */
989 case 't': /* transparent mode */
992 case 'j': /* JIS output */
994 output_conv = j_oconv;
996 case 'e': /* AT&T EUC output */
997 output_conv = e_oconv;
999 case 's': /* SJIS output */
1000 output_conv = s_oconv;
1002 case 'l': /* ISO8859 Latin-1 support, no conversion */
1003 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1004 input_f = LATIN1_INPUT;
1006 case 'i': /* Kanji IN ESC-$-@/B */
1007 if (*cp=='@'||*cp=='B')
1008 kanji_intro = *cp++;
1010 case 'o': /* ASCII IN ESC-(-J/B */
1011 if (*cp=='J'||*cp=='B'||*cp=='H')
1012 ascii_intro = *cp++;
1019 if ('9'>= *cp && *cp>='0')
1020 hira_f |= (*cp++ -'0');
1027 #if defined(MSDOS) || defined(__OS2__)
1042 #ifdef UTF8_OUTPUT_ENABLE
1043 case 'w': /* UTF-8 output */
1044 if ('1'== cp[0] && '6'==cp[1]) {
1045 output_conv = w_oconv16; cp+=2;
1047 w_oconv16_begin_f=2; cp++;
1050 w_oconv16_begin_f=1; cp++;
1052 } else if (cp[0] == 'B') {
1053 w_oconv16_begin_f=2; cp++;
1055 w_oconv16_begin_f=1; cp++;
1059 output_conv = w_oconv;
1062 #ifdef UTF8_INPUT_ENABLE
1063 case 'W': /* UTF-8 input */
1064 if ('1'== cp[0] && '6'==cp[1]) {
1065 input_f = UTF16_INPUT;
1067 input_f = UTF8_INPUT;
1070 /* Input code assumption */
1071 case 'J': /* JIS input */
1072 case 'E': /* AT&T EUC input */
1073 input_f = JIS_INPUT;
1075 case 'S': /* MS Kanji input */
1076 input_f = SJIS_INPUT;
1077 if (x0201_f==NO_X0201) x0201_f=TRUE;
1079 case 'Z': /* Convert X0208 alphabet to ASCII */
1080 /* bit:0 Convert X0208
1081 bit:1 Convert Kankaku to one space
1082 bit:2 Convert Kankaku to two spaces
1083 bit:3 Convert HTML Entity
1085 if ('9'>= *cp && *cp>='0')
1086 alpha_f |= 1<<(*cp++ -'0');
1090 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1091 x0201_f = FALSE; /* No X0201->X0208 conversion */
1093 ESC-(-I in JIS, EUC, MS Kanji
1094 SI/SO in JIS, EUC, MS Kanji
1095 SSO in EUC, JIS, not in MS Kanji
1096 MS Kanji (0xa0-0xdf)
1098 ESC-(-I in JIS (0x20-0x5f)
1099 SSO in EUC (0xa0-0xdf)
1100 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1103 case 'X': /* Assume X0201 kana */
1104 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1107 case 'F': /* preserve new lines */
1108 fold_preserve_f = TRUE; /* fall through to 'f' */
1109 case 'f': /* folding -f60 or -f */
1112 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1114 fold_len += *cp++ - '0';
1116 if (!(0<fold_len && fold_len<BUFSIZ))
1117 fold_len = DEFAULT_FOLD;
1121 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1123 fold_margin += *cp++ - '0';
1127 case 'm': /* MIME support */
1128 if (*cp=='B'||*cp=='Q') {
1129 mime_decode_mode = *cp++;
1130 mimebuf_f = FIXED_MIME;
1131 } else if (*cp=='N') {
1132 mime_f = TRUE; cp++;
1133 } else if (*cp=='S') {
1134 mime_f = STRICT_MIME; cp++;
1135 } else if (*cp=='0') {
1136 mime_f = FALSE; cp++;
1139 case 'M': /* MIME output */
1142 mimeout_f = FIXED_MIME; cp++;
1143 } else if (*cp=='Q') {
1145 mimeout_f = FIXED_MIME; cp++;
1150 case 'B': /* Broken JIS support */
1152 bit:1 allow any x on ESC-(-x or ESC-$-x
1153 bit:2 reset to ascii on NL
1155 if ('9'>= *cp && *cp>='0')
1156 broken_f |= 1<<(*cp++ -'0');
1161 case 'O':/* for Output file */
1165 case 'c':/* add cr code */
1168 case 'd':/* delete cr code */
1171 case 'I': /* ISO-2022-JP output */
1174 case 'L': /* line mode */
1175 if (*cp=='u') { /* unix */
1176 crmode_f = NL; cp++;
1177 } else if (*cp=='m') { /* mac */
1178 crmode_f = CR; cp++;
1179 } else if (*cp=='w') { /* windows */
1180 crmode_f = CRLF; cp++;
1181 } else if (*cp=='0') { /* no conversion */
1186 /* multiple options in a single string are allowed for the Perl module */
1187 while(*cp && *cp!='-') cp++;
1191 /* bogus option but ignored */
1197 #ifdef ANSI_C_PROTOTYPE
1198 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1200 struct input_code * find_inputcode_byfunc(iconv_func)
1201 int (*iconv_func)();
1205 struct input_code *p = input_code_list;
1207 if (iconv_func == p->iconv_func){
1216 #ifdef ANSI_C_PROTOTYPE
1217 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1219 void set_iconv(f, iconv_func)
1221 int (*iconv_func)();
1225 static int (*iconv_for_check)() = 0;
1227 #ifdef INPUT_CODE_FIX
1235 #ifdef INPUT_CODE_FIX
1236 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1242 if (estab_f && iconv_for_check != iconv){
1243 struct input_code *p = find_inputcode_byfunc(iconv);
1245 debug(input_codename = p->name);
1247 iconv_for_check = iconv;
1252 #define SCORE_L2 (1) /* JIS level-2 kanji */
1253 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1254 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1255 #ifdef SHIFTJIS_CP932
1256 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1257 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1259 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1261 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1262 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1264 #define SCORE_INIT (SCORE_iMIME)
1266 int score_table_A0[] = {
1269 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1270 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1273 int score_table_F0[] = {
1274 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1275 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1276 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1277 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1280 void set_code_score(ptr, score)
1281 struct input_code *ptr;
1285 ptr->score |= score;
1289 void clr_code_score(ptr, score)
1290 struct input_code *ptr;
1294 ptr->score &= ~score;
1298 void code_score(ptr)
1299 struct input_code *ptr;
1301 int c2 = ptr->buf[0];
1302 int c1 = ptr->buf[1];
1304 set_code_score(ptr, SCORE_ERROR);
1305 }else if (c2 == SSO){
1306 set_code_score(ptr, SCORE_KANA);
1307 #ifdef UTF8_OUTPUT_ENABLE
1308 }else if (!e2w_conv(c2, c1)){
1309 set_code_score(ptr, SCORE_NO_EXIST);
1311 }else if ((c2 & 0x70) == 0x20){
1312 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1313 }else if ((c2 & 0x70) == 0x70){
1314 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1315 }else if ((c2 & 0x70) >= 0x50){
1316 set_code_score(ptr, SCORE_L2);
1320 void status_disable(ptr)
1321 struct input_code *ptr;
1326 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1329 void status_push_ch(ptr, c)
1330 struct input_code *ptr;
1333 ptr->buf[ptr->index++] = c;
1336 void status_clear(ptr)
1337 struct input_code *ptr;
1343 void status_reset(ptr)
1344 struct input_code *ptr;
1347 ptr->score = SCORE_INIT;
1350 void status_reinit(ptr)
1351 struct input_code *ptr;
1354 ptr->_file_stat = 0;
1357 void status_check(ptr, c)
1358 struct input_code *ptr;
1361 if (c <= DEL && estab_f){
1366 void s_status(ptr, c)
1367 struct input_code *ptr;
1372 status_check(ptr, c);
1377 #ifdef NUMCHAR_OPTION
1378 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1381 }else if (0xa1 <= c && c <= 0xdf){
1382 status_push_ch(ptr, SSO);
1383 status_push_ch(ptr, c);
1386 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1388 status_push_ch(ptr, c);
1389 #ifdef SHIFTJIS_CP932
1391 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1393 status_push_ch(ptr, c);
1394 #endif /* SHIFTJIS_CP932 */
1396 status_disable(ptr);
1400 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1401 status_push_ch(ptr, c);
1402 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1406 status_disable(ptr);
1409 #ifdef SHIFTJIS_CP932
1411 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1412 status_push_ch(ptr, c);
1413 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1414 set_code_score(ptr, SCORE_CP932);
1419 status_disable(ptr);
1421 #endif /* SHIFTJIS_CP932 */
1425 void e_status(ptr, c)
1426 struct input_code *ptr;
1431 status_check(ptr, c);
1436 #ifdef NUMCHAR_OPTION
1437 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1440 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1442 status_push_ch(ptr, c);
1444 status_disable(ptr);
1448 if (0xa1 <= c && c <= 0xfe){
1449 status_push_ch(ptr, c);
1453 status_disable(ptr);
1459 #ifdef UTF8_INPUT_ENABLE
1460 void w16_status(ptr, c)
1461 struct input_code *ptr;
1468 if (ptr->_file_stat == 0){
1469 if (c == 0xfe || c == 0xff){
1471 status_push_ch(ptr, c);
1472 ptr->_file_stat = 1;
1474 status_disable(ptr);
1475 ptr->_file_stat = -1;
1477 }else if (ptr->_file_stat > 0){
1479 status_push_ch(ptr, c);
1480 }else if (ptr->_file_stat < 0){
1481 status_disable(ptr);
1487 status_disable(ptr);
1488 ptr->_file_stat = -1;
1490 status_push_ch(ptr, c);
1497 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1498 status_push_ch(ptr, c);
1501 status_disable(ptr);
1502 ptr->_file_stat = -1;
1508 void w_status(ptr, c)
1509 struct input_code *ptr;
1514 status_check(ptr, c);
1519 #ifdef NUMCHAR_OPTION
1520 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1523 }else if (0xc0 <= c && c <= 0xdf){
1525 status_push_ch(ptr, c);
1526 }else if (0xe0 <= c && c <= 0xef){
1528 status_push_ch(ptr, c);
1530 status_disable(ptr);
1535 if (0x80 <= c && c <= 0xbf){
1536 status_push_ch(ptr, c);
1537 if (ptr->index > ptr->stat){
1538 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1539 && ptr->buf[2] == 0xbf);
1540 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1541 &ptr->buf[0], &ptr->buf[1]);
1548 status_disable(ptr);
1559 int action_flag = 1;
1560 struct input_code *result = 0;
1561 struct input_code *p = input_code_list;
1563 (p->status_func)(p, c);
1566 }else if(p->stat == 0){
1577 if (result && !estab_f){
1578 set_iconv(TRUE, result->iconv_func);
1579 }else if (c <= DEL){
1580 struct input_code *ptr = input_code_list;
1590 #define STD_GC_BUFSIZE (256)
1591 int std_gc_buf[STD_GC_BUFSIZE];
1601 return std_gc_buf[--std_gc_ndx];
1613 if (std_gc_ndx == STD_GC_BUFSIZE){
1616 std_gc_buf[std_gc_ndx++] = c;
1636 while ((c = (*i_getc)(f)) != EOF)
1645 oconv = output_conv;
1648 /* replace continuation module, from output side */
1650 /* output redirection */
1659 if (mimeout_f == TRUE) {
1660 o_base64conv = oconv; oconv = base64_conv;
1662 /* base64_count = 0; */
1666 o_crconv = oconv; oconv = cr_conv;
1669 o_rot_conv = oconv; oconv = rot_conv;
1672 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1675 o_hira_conv = oconv; oconv = hira_conv;
1678 o_fconv = oconv; oconv = fold_conv;
1681 if (alpha_f || x0201_f) {
1682 o_zconv = oconv; oconv = z_conv;
1686 i_ungetc = std_ungetc;
1687 /* input redirection */
1690 i_cgetc = i_getc; i_getc = cap_getc;
1691 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1694 i_ugetc = i_getc; i_getc = url_getc;
1695 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1698 #ifdef NUMCHAR_OPTION
1700 i_ngetc = i_getc; i_getc = numchar_getc;
1701 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1704 if (mime_f && mimebuf_f==FIXED_MIME) {
1705 i_mgetc = i_getc; i_getc = mime_getc;
1706 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1709 i_bgetc = i_getc; i_getc = broken_getc;
1710 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1712 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1713 set_iconv(-TRUE, e_iconv);
1714 } else if (input_f == SJIS_INPUT) {
1715 set_iconv(-TRUE, s_iconv);
1716 #ifdef UTF8_INPUT_ENABLE
1717 } else if (input_f == UTF8_INPUT) {
1718 set_iconv(-TRUE, w_iconv);
1719 } else if (input_f == UTF16_INPUT) {
1720 set_iconv(-TRUE, w_iconv16);
1723 set_iconv(FALSE, e_iconv);
1727 struct input_code *p = input_code_list;
1735 Conversion main loop. Code detection only.
1745 module_connection();
1750 output_mode = ASCII;
1753 #define NEXT continue /* no output, get next */
1754 #define SEND ; /* output c1 and c2, get next */
1755 #define LAST break /* end of loop, go closing */
1757 while ((c1 = (*i_getc)(f)) != EOF) {
1762 /* in case of 8th bit is on */
1764 /* in case of not established yet */
1765 /* It is still ambiguous */
1766 if (h_conv(f, c2, c1)==EOF)
1772 /* in case of already established */
1774 /* ignore bogus code */
1780 /* second byte, 7 bit code */
1781 /* it might be kanji shifted */
1782 if ((c1 == DEL) || (c1 <= SPACE)) {
1783 /* ignore bogus first code */
1791 #ifdef UTF8_INPUT_ENABLE
1800 #ifdef NUMCHAR_OPTION
1801 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1804 } else if (c1 > DEL) {
1806 if (!estab_f && !iso8859_f) {
1807 /* not established yet */
1810 } else { /* estab_f==TRUE */
1815 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1816 /* SJIS X0201 Case... */
1817 if(iso2022jp_f && x0201_f==NO_X0201) {
1818 (*oconv)(GETA1, GETA2);
1825 } else if (c1==SSO && iconv != s_iconv) {
1826 /* EUC X0201 Case */
1827 c1 = (*i_getc)(f); /* skip SSO */
1829 if (SSP<=c1 && c1<0xe0) {
1830 if(iso2022jp_f && x0201_f==NO_X0201) {
1831 (*oconv)(GETA1, GETA2);
1838 } else { /* bogus code, skip SSO and one byte */
1842 /* already established */
1847 } else if ((c1 > SPACE) && (c1 != DEL)) {
1848 /* in case of Roman characters */
1850 /* output 1 shifted byte */
1854 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1855 /* output 1 shifted byte */
1856 if(iso2022jp_f && x0201_f==NO_X0201) {
1857 (*oconv)(GETA1, GETA2);
1864 /* look like bogus code */
1867 } else if (input_mode == X0208) {
1868 /* in case of Kanji shifted */
1871 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1872 /* Check MIME code */
1873 if ((c1 = (*i_getc)(f)) == EOF) {
1876 } else if (c1 == '?') {
1877 /* =? is mime conversion start sequence */
1878 if(mime_f == STRICT_MIME) {
1879 /* check in real detail */
1880 if (mime_begin_strict(f) == EOF)
1884 } else if (mime_begin(f) == EOF)
1894 /* normal ASCII code */
1897 } else if (c1 == SI) {
1900 } else if (c1 == SO) {
1903 } else if (c1 == ESC ) {
1904 if ((c1 = (*i_getc)(f)) == EOF) {
1905 /* (*oconv)(0, ESC); don't send bogus code */
1907 } else if (c1 == '$') {
1908 if ((c1 = (*i_getc)(f)) == EOF) {
1910 (*oconv)(0, ESC); don't send bogus code
1911 (*oconv)(0, '$'); */
1913 } else if (c1 == '@'|| c1 == 'B') {
1914 /* This is kanji introduction */
1917 debug(input_codename = "ISO-2022-JP");
1919 } else if (c1 == '(') {
1920 if ((c1 = (*i_getc)(f)) == EOF) {
1921 /* don't send bogus code
1927 } else if (c1 == '@'|| c1 == 'B') {
1928 /* This is kanji introduction */
1933 /* could be some special code */
1940 } else if (broken_f&0x2) {
1941 /* accept any ESC-(-x as broken code ... */
1951 } else if (c1 == '(') {
1952 if ((c1 = (*i_getc)(f)) == EOF) {
1953 /* don't send bogus code
1955 (*oconv)(0, '('); */
1959 /* This is X0201 kana introduction */
1960 input_mode = X0201; shift_mode = X0201;
1962 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
1963 /* This is X0208 kanji introduction */
1964 input_mode = ASCII; shift_mode = FALSE;
1966 } else if (broken_f&0x2) {
1967 input_mode = ASCII; shift_mode = FALSE;
1972 /* maintain various input_mode here */
1976 } else if ( c1 == 'N' || c1 == 'n' ){
1978 c3 = (*i_getc)(f); /* skip SS2 */
1979 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
1994 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
1995 input_mode = ASCII; set_iconv(FALSE, 0);
2001 if (input_mode == X0208)
2002 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2003 else if (input_mode)
2004 (*oconv)(input_mode, c1); /* other special case */
2005 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2006 int c0 = (*i_getc)(f);
2009 (*iconv)(c2, c1, c0);
2015 /* goto next_word */
2019 (*iconv)(EOF, 0, 0);
2032 /** it must NOT be in the kanji shifted sequence */
2033 /** it must NOT be written in JIS7 */
2034 /** and it must be after 2 byte 8bit code */
2041 while ((c1 = (*i_getc)(f)) != EOF) {
2047 if (push_hold_buf(c1) == EOF || estab_f){
2053 struct input_code *p = input_code_list;
2054 struct input_code *result = p;
2059 if (p->score < result->score){
2064 set_iconv(FALSE, result->iconv_func);
2069 ** 1) EOF is detected, or
2070 ** 2) Code is established, or
2071 ** 3) Buffer is FULL (but last word is pushed)
2073 ** in 1) and 3) cases, we continue to use
2074 ** Kanji codes by oconv and leave estab_f unchanged.
2079 while (wc < hold_count){
2080 c2 = hold_buf[wc++];
2082 #ifdef NUMCHAR_OPTION
2083 || (c2 & CLASS_MASK) == CLASS_UTF16
2088 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2089 (*iconv)(X0201, c2, 0);
2092 if (wc < hold_count){
2093 c1 = hold_buf[wc++];
2102 if ((*iconv)(c2, c1, 0) < 0){
2104 if (wc < hold_count){
2105 c0 = hold_buf[wc++];
2114 (*iconv)(c2, c1, c0);
2127 if (hold_count >= HOLD_SIZE*2)
2129 hold_buf[hold_count++] = c2;
2130 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2133 int s2e_conv(c2, c1, p2, p1)
2137 #ifdef SHIFTJIS_CP932
2138 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2139 extern unsigned short shiftjis_cp932[3][189];
2140 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2141 if (c1 == 0) return 1;
2145 #endif /* SHIFTJIS_CP932 */
2146 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2148 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2165 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2168 int ret = s2e_conv(c2, c1, &c2, &c1);
2169 if (ret) return ret;
2182 } else if (c2 == SSO){
2185 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2195 #ifdef UTF8_INPUT_ENABLE
2197 w2e_conv(c2, c1, c0, p2, p1)
2201 extern unsigned short * utf8_to_euc_2bytes[];
2202 extern unsigned short ** utf8_to_euc_3bytes[];
2205 if (0xc0 <= c2 && c2 <= 0xef) {
2206 unsigned short **pp;
2209 if (c0 == 0) return -1;
2210 pp = utf8_to_euc_3bytes[c2 - 0x80];
2211 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2213 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2215 #ifdef NUMCHAR_OPTION
2218 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2223 } else if (c2 == X0201) {
2236 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2244 w16w_conv(val, p2, p1, p0)
2252 }else if (val < 0x800){
2253 *p2 = 0xc0 | (val >> 6);
2254 *p1 = 0x80 | (val & 0x3f);
2257 *p2 = 0xe0 | (val >> 12);
2258 *p1 = 0x80 | ((val >> 6) & 0x3f);
2259 *p0 = 0x80 | (val & 0x3f);
2264 ww16_conv(c2, c1, c0)
2269 val = (c2 & 0x0f) << 12;
2270 val |= (c1 & 0x3f) << 6;
2272 }else if (c2 >= 0xc0){
2273 val = (c2 & 0x1f) << 6;
2274 val |= (c1 & 0x3f) << 6;
2282 w16e_conv(val, p2, p1)
2286 extern unsigned short * utf8_to_euc_2bytes[];
2287 extern unsigned short ** utf8_to_euc_3bytes[];
2289 unsigned short **pp;
2293 w16w_conv(val, &c2, &c1, &c0);
2296 pp = utf8_to_euc_3bytes[c2 - 0x80];
2297 psize = sizeof_utf8_to_euc_C2;
2298 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2300 pp = utf8_to_euc_2bytes;
2301 psize = sizeof_utf8_to_euc_2bytes;
2302 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2304 #ifdef NUMCHAR_OPTION
2307 *p1 = CLASS_UTF16 | val;
2316 w_iconv16(c2, c1, c0)
2321 if (c2==0376 && c1==0377){
2322 utf16_mode = UTF16_INPUT;
2324 } else if (c2==0377 && c1==0376){
2325 utf16_mode = UTF16BE_INPUT;
2328 if (utf16_mode == UTF16BE_INPUT) {
2330 tmp=c1; c1=c2; c2=tmp;
2332 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2336 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2337 if (ret) return ret;
2343 w_iconv_common(c1, c0, pp, psize, p2, p1)
2345 unsigned short **pp;
2353 if (pp == 0) return 1;
2356 if (c1 < 0 || psize <= c1) return 1;
2358 if (p == 0) return 1;
2361 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2363 if (val == 0) return 1;
2366 if (c2 == SO) c2 = X0201;
2375 #ifdef UTF8_OUTPUT_ENABLE
2380 extern unsigned short euc_to_utf8_1byte[];
2381 extern unsigned short * euc_to_utf8_2bytes[];
2385 p = euc_to_utf8_1byte;
2388 c2 = (c2&0x7f) - 0x21;
2389 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2390 p = euc_to_utf8_2bytes[c2];
2395 c1 = (c1 & 0x7f) - 0x21;
2396 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2407 #ifdef NUMCHAR_OPTION
2408 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2409 w16w_conv(c1, &c2, &c1, &c0);
2413 if (c0) (*o_putc)(c0);
2420 } else if (c2 == 0) {
2421 output_mode = ASCII;
2423 } else if (c2 == ISO8859_1) {
2424 output_mode = ISO8859_1;
2425 (*o_putc)(c1 | 0x080);
2428 w16w_conv((unsigned short)e2w_conv(c2, c1), &c2, &c1, &c0);
2432 if (c0) (*o_putc)(c0);
2447 if (w_oconv16_begin_f==2) {
2449 (*o_putc)((unsigned char)'\377');
2453 (*o_putc)((unsigned char)'\377');
2455 w_oconv16_begin_f=1;
2458 if (c2 == ISO8859_1) {
2461 #ifdef NUMCHAR_OPTION
2462 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2463 c2 = (c1 >> 8) & 0xff;
2467 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2468 c2 = (val >> 8) & 0xff;
2487 #ifdef NUMCHAR_OPTION
2488 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2489 w16e_conv(c1, &c2, &c1);
2495 } else if (c2 == 0) {
2496 output_mode = ASCII;
2498 } else if (c2 == X0201) {
2499 output_mode = JAPANESE_EUC;
2500 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2501 } else if (c2 == ISO8859_1) {
2502 output_mode = ISO8859_1;
2503 (*o_putc)(c1 | 0x080);
2505 if ((c1<0x21 || 0x7e<c1) ||
2506 (c2<0x21 || 0x7e<c2)) {
2507 set_iconv(FALSE, 0);
2508 return; /* too late to rescue this char */
2510 output_mode = JAPANESE_EUC;
2511 (*o_putc)(c2 | 0x080);
2512 (*o_putc)(c1 | 0x080);
2517 e2s_conv(c2, c1, p2, p1)
2518 int c2, c1, *p2, *p1;
2520 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2521 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2529 #ifdef NUMCHAR_OPTION
2530 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2531 w16e_conv(c1, &c2, &c1);
2537 } else if (c2 == 0) {
2538 output_mode = ASCII;
2540 } else if (c2 == X0201) {
2541 output_mode = SHIFT_JIS;
2543 } else if (c2 == ISO8859_1) {
2544 output_mode = ISO8859_1;
2545 (*o_putc)(c1 | 0x080);
2547 if ((c1<0x20 || 0x7e<c1) ||
2548 (c2<0x20 || 0x7e<c2)) {
2549 set_iconv(FALSE, 0);
2550 return; /* too late to rescue this char */
2552 output_mode = SHIFT_JIS;
2553 e2s_conv(c2, c1, &c2, &c1);
2564 #ifdef NUMCHAR_OPTION
2565 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2566 w16e_conv(c1, &c2, &c1);
2570 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2573 (*o_putc)(ascii_intro);
2574 output_mode = ASCII;
2577 } else if (c2==X0201) {
2578 if (output_mode!=X0201) {
2579 output_mode = X0201;
2585 } else if (c2==ISO8859_1) {
2586 /* iso8859 introduction, or 8th bit on */
2587 /* Can we convert in 7bit form using ESC-'-'-A ?
2589 output_mode = ISO8859_1;
2591 } else if (c2 == 0) {
2592 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2595 (*o_putc)(ascii_intro);
2596 output_mode = ASCII;
2600 if (output_mode != X0208) {
2601 output_mode = X0208;
2604 (*o_putc)(kanji_intro);
2606 if (c1<0x20 || 0x7e<c1)
2608 if (c2<0x20 || 0x7e<c2)
2620 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2622 } else if (base64_count>66 && mimeout_mode) {
2623 (*o_base64conv)(EOF,0);
2625 (*o_putc)('\t'); base64_count += 7;
2627 (*o_base64conv)(c2,c1);
2631 static int broken_buf[3];
2632 static int broken_counter = 0;
2633 static int broken_last = 0;
2640 if (broken_counter>0) {
2641 return broken_buf[--broken_counter];
2644 if (c=='$' && broken_last != ESC
2645 && (input_mode==ASCII || input_mode==X0201)) {
2648 if (c1=='@'|| c1=='B') {
2649 broken_buf[0]=c1; broken_buf[1]=c;
2656 } else if (c=='(' && broken_last != ESC
2657 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2660 if (c1=='J'|| c1=='B') {
2661 broken_buf[0]=c1; broken_buf[1]=c;
2679 if (broken_counter<2)
2680 broken_buf[broken_counter++]=c;
2684 static int prev_cr = 0;
2692 if (! (c2==0&&c1==NL) ) {
2698 } else if (c1=='\r') {
2700 } else if (c1=='\n') {
2701 if (crmode_f==CRLF) {
2702 (*o_crconv)(0,'\r');
2703 } else if (crmode_f==CR) {
2704 (*o_crconv)(0,'\r');
2708 } else if (c1!='\032' || crmode_f!=NL){
2714 Return value of fold_conv()
2716 \n add newline and output char
2717 \r add newline and output nothing
2720 1 (or else) normal output
2722 fold state in prev (previous character)
2724 >0x80 Japanese (X0208/X0201)
2729 This fold algorithm does not preserve leading space in a line.
2730 This is the main difference from fmt.
2733 #define char_size(c2,c1) (c2?2:1)
2742 if (c1== '\r' && !fold_preserve_f) {
2743 fold_state=0; /* ignore cr */
2744 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2746 fold_state=0; /* ignore cr */
2747 } else if (c1== BS) {
2748 if (f_line>0) f_line--;
2750 } else if (c2==EOF && f_line != 0) { /* close open last line */
2752 } else if ((c1=='\n' && !fold_preserve_f)
2753 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2754 && fold_preserve_f)) {
2756 if (fold_preserve_f) {
2760 } else if ((f_prev == c1 && !fold_preserve_f)
2761 || (f_prev == '\n' && fold_preserve_f)
2762 ) { /* duplicate newline */
2765 fold_state = '\n'; /* output two newline */
2771 if (f_prev&0x80) { /* Japanese? */
2773 fold_state = 0; /* ignore given single newline */
2774 } else if (f_prev==' ') {
2778 if (++f_line<=fold_len)
2782 fold_state = '\r'; /* fold and output nothing */
2786 } else if (c1=='\f') {
2791 fold_state = '\n'; /* output newline and clear */
2792 } else if ( (c2==0 && c1==' ')||
2793 (c2==0 && c1=='\t')||
2794 (c2=='!'&& c1=='!')) {
2795 /* X0208 kankaku or ascii space */
2796 if (f_prev == ' ') {
2797 fold_state = 0; /* remove duplicate spaces */
2800 if (++f_line<=fold_len)
2801 fold_state = ' '; /* output ASCII space only */
2803 f_prev = ' '; f_line = 0;
2804 fold_state = '\r'; /* fold and output nothing */
2808 prev0 = f_prev; /* we still need this one... , but almost done */
2810 if (c2 || c2==X0201)
2811 f_prev |= 0x80; /* this is Japanese */
2812 f_line += char_size(c2,c1);
2813 if (f_line<=fold_len) { /* normal case */
2816 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2817 f_line = char_size(c2,c1);
2818 fold_state = '\n'; /* We can't wait, do fold now */
2819 } else if (c2==X0201) {
2820 /* simple kinsoku rules return 1 means no folding */
2821 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2822 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2823 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2824 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2825 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2826 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2827 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2829 fold_state = '\n';/* add one new f_line before this character */
2832 fold_state = '\n';/* add one new f_line before this character */
2835 /* kinsoku point in ASCII */
2836 if ( c1==')'|| /* { [ ( */
2847 /* just after special */
2848 } else if (!is_alnum(prev0)) {
2849 f_line = char_size(c2,c1);
2851 } else if ((prev0==' ') || /* ignored new f_line */
2852 (prev0=='\n')|| /* ignored new f_line */
2853 (prev0&0x80)) { /* X0208 - ASCII */
2854 f_line = char_size(c2,c1);
2855 fold_state = '\n';/* add one new f_line before this character */
2857 fold_state = 1; /* default no fold in ASCII */
2861 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2862 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2863 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2864 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2865 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2866 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2867 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2868 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2869 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2870 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2871 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2872 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2873 /* default no fold in kinsoku */
2876 f_line = char_size(c2,c1);
2877 /* add one new f_line before this character */
2880 f_line = char_size(c2,c1);
2882 /* add one new f_line before this character */
2887 /* terminator process */
2888 switch(fold_state) {
2907 int z_prev2=0,z_prev1=0;
2914 /* if (c2) c1 &= 0x7f; assertion */
2916 if (x0201_f && z_prev2==X0201) { /* X0201 */
2917 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
2919 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2921 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
2923 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2927 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2936 if (x0201_f && c2==X0201) {
2937 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2938 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
2939 z_prev1 = c1; z_prev2 = c2;
2942 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
2947 /* JISX0208 Alphabet */
2948 if (alpha_f && c2 == 0x23 ) {
2950 } else if (alpha_f && c2 == 0x21 ) {
2951 /* JISX0208 Kigou */
2956 } else if (alpha_f&0x4) {
2961 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
2967 case '>': entity = ">"; break;
2968 case '<': entity = "<"; break;
2969 case '\"': entity = """; break;
2970 case '&': entity = "&"; break;
2973 while (*entity) (*o_zconv)(0, *entity++);
2983 #define rot13(c) ( \
2985 (c <= 'M') ? (c + 13): \
2986 (c <= 'Z') ? (c - 13): \
2988 (c <= 'm') ? (c + 13): \
2989 (c <= 'z') ? (c - 13): \
2993 #define rot47(c) ( \
2995 ( c <= 'O' ) ? (c + 47) : \
2996 ( c <= '~' ) ? (c - 47) : \
3004 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3010 (*o_rot_conv)(c2,c1);
3017 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3019 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3022 (*o_hira_conv)(c2,c1);
3027 iso2022jp_check_conv(c2,c1)
3030 static int range[RANGE_NUM_MAX][2] = {
3053 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3057 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3062 for (i = 0; i < RANGE_NUM_MAX; i++) {
3063 start = range[i][0];
3066 if (c >= start && c <= end) {
3071 (*o_iso2022jp_check_conv)(c2,c1);
3075 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3077 unsigned char *mime_pattern[] = {
3078 (unsigned char *)"\075?EUC-JP?B?",
3079 (unsigned char *)"\075?SHIFT_JIS?B?",
3080 (unsigned char *)"\075?ISO-8859-1?Q?",
3081 (unsigned char *)"\075?ISO-8859-1?B?",
3082 (unsigned char *)"\075?ISO-2022-JP?B?",
3083 (unsigned char *)"\075?ISO-2022-JP?Q?",
3084 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3085 (unsigned char *)"\075?UTF-8?B?",
3087 (unsigned char *)"\075?US-ASCII?Q?",
3092 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3093 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3094 e_iconv, s_iconv, 0, 0, 0, 0,
3095 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3101 int mime_encode[] = {
3102 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3103 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3110 int mime_encode_method[] = {
3111 'B', 'B','Q', 'B', 'B', 'Q',
3112 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3120 #define MAXRECOVER 20
3122 /* I don't trust portability of toupper */
3123 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
3124 #define nkf_isdigit(c) ('0'<=c && c<='9')
3125 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
3130 if (i_getc!=mime_getc) {
3131 i_mgetc = i_getc; i_getc = mime_getc;
3132 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3133 if(mime_f==STRICT_MIME) {
3134 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3135 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3141 unswitch_mime_getc()
3143 if(mime_f==STRICT_MIME) {
3144 i_mgetc = i_mgetc_buf;
3145 i_mungetc = i_mungetc_buf;
3148 i_ungetc = i_mungetc;
3152 mime_begin_strict(f)
3157 unsigned char *p,*q;
3158 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3160 mime_decode_mode = FALSE;
3161 /* =? has been checked */
3163 p = mime_pattern[j];
3166 for(i=2;p[i]>' ';i++) { /* start at =? */
3167 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3168 /* pattern fails, try next one */
3170 while ((p = mime_pattern[++j])) {
3171 for(k=2;k<i;k++) /* assume length(p) > i */
3172 if (p[k]!=q[k]) break;
3173 if (k==i && nkf_toupper(c1)==p[k]) break;
3175 if (p) continue; /* found next one, continue */
3176 /* all fails, output from recovery buffer */
3184 mime_decode_mode = p[i-2];
3186 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3188 if (mime_decode_mode=='B') {
3189 mimebuf_f = unbuf_f;
3191 /* do MIME integrity check */
3192 return mime_integrity(f,mime_pattern[j]);
3204 /* we don't keep eof of Fifo, because it contains ?= as
3205 a terminator. It was checked in mime_integrity. */
3206 return ((mimebuf_f)?
3207 (*i_mgetc_buf)(f):Fifo(mime_input++));
3211 mime_ungetc_buf(c,f)
3216 (*i_mungetc_buf)(c,f);
3218 Fifo(--mime_input)=c;
3229 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3230 /* re-read and convert again from mime_buffer. */
3232 /* =? has been checked */
3234 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3235 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3236 /* We accept any character type even if it is broken by new lines */
3237 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3238 if (c1=='\n'||c1==' '||c1=='\r'||
3239 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3241 /* Failed. But this could be another MIME preamble */
3249 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3250 if (!(++i<MAXRECOVER) || c1==EOF) break;
3251 if (c1=='b'||c1=='B') {
3252 mime_decode_mode = 'B';
3253 } else if (c1=='q'||c1=='Q') {
3254 mime_decode_mode = 'Q';
3258 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3259 if (!(++i<MAXRECOVER) || c1==EOF) break;
3261 mime_decode_mode = FALSE;
3267 if (!mime_decode_mode) {
3268 /* false MIME preamble, restart from mime_buffer */
3269 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3270 /* Since we are in MIME mode until buffer becomes empty, */
3271 /* we never go into mime_begin again for a while. */
3274 /* discard mime preamble, and goto MIME mode */
3276 /* do no MIME integrity check */
3277 return c1; /* used only for checking EOF */
3292 fprintf(stderr, "%s\n", str);
3301 if (nkf_isdigit(x)) return x - '0';
3302 return nkf_toupper(x) - 'A' + 10;
3307 #ifdef ANSI_C_PROTOTYPE
3308 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3311 hex_getc(ch, f, g, u)
3324 if (!nkf_isxdigit(c2)){
3329 if (!nkf_isxdigit(c3)){
3334 return (hex2bin(c2) << 4) | hex2bin(c3);
3341 return hex_getc(':', f, i_cgetc, i_cungetc);
3349 return (*i_cungetc)(c, f);
3356 return hex_getc('%', f, i_ugetc, i_uungetc);
3364 return (*i_uungetc)(c, f);
3368 #ifdef NUMCHAR_OPTION
3373 int (*g)() = i_ngetc;
3374 int (*u)() = i_nungetc;
3385 if (buf[i] == 'x' || buf[i] == 'X'){
3386 for (j = 0; j < 5; j++){
3388 if (!nkf_isxdigit(buf[i])){
3395 c |= hex2bin(buf[i]);
3398 for (j = 0; j < 6; j++){
3402 if (!nkf_isdigit(buf[i])){
3409 c += hex2bin(buf[i]);
3415 return CLASS_UTF16 | c;
3425 numchar_ungetc(c, f)
3429 return (*i_nungetc)(c, f);
3438 int c1, c2, c3, c4, cc;
3439 int t1, t2, t3, t4, mode, exit_mode;
3441 if (mime_top != mime_last) { /* Something is in FIFO */
3442 return Fifo(mime_top++);
3444 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3445 mime_decode_mode=FALSE;
3446 unswitch_mime_getc();
3447 return (*i_getc)(f);
3450 if (mimebuf_f == FIXED_MIME)
3451 exit_mode = mime_decode_mode;
3454 if (mime_decode_mode == 'Q') {
3455 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3457 if (c1=='_') return ' ';
3458 if (c1!='=' && c1!='?') {
3462 mime_decode_mode = exit_mode; /* prepare for quit */
3463 if (c1<=' ') return c1;
3464 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3465 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3466 /* end Q encoding */
3467 input_mode = exit_mode;
3468 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3469 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3472 if (c1=='='&&c2<' ') { /* this is soft wrap */
3473 while((c1 = (*i_mgetc)(f)) <=' ') {
3474 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3476 mime_decode_mode = 'Q'; /* still in MIME */
3477 goto restart_mime_q;
3480 mime_decode_mode = 'Q'; /* still in MIME */
3484 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3485 if (c2<=' ') return c2;
3486 mime_decode_mode = 'Q'; /* still in MIME */
3487 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3488 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3489 return ((hex(c2)<<4) + hex(c3));
3492 if (mime_decode_mode != 'B') {
3493 mime_decode_mode = FALSE;
3494 return (*i_mgetc)(f);
3498 /* Base64 encoding */
3500 MIME allows line break in the middle of
3501 Base64, but we are very pessimistic in decoding
3502 in unbuf mode because MIME encoded code may broken by
3503 less or editor's control sequence (such as ESC-[-K in unbuffered
3504 mode. ignore incomplete MIME.
3506 mode = mime_decode_mode;
3507 mime_decode_mode = exit_mode; /* prepare for quit */
3509 while ((c1 = (*i_mgetc)(f))<=' ') {
3514 if ((c2 = (*i_mgetc)(f))<=' ') {
3517 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3518 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3521 if ((c1 == '?') && (c2 == '=')) {
3523 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3524 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3528 if ((c3 = (*i_mgetc)(f))<=' ') {
3531 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3532 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3536 if ((c4 = (*i_mgetc)(f))<=' ') {
3539 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3540 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3544 mime_decode_mode = mode; /* still in MIME sigh... */
3546 /* BASE 64 decoding */
3548 t1 = 0x3f & base64decode(c1);
3549 t2 = 0x3f & base64decode(c2);
3550 t3 = 0x3f & base64decode(c3);
3551 t4 = 0x3f & base64decode(c4);
3552 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3554 Fifo(mime_last++) = cc;
3555 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3557 Fifo(mime_last++) = cc;
3558 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3560 Fifo(mime_last++) = cc;
3565 return Fifo(mime_top++);
3573 Fifo(--mime_top) = c;
3584 /* In buffered mode, read until =? or NL or buffer full
3586 mime_input = mime_top;
3587 mime_last = mime_top;
3588 while(*p) Fifo(mime_input++) = *p++;
3591 while((c=(*i_getc)(f))!=EOF) {
3592 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3593 break; /* buffer full */
3595 if (c=='=' && d=='?') {
3596 /* checked. skip header, start decode */
3597 Fifo(mime_input++) = c;
3598 /* mime_last_input = mime_input; */
3603 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3605 /* Should we check length mod 4? */
3606 Fifo(mime_input++) = c;
3609 /* In case of Incomplete MIME, no MIME decode */
3610 Fifo(mime_input++) = c;
3611 mime_last = mime_input; /* point undecoded buffer */
3612 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3613 switch_mime_getc(); /* anyway we need buffered getc */
3624 i = c - 'A'; /* A..Z 0-25 */
3626 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3628 } else if (c > '/') {
3629 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3630 } else if (c == '+') {
3631 i = '>' /* 62 */ ; /* + 62 */
3633 i = '?' /* 63 */ ; /* / 63 */
3638 static char basis_64[] =
3639 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3649 p = mime_pattern[0];
3650 for(i=0;mime_encode[i];i++) {
3651 if (mode == mime_encode[i]) {
3652 p = mime_pattern[i];
3656 mimeout_mode = mime_encode_method[i];
3658 /* (*o_mputc)(' '); */
3675 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
3681 if (mimeout_f==FIXED_MIME) {
3682 if (base64_count>71) {
3690 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3691 && mimeout_f!=FIXED_MIME) {
3692 if (mimeout_mode=='Q') {
3699 if (mimeout_mode!='B' || c!=SPACE) {
3708 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3709 open_mime(output_mode);
3711 } else { /* c==EOF */
3712 switch(mimeout_mode) {
3717 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3723 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3729 if (mimeout_f!=FIXED_MIME) {
3731 } else if (mimeout_mode != 'Q')
3736 switch(mimeout_mode) {
3740 (*o_mputc)(itoh4(((c>>4)&0xf)));
3741 (*o_mputc)(itoh4((c&0xf)));
3748 (*o_mputc)(basis_64[c>>2]);
3753 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3759 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3760 (*o_mputc)(basis_64[c & 0x3F]);
3780 mime_f = STRICT_MIME;
3784 #if defined(MSDOS) || defined(__OS2__)
3789 iso2022jp_f = FALSE;
3791 kanji_intro = DEFAULT_J;
3792 ascii_intro = DEFAULT_R;
3794 output_conv = DEFAULT_CONV;
3795 oconv = DEFAULT_CONV;
3798 i_mungetc = std_ungetc;
3799 i_mgetc_buf = std_getc;
3800 i_mungetc_buf = std_ungetc;
3803 i_ungetc=std_ungetc;
3806 i_bungetc= std_ungetc;
3810 o_crconv = no_connection;
3811 o_rot_conv = no_connection;
3812 o_iso2022jp_check_conv = no_connection;
3813 o_hira_conv = no_connection;
3814 o_fconv = no_connection;
3815 o_zconv = no_connection;
3818 i_ungetc = std_ungetc;
3820 i_mungetc = std_ungetc;
3822 output_mode = ASCII;
3825 mime_decode_mode = FALSE;
3834 struct input_code *p = input_code_list;
3839 #ifdef UTF8_OUTPUT_ENABLE
3840 if (w_oconv16_begin_f) {
3841 w_oconv16_begin_f = 2;
3846 fold_preserve_f = FALSE;
3849 fold_margin = FOLD_MARGIN;
3852 z_prev2=0,z_prev1=0;
3854 input_codename = "";
3859 no_connection(c2,c1)
3862 no_connection2(c2,c1,0);
3866 no_connection2(c2,c1,c0)
3869 fprintf(stderr,"nkf internal module connection failure.\n");
3877 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3878 fprintf(stderr,"Flags:\n");
3879 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
3880 #ifdef DEFAULT_CODE_SJIS
3881 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3883 #ifdef DEFAULT_CODE_JIS
3884 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3886 #ifdef DEFAULT_CODE_EUC
3887 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3889 #ifdef DEFAULT_CODE_UTF8
3890 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3892 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3893 fprintf(stderr,"t no conversion\n");
3894 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3895 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3896 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
3897 fprintf(stderr,"v Show this usage. V: show version\n");
3898 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3899 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3900 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3901 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
3902 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
3903 fprintf(stderr," 3: Convert HTML Entity\n");
3904 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
3905 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
3907 fprintf(stderr,"T Text mode output\n");
3909 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
3910 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
3911 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
3912 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
3913 fprintf(stderr,"long name options\n");
3914 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
3915 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
3917 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
3919 fprintf(stderr," --help,--version\n");
3926 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
3927 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
3930 #if defined(MSDOS) && defined(__WIN16__)
3933 #if defined(MSDOS) && defined(__WIN32__)
3939 ,Version,Patchlevel);
3940 fprintf(stderr,"\n%s\n",CopyRight);
3945 **
\e$B%Q%C%A@):n<T
\e(B
3946 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
3947 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
3948 ** ohta@src.ricoh.co.jp (Junn Ohta)
3949 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
3950 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
3951 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
3952 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
3953 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
3954 ** GHG00637@nifty-serve.or.jp (COW)