1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2003 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
116 #if defined(MSDOS) || defined(__OS2__)
123 #define setbinmode(fp) fsetbin(fp)
124 #else /* Microsoft C, Turbo C */
125 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
127 #else /* UNIX,OS/2 */
128 #define setbinmode(fp)
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
134 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
147 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
153 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
154 #include <sys/utime.h>
155 #elif defined(__TURBOC__) /* BCC */
157 #elif defined(LSI_C) /* LSI C */
169 /* state of output_mode and input_mode
186 /* Input Assumption */
190 #define LATIN1_INPUT 6
192 #define STRICT_MIME 8
197 #define JAPANESE_EUC 10
201 #define UTF8_INPUT 13
202 #define UTF16_INPUT 14
203 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): nonzero when c is an ASCII letter ('a'-'z', 'A'-'Z')
 * or an ASCII digit ('0'-'9'), zero otherwise.
 *
 * Every use of the argument is parenthesized so that an expression
 * argument (for example a conditional expression) is compared as a
 * whole instead of being re-associated with the surrounding operators.
 *
 * NOTE: c is still evaluated more than once; do not pass an expression
 * with side effects.
 */
#define is_alnum(c)  \
            (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
224 #define HOLD_SIZE 1024
225 #define IOBUF_SIZE 16384
227 #define DEFAULT_J 'B'
228 #define DEFAULT_R 'B'
230 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
231 #define SJ6394 0x0161 /* 63 - 94 ku offset */
233 #define RANGE_NUM_MAX 18
238 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
239 #define sizeof_euc_utf8 94
240 #define sizeof_euc_to_utf8_1byte 94
241 #define sizeof_euc_to_utf8_2bytes 94
242 #define sizeof_utf8_to_euc_C2 64
243 #define sizeof_utf8_to_euc_E5B8 64
244 #define sizeof_utf8_to_euc_2bytes 112
245 #define sizeof_utf8_to_euc_3bytes 112
248 /* MIME preprocessor */
251 #ifdef EASYWIN /*Easy Win */
252 extern POINT _BufferSize;
255 /* function prototype */
257 #ifdef ANSI_C_PROTOTYPE
259 #define STATIC static
271 void (*status_func)PROTO((struct input_code *, int));
272 int (*iconv_func)PROTO((int c2, int c1, int c0));
276 STATIC char *input_codename = "";
278 STATIC int noconvert PROTO((FILE *f));
279 STATIC int kanji_convert PROTO((FILE *f));
280 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
281 STATIC int push_hold_buf PROTO((int c2));
282 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
283 STATIC int s_iconv PROTO((int c2,int c1,int c0));
284 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
285 STATIC int e_iconv PROTO((int c2,int c1,int c0));
286 #ifdef UTF8_INPUT_ENABLE
287 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
288 STATIC int w_iconv PROTO((int c2,int c1,int c0));
289 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
290 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
291 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
293 #ifdef UTF8_OUTPUT_ENABLE
294 STATIC int e2w_conv PROTO((int c2,int c1));
295 STATIC void w_oconv PROTO((int c2,int c1));
296 STATIC void w_oconv16 PROTO((int c2,int c1));
298 STATIC void e_oconv PROTO((int c2,int c1));
299 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
300 STATIC void s_oconv PROTO((int c2,int c1));
301 STATIC void j_oconv PROTO((int c2,int c1));
302 STATIC void fold_conv PROTO((int c2,int c1));
303 STATIC void cr_conv PROTO((int c2,int c1));
304 STATIC void z_conv PROTO((int c2,int c1));
305 STATIC void rot_conv PROTO((int c2,int c1));
306 STATIC void hira_conv PROTO((int c2,int c1));
307 STATIC void base64_conv PROTO((int c2,int c1));
308 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
309 STATIC void no_connection PROTO((int c2,int c1));
310 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
312 STATIC void code_score PROTO((struct input_code *ptr));
313 STATIC void code_status PROTO((int c));
315 STATIC void std_putc PROTO((int c));
316 STATIC int std_getc PROTO((FILE *f));
317 STATIC int std_ungetc PROTO((int c,FILE *f));
319 STATIC int broken_getc PROTO((FILE *f));
320 STATIC int broken_ungetc PROTO((int c,FILE *f));
322 STATIC int mime_begin PROTO((FILE *f));
323 STATIC int mime_getc PROTO((FILE *f));
324 STATIC int mime_ungetc PROTO((int c,FILE *f));
326 STATIC int mime_begin_strict PROTO((FILE *f));
327 STATIC int mime_getc_buf PROTO((FILE *f));
328 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
329 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
331 STATIC int base64decode PROTO((int c));
332 STATIC void mime_putc PROTO((int c));
333 STATIC void open_mime PROTO((int c));
334 STATIC void close_mime PROTO(());
335 STATIC void usage PROTO(());
336 STATIC void version PROTO(());
337 STATIC void options PROTO((unsigned char *c));
339 STATIC void reinit PROTO(());
344 static unsigned char stdibuf[IOBUF_SIZE];
345 static unsigned char stdobuf[IOBUF_SIZE];
346 static unsigned char hold_buf[HOLD_SIZE*2];
347 static int hold_count;
349 /* MIME preprocessor fifo */
351 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
352 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
353 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
354 static unsigned char mime_buf[MIME_BUF_SIZE];
355 static unsigned int mime_top = 0;
356 static unsigned int mime_last = 0; /* decoded */
357 static unsigned int mime_input = 0; /* undecoded */
360 static int unbuf_f = FALSE;
361 static int estab_f = FALSE;
362 static int nop_f = FALSE;
363 static int binmode_f = TRUE; /* binary mode */
364 static int rot_f = FALSE; /* rot13/47 mode */
365 static int hira_f = FALSE; /* hira/kata henkan */
366 static int input_f = FALSE; /* non fixed input code */
367 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
368 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
369 static int mimebuf_f = FALSE; /* MIME buffered input */
370 static int broken_f = FALSE; /* convert ESC-less broken JIS */
371 static int iso8859_f = FALSE; /* ISO8859 through */
372 static int mimeout_f = FALSE; /* base64 mode */
373 #if defined(MSDOS) || defined(__OS2__)
374 static int x0201_f = TRUE; /* Assume JISX0201 kana */
376 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
378 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
379 #ifdef UTF8_OUTPUT_ENABLE
380 static int w_oconv16_begin_f= 0; /* utf-16 header */
381 static int w_oconv16_LE = 0; /* utf-16 little endian */
385 #ifdef NUMCHAR_OPTION
387 #define CLASS_MASK 0x0f000000
388 #define CLASS_UTF16 0x01000000
392 static int cap_f = FALSE;
393 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
394 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
395 STATIC int cap_getc PROTO((FILE *f));
396 STATIC int cap_ungetc PROTO((int c,FILE *f));
398 static int url_f = FALSE;
399 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
400 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
401 STATIC int url_getc PROTO((FILE *f));
402 STATIC int url_ungetc PROTO((int c,FILE *f));
404 static int numchar_f = FALSE;
405 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
406 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
407 STATIC int numchar_getc PROTO((FILE *f));
408 STATIC int numchar_ungetc PROTO((int c,FILE *f));
412 static int noout_f = FALSE;
413 STATIC void no_putc PROTO((int c));
414 static int debug_f = FALSE;
415 STATIC void debug PROTO((char *str));
419 static int exec_f = 0;
422 #ifdef SHIFTJIS_CP932
423 STATIC int cp932_f = TRUE;
424 #define CP932_TABLE_BEGIN (0xfa)
425 #define CP932_TABLE_END (0xfc)
427 #endif /* SHIFTJIS_CP932 */
429 STATIC void e_status PROTO((struct input_code *, int));
430 STATIC void s_status PROTO((struct input_code *, int));
432 #ifdef UTF8_INPUT_ENABLE
433 STATIC void w_status PROTO((struct input_code *, int));
434 STATIC void w16_status PROTO((struct input_code *, int));
435 static int utf16_mode = UTF16_INPUT;
438 struct input_code input_code_list[] = {
439 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
440 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
441 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
442 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
446 static int mimeout_mode = 0;
447 static int base64_count = 0;
449 /* X0208 -> ASCII converter */
452 static int f_line = 0; /* chars in line */
453 static int f_prev = 0;
454 static int fold_preserve_f = FALSE; /* preserve new lines */
455 static int fold_f = FALSE;
456 static int fold_len = 0;
459 static unsigned char kanji_intro = DEFAULT_J,
460 ascii_intro = DEFAULT_R;
464 #define FOLD_MARGIN 10
465 #define DEFAULT_FOLD 60
467 static int fold_margin = FOLD_MARGIN;
471 #ifdef DEFAULT_CODE_JIS
472 # define DEFAULT_CONV j_oconv
474 #ifdef DEFAULT_CODE_SJIS
475 # define DEFAULT_CONV s_oconv
477 #ifdef DEFAULT_CODE_EUC
478 # define DEFAULT_CONV e_oconv
480 #ifdef DEFAULT_CODE_UTF8
481 # define DEFAULT_CONV w_oconv
484 /* process default */
485 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
487 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
488 /* s_iconv or oconv */
489 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
491 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
492 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
493 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
494 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
495 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
496 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
497 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
499 /* static redirections */
501 static void (*o_putc)PROTO((int c)) = std_putc;
503 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
504 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
506 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
507 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
509 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
511 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
512 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
514 /* for strict mime */
515 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
516 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
519 static int output_mode = ASCII, /* output kanji mode */
520 input_mode = ASCII, /* input kanji mode */
521 shift_mode = FALSE; /* TRUE shift out, or X0201 */
522 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
524 /* X0201 / X0208 conversion tables */
526 /* X0201 kana conversion table */
529 unsigned char cv[]= {
530 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
531 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
532 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
533 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
534 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
535 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
536 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
537 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
538 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
539 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
540 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
541 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
542 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
543 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
544 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
545 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
549 /* X0201 kana conversion table for dakuten (voiced sound mark) */
552 unsigned char dv[]= {
553 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
554 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
555 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
557 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
558 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
559 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
560 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
561 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
562 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
563 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
564 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
567 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
568 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
571 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
574 unsigned char ev[]= {
575 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
577 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
579 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
583 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
584 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
585 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
586 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 /* X0208 kigou conversion table */
595 /* 0x8140 - 0x819e */
597 unsigned char fv[] = {
599 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
600 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
601 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
603 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
604 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
605 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
607 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
616 static int file_out = FALSE;
618 static int overwrite = FALSE;
621 static int crmode_f = 0; /* CR, NL, CRLF */
622 #ifdef EASYWIN /*Easy Win */
623 static int end_check;
635 #ifdef EASYWIN /*Easy Win */
636 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
639 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
640 cp = (unsigned char *)*argv;
645 if (pipe(fds) < 0 || (pid = fork()) < 0){
656 execvp(argv[1], &argv[1]);
670 if(x0201_f == WISH_TRUE)
671 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
673 if (binmode_f == TRUE)
675 if (freopen("","wb",stdout) == NULL)
682 setbuf(stdout, (char *) NULL);
684 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
687 if (binmode_f == TRUE)
689 if (freopen("","rb",stdin) == NULL) return (-1);
693 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
697 kanji_convert(stdin);
703 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
712 /* reopen file for stdout */
713 if (file_out == TRUE) {
716 outfname = malloc(strlen(origfname)
717 + strlen(".nkftmpXXXXXX")
723 strcpy(outfname, origfname);
727 for (i = strlen(outfname); i; --i){
728 if (outfname[i - 1] == '/'
729 || outfname[i - 1] == '\\'){
735 strcat(outfname, "ntXXXXXX");
737 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
740 strcat(outfname, ".nkftmpXXXXXX");
741 fd = mkstemp(outfname);
744 || (fd_backup = dup(fileno(stdout))) < 0
745 || dup2(fd, fileno(stdout)) < 0
756 outfname = "nkf.out";
759 if(freopen(outfname, "w", stdout) == NULL) {
763 if (binmode_f == TRUE) {
765 if (freopen("","wb",stdout) == NULL)
772 if (binmode_f == TRUE)
774 if (freopen("","rb",fin) == NULL)
779 setvbuffer(fin, stdibuf, IOBUF_SIZE);
788 #if defined(MSDOS) && !defined(__MINGW32__)
796 if (dup2(fd_backup, fileno(stdout)) < 0){
799 if (stat(origfname, &sb)) {
800 fprintf(stderr, "Can't stat %s\n", origfname);
802 /* restore permissions */
803 if (chmod(outfname, sb.st_mode)) {
804 fprintf(stderr, "Can't set permission %s\n", outfname);
807 /* restore timestamp */
808 #if defined(MSDOS) && !defined(__MINGW32__)
809 tb[0] = tb[1] = sb.st_mtime;
810 if (utime(outfname, tb)) {
811 fprintf(stderr, "Can't set timestamp %s\n", outfname);
814 tb.actime = sb.st_atime;
815 tb.modtime = sb.st_mtime;
816 if (utime(outfname, &tb)) {
817 fprintf(stderr, "Can't set timestamp %s\n", outfname);
821 if (unlink(origfname)){
825 if (rename(outfname, origfname)) {
827 fprintf(stderr, "Can't rename %s to %s\n",
828 outfname, origfname);
836 #ifdef EASYWIN /*Easy Win */
837 if (file_out == FALSE)
838 scanf("%d",&end_check);
841 #else /* for Other OS */
842 if (file_out == TRUE)
872 {"katakana-hiragana","h3"},
873 #ifdef UTF8_OUTPUT_ENABLE
877 #ifdef UTF8_INPUT_ENABLE
879 {"utf16-input", "W16"},
888 #ifdef NUMCHAR_OPTION
889 {"numchar-input", ""},
895 #ifdef SHIFTJIS_CP932
904 static int option_mode;
919 case '-': /* literal options */
920 if (!*cp) { /* ignore the rest of arguments */
924 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
926 p = (unsigned char *)long_option[i].name;
927 for (j=0;*p && *p++ == cp[j];j++);
928 if (! *p && !cp[j]) break;
931 cp = (unsigned char *)long_option[i].alias;
934 if (strcmp(long_option[i].name, "overwrite") == 0){
941 if (strcmp(long_option[i].name, "cap-input") == 0){
945 if (strcmp(long_option[i].name, "url-input") == 0){
950 #ifdef NUMCHAR_OPTION
951 if (strcmp(long_option[i].name, "numchar-input") == 0){
957 if (strcmp(long_option[i].name, "no-output") == 0){
961 if (strcmp(long_option[i].name, "debug") == 0){
966 #ifdef SHIFTJIS_CP932
967 if (strcmp(long_option[i].name, "no-cp932") == 0){
973 if (strcmp(long_option[i].name, "exec-in") == 0){
977 if (strcmp(long_option[i].name, "exec-out") == 0){
984 case 'b': /* buffered mode */
987 case 'u': /* unbuffered mode */
990 case 't': /* transparent mode */
993 case 'j': /* JIS output */
995 output_conv = j_oconv;
997 case 'e': /* AT&T EUC output */
998 output_conv = e_oconv;
1000 case 's': /* SJIS output */
1001 output_conv = s_oconv;
1003 case 'l': /* ISO8859 Latin-1 support, no conversion */
1004 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1005 input_f = LATIN1_INPUT;
1007 case 'i': /* Kanji IN ESC-$-@/B */
1008 if (*cp=='@'||*cp=='B')
1009 kanji_intro = *cp++;
1011 case 'o': /* ASCII IN ESC-(-J/B */
1012 if (*cp=='J'||*cp=='B'||*cp=='H')
1013 ascii_intro = *cp++;
1020 if ('9'>= *cp && *cp>='0')
1021 hira_f |= (*cp++ -'0');
1028 #if defined(MSDOS) || defined(__OS2__)
1043 #ifdef UTF8_OUTPUT_ENABLE
1044 case 'w': /* UTF-8 output */
1045 if ('1'== cp[0] && '6'==cp[1]) {
1046 output_conv = w_oconv16; cp+=2;
1048 w_oconv16_begin_f=2; cp++;
1051 w_oconv16_begin_f=1; cp++;
1053 } else if (cp[0] == 'B') {
1054 w_oconv16_begin_f=2; cp++;
1056 w_oconv16_begin_f=1; cp++;
1060 output_conv = w_oconv;
1063 #ifdef UTF8_INPUT_ENABLE
1064 case 'W': /* UTF-8 input */
1065 if ('1'== cp[0] && '6'==cp[1]) {
1066 input_f = UTF16_INPUT;
1068 input_f = UTF8_INPUT;
1071 /* Input code assumption */
1072 case 'J': /* JIS input */
1073 case 'E': /* AT&T EUC input */
1074 input_f = JIS_INPUT;
1076 case 'S': /* MS Kanji input */
1077 input_f = SJIS_INPUT;
1078 if (x0201_f==NO_X0201) x0201_f=TRUE;
1080 case 'Z': /* Convert X0208 alphabet to ASCII */
1081 /* bit:0 Convert X0208
1082 bit:1 Convert Kankaku to one space
1083 bit:2 Convert Kankaku to two spaces
1084 bit:3 Convert HTML Entity
1086 if ('9'>= *cp && *cp>='0')
1087 alpha_f |= 1<<(*cp++ -'0');
1091 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1092 x0201_f = FALSE; /* No X0201->X0208 conversion */
1094 ESC-(-I in JIS, EUC, MS Kanji
1095 SI/SO in JIS, EUC, MS Kanji
1096 SSO in EUC, JIS, not in MS Kanji
1097 MS Kanji (0xa0-0xdf)
1099 ESC-(-I in JIS (0x20-0x5f)
1100 SSO in EUC (0xa0-0xdf)
1101 0xa0-0xd in MS Kanji (0xa0-0xdf)
1104 case 'X': /* Assume X0201 kana */
1105 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1108 case 'F': /* preserve new lines */
1109 fold_preserve_f = TRUE;
1110 case 'f': /* folding -f60 or -f */
1113 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1115 fold_len += *cp++ - '0';
1117 if (!(0<fold_len && fold_len<BUFSIZ))
1118 fold_len = DEFAULT_FOLD;
1122 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1124 fold_margin += *cp++ - '0';
1128 case 'm': /* MIME support */
1129 if (*cp=='B'||*cp=='Q') {
1130 mime_decode_mode = *cp++;
1131 mimebuf_f = FIXED_MIME;
1132 } else if (*cp=='N') {
1133 mime_f = TRUE; cp++;
1134 } else if (*cp=='S') {
1135 mime_f = STRICT_MIME; cp++;
1136 } else if (*cp=='0') {
1137 mime_f = FALSE; cp++;
1140 case 'M': /* MIME output */
1143 mimeout_f = FIXED_MIME; cp++;
1144 } else if (*cp=='Q') {
1146 mimeout_f = FIXED_MIME; cp++;
1151 case 'B': /* Broken JIS support */
1153 bit:1 allow any x on ESC-(-x or ESC-$-x
1154 bit:2 reset to ascii on NL
1156 if ('9'>= *cp && *cp>='0')
1157 broken_f |= 1<<(*cp++ -'0');
1162 case 'O':/* for Output file */
1166 case 'c':/* add cr code */
1169 case 'd':/* delete cr code */
1172 case 'I': /* ISO-2022-JP output */
1175 case 'L': /* line mode */
1176 if (*cp=='u') { /* unix */
1177 crmode_f = NL; cp++;
1178 } else if (*cp=='m') { /* mac */
1179 crmode_f = CR; cp++;
1180 } else if (*cp=='w') { /* windows */
1181 crmode_f = CRLF; cp++;
1182 } else if (*cp=='0') { /* no conversion */
1187 /* multiple options in a string are allowed for the Perl module */
1188 while(*cp && *cp!='-') cp++;
1192 /* bogus option but ignored */
1198 #ifdef ANSI_C_PROTOTYPE
1199 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1201 struct input_code * find_inputcode_byfunc(iconv_func)
1202 int (*iconv_func)();
1206 struct input_code *p = input_code_list;
1208 if (iconv_func == p->iconv_func){
1217 #ifdef ANSI_C_PROTOTYPE
1218 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1220 void set_iconv(f, iconv_func)
1222 int (*iconv_func)();
1226 static int (*iconv_for_check)() = 0;
1228 #ifdef INPUT_CODE_FIX
1236 #ifdef INPUT_CODE_FIX
1237 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1243 if (estab_f && iconv_for_check != iconv){
1244 struct input_code *p = find_inputcode_byfunc(iconv);
1246 debug(input_codename = p->name);
1248 iconv_for_check = iconv;
1253 #define SCORE_L2 (1) /* JIS level-2 kanji */
1254 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1255 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1256 #ifdef SHIFTJIS_CP932
1257 #define SCORE_CP932 (SCORE_DEPEND << 1) /* remapped via CP932 */
1258 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1260 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1262 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1263 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1265 #define SCORE_INIT (SCORE_iMIME)
1267 int score_table_A0[] = {
1270 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1271 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1274 int score_table_F0[] = {
1275 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1276 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1277 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1278 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1281 void set_code_score(ptr, score)
1282 struct input_code *ptr;
1286 ptr->score |= score;
1290 void clr_code_score(ptr, score)
1291 struct input_code *ptr;
1295 ptr->score &= ~score;
1299 void code_score(ptr)
1300 struct input_code *ptr;
1302 int c2 = ptr->buf[0];
1303 int c1 = ptr->buf[1];
1305 set_code_score(ptr, SCORE_ERROR);
1306 }else if (c2 == SSO){
1307 set_code_score(ptr, SCORE_KANA);
1308 #ifdef UTF8_OUTPUT_ENABLE
1309 }else if (!e2w_conv(c2, c1)){
1310 set_code_score(ptr, SCORE_NO_EXIST);
1312 }else if ((c2 & 0x70) == 0x20){
1313 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1314 }else if ((c2 & 0x70) == 0x70){
1315 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1316 }else if ((c2 & 0x70) >= 0x50){
1317 set_code_score(ptr, SCORE_L2);
1321 void status_disable(ptr)
1322 struct input_code *ptr;
1327 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1330 void status_push_ch(ptr, c)
1331 struct input_code *ptr;
1334 ptr->buf[ptr->index++] = c;
1337 void status_clear(ptr)
1338 struct input_code *ptr;
1344 void status_reset(ptr)
1345 struct input_code *ptr;
1348 ptr->score = SCORE_INIT;
1351 void status_reinit(ptr)
1352 struct input_code *ptr;
1355 ptr->_file_stat = 0;
1358 void status_check(ptr, c)
1359 struct input_code *ptr;
1362 if (c <= DEL && estab_f){
1367 void s_status(ptr, c)
1368 struct input_code *ptr;
1373 status_check(ptr, c);
1378 #ifdef NUMCHAR_OPTION
1379 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1382 }else if (0xa1 <= c && c <= 0xdf){
1383 status_push_ch(ptr, SSO);
1384 status_push_ch(ptr, c);
1387 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1389 status_push_ch(ptr, c);
1390 #ifdef SHIFTJIS_CP932
1392 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1394 status_push_ch(ptr, c);
1395 #endif /* SHIFTJIS_CP932 */
1397 status_disable(ptr);
1401 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1402 status_push_ch(ptr, c);
1403 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1407 status_disable(ptr);
1410 #ifdef SHIFTJIS_CP932
1412 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1413 status_push_ch(ptr, c);
1414 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1415 set_code_score(ptr, SCORE_CP932);
1420 status_disable(ptr);
1422 #endif /* SHIFTJIS_CP932 */
1426 void e_status(ptr, c)
1427 struct input_code *ptr;
1432 status_check(ptr, c);
1437 #ifdef NUMCHAR_OPTION
1438 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1441 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1443 status_push_ch(ptr, c);
1445 status_disable(ptr);
1449 if (0xa1 <= c && c <= 0xfe){
1450 status_push_ch(ptr, c);
1454 status_disable(ptr);
1460 #ifdef UTF8_INPUT_ENABLE
1461 void w16_status(ptr, c)
1462 struct input_code *ptr;
1469 if (ptr->_file_stat == 0){
1470 if (c == 0xfe || c == 0xff){
1472 status_push_ch(ptr, c);
1473 ptr->_file_stat = 1;
1475 status_disable(ptr);
1476 ptr->_file_stat = -1;
1478 }else if (ptr->_file_stat > 0){
1480 status_push_ch(ptr, c);
1481 }else if (ptr->_file_stat < 0){
1482 status_disable(ptr);
1488 status_disable(ptr);
1489 ptr->_file_stat = -1;
1491 status_push_ch(ptr, c);
1498 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1499 status_push_ch(ptr, c);
1502 status_disable(ptr);
1503 ptr->_file_stat = -1;
1509 void w_status(ptr, c)
1510 struct input_code *ptr;
1515 status_check(ptr, c);
1520 #ifdef NUMCHAR_OPTION
1521 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1524 }else if (0xc0 <= c && c <= 0xdf){
1526 status_push_ch(ptr, c);
1527 }else if (0xe0 <= c && c <= 0xef){
1529 status_push_ch(ptr, c);
1531 status_disable(ptr);
1536 if (0x80 <= c && c <= 0xbf){
1537 status_push_ch(ptr, c);
1538 if (ptr->index > ptr->stat){
1539 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1540 &ptr->buf[0], &ptr->buf[1]);
1545 status_disable(ptr);
1556 int action_flag = 1;
1557 struct input_code *result = 0;
1558 struct input_code *p = input_code_list;
1560 (p->status_func)(p, c);
1563 }else if(p->stat == 0){
1574 if (result && !estab_f){
1575 set_iconv(TRUE, result->iconv_func);
1576 }else if (c <= DEL){
1577 struct input_code *ptr = input_code_list;
1587 #define STD_GC_BUFSIZE (256)
1588 int std_gc_buf[STD_GC_BUFSIZE];
1598 return std_gc_buf[--std_gc_ndx];
1610 if (std_gc_ndx == STD_GC_BUFSIZE){
1613 std_gc_buf[std_gc_ndx++] = c;
1633 while ((c = (*i_getc)(f)) != EOF)
1642 oconv = output_conv;
1645 /* replace continuation module, from output side */
1647 /* output redirection */
1656 if (mimeout_f == TRUE) {
1657 o_base64conv = oconv; oconv = base64_conv;
1659 /* base64_count = 0; */
1663 o_crconv = oconv; oconv = cr_conv;
1666 o_rot_conv = oconv; oconv = rot_conv;
1669 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1672 o_hira_conv = oconv; oconv = hira_conv;
1675 o_fconv = oconv; oconv = fold_conv;
1678 if (alpha_f || x0201_f) {
1679 o_zconv = oconv; oconv = z_conv;
1683 i_ungetc = std_ungetc;
1684 /* input redirection */
1687 i_cgetc = i_getc; i_getc = cap_getc;
1688 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1691 i_ugetc = i_getc; i_getc = url_getc;
1692 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1695 #ifdef NUMCHAR_OPTION
1697 i_ngetc = i_getc; i_getc = numchar_getc;
1698 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1701 if (mime_f && mimebuf_f==FIXED_MIME) {
1702 i_mgetc = i_getc; i_getc = mime_getc;
1703 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1706 i_bgetc = i_getc; i_getc = broken_getc;
1707 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1709 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1710 set_iconv(-TRUE, e_iconv);
1711 } else if (input_f == SJIS_INPUT) {
1712 set_iconv(-TRUE, s_iconv);
1713 #ifdef UTF8_INPUT_ENABLE
1714 } else if (input_f == UTF8_INPUT) {
1715 set_iconv(-TRUE, w_iconv);
1716 } else if (input_f == UTF16_INPUT) {
1717 set_iconv(-TRUE, w_iconv16);
1720 set_iconv(FALSE, e_iconv);
1724 struct input_code *p = input_code_list;
1732 Conversion main loop. Code detection only.
1742 module_connection();
1747 output_mode = ASCII;
1750 #define NEXT continue /* no output, get next */
1751 #define SEND ; /* output c1 and c2, get next */
1752 #define LAST break /* end of loop, go closing */
1754 while ((c1 = (*i_getc)(f)) != EOF) {
1759 /* in case of 8th bit is on */
1761 /* in case of not established yet */
1762 /* It is still ambiguious */
1763 if (h_conv(f, c2, c1)==EOF)
1769 /* in case of already established */
1771 /* ignore bogus code */
1777 /* second byte, 7 bit code */
1778 /* it might be kanji shitfted */
1779 if ((c1 == DEL) || (c1 <= SPACE)) {
1780 /* ignore bogus first code */
1788 #ifdef UTF8_INPUT_ENABLE
1797 #ifdef NUMCHAR_OPTION
1798 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1801 } else if (c1 > DEL) {
1803 if (!estab_f && !iso8859_f) {
1804 /* not established yet */
1807 } else { /* estab_f==TRUE */
1812 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1813 /* SJIS X0201 Case... */
1814 if(iso2022jp_f && x0201_f==NO_X0201) {
1815 (*oconv)(GETA1, GETA2);
1822 } else if (c1==SSO && iconv != s_iconv) {
1823 /* EUC X0201 Case */
1824 c1 = (*i_getc)(f); /* skip SSO */
1826 if (SSP<=c1 && c1<0xe0) {
1827 if(iso2022jp_f && x0201_f==NO_X0201) {
1828 (*oconv)(GETA1, GETA2);
1835 } else { /* bogus code, skip SSO and one byte */
1839 /* already established */
1844 } else if ((c1 > SPACE) && (c1 != DEL)) {
1845 /* in case of Roman characters */
1847 /* output 1 shifted byte */
1851 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1852 /* output 1 shifted byte */
1853 if(iso2022jp_f && x0201_f==NO_X0201) {
1854 (*oconv)(GETA1, GETA2);
1861 /* look like bogus code */
1864 } else if (input_mode == X0208) {
1865 /* in case of Kanji shifted */
1868 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1869 /* Check MIME code */
1870 if ((c1 = (*i_getc)(f)) == EOF) {
1873 } else if (c1 == '?') {
1874 /* =? is mime conversion start sequence */
1875 if(mime_f == STRICT_MIME) {
1876 /* check in real detail */
1877 if (mime_begin_strict(f) == EOF)
1881 } else if (mime_begin(f) == EOF)
1891 /* normal ASCII code */
1894 } else if (c1 == SI) {
1897 } else if (c1 == SO) {
1900 } else if (c1 == ESC ) {
1901 if ((c1 = (*i_getc)(f)) == EOF) {
1902 /* (*oconv)(0, ESC); don't send bogus code */
1904 } else if (c1 == '$') {
1905 if ((c1 = (*i_getc)(f)) == EOF) {
1907 (*oconv)(0, ESC); don't send bogus code
1908 (*oconv)(0, '$'); */
1910 } else if (c1 == '@'|| c1 == 'B') {
1911 /* This is kanji introduction */
1914 debug(input_codename = "ISO-2022-JP");
1916 } else if (c1 == '(') {
1917 if ((c1 = (*i_getc)(f)) == EOF) {
1918 /* don't send bogus code
1924 } else if (c1 == '@'|| c1 == 'B') {
1925 /* This is kanji introduction */
1930 /* could be some special code */
1937 } else if (broken_f&0x2) {
1938 /* accept any ESC-(-x as broken code ... */
1948 } else if (c1 == '(') {
1949 if ((c1 = (*i_getc)(f)) == EOF) {
1950 /* don't send bogus code
1952 (*oconv)(0, '('); */
1956 /* This is X0201 kana introduction */
1957 input_mode = X0201; shift_mode = X0201;
1959 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
1960 /* This is X0208 kanji introduction */
1961 input_mode = ASCII; shift_mode = FALSE;
1963 } else if (broken_f&0x2) {
1964 input_mode = ASCII; shift_mode = FALSE;
1969 /* maintain various input_mode here */
1973 } else if ( c1 == 'N' || c1 == 'n' ){
1975 c1 = (*i_getc)(f); /* skip SS2 */
1976 if ( SPACE<=c1 && c1 < 0xe0 ) {
1985 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
1986 input_mode = ASCII; set_iconv(FALSE, 0);
1992 if (input_mode == X0208)
1993 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
1994 else if (input_mode)
1995 (*oconv)(input_mode, c1); /* other special case */
1996 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
1997 int c0 = (*i_getc)(f);
2000 (*iconv)(c2, c1, c0);
2006 /* goto next_word */
2010 (*iconv)(EOF, 0, 0);
2023 /** it must NOT be in the kanji shifte sequence */
2024 /** it must NOT be written in JIS7 */
2025 /** and it must be after 2 byte 8bit code */
2032 while ((c1 = (*i_getc)(f)) != EOF) {
2038 if (push_hold_buf(c1) == EOF || estab_f){
2044 struct input_code *p = input_code_list;
2045 struct input_code *result = p;
2050 if (p->score < result->score){
2055 set_iconv(FALSE, result->iconv_func);
2060 ** 1) EOF is detected, or
2061 ** 2) Code is established, or
2062 ** 3) Buffer is FULL (but last word is pushed)
2064 ** in 1) and 3) cases, we continue to use
2065 ** Kanji codes by oconv and leave estab_f unchanged.
2070 while (wc < hold_count){
2071 c2 = hold_buf[wc++];
2073 #ifdef NUMCHAR_OPTION
2074 || (c2 & CLASS_MASK) == CLASS_UTF16
2079 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2080 (*iconv)(X0201, c2, 0);
2083 if (wc < hold_count){
2084 c1 = hold_buf[wc++];
2093 if ((*iconv)(c2, c1, 0) < 0){
2095 if (wc < hold_count){
2096 c0 = hold_buf[wc++];
2105 (*iconv)(c2, c1, c0);
2118 if (hold_count >= HOLD_SIZE*2)
2120 hold_buf[hold_count++] = c2;
2121 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2124 int s2e_conv(c2, c1, p2, p1)
2128 #ifdef SHIFTJIS_CP932
2129 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2130 extern unsigned short shiftjis_cp932[3][189];
2131 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2132 if (c1 == 0) return 1;
2136 #endif /* SHIFTJIS_CP932 */
2137 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2139 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2156 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2159 int ret = s2e_conv(c2, c1, &c2, &c1);
2160 if (ret) return ret;
2173 } else if (c2 == SSO){
2176 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2186 #ifdef UTF8_INPUT_ENABLE
2188 w2e_conv(c2, c1, c0, p2, p1)
2192 extern unsigned short * utf8_to_euc_2bytes[];
2193 extern unsigned short ** utf8_to_euc_3bytes[];
2196 if (0xc0 <= c2 && c2 <= 0xef) {
2197 unsigned short **pp;
2200 if (c0 == 0) return -1;
2201 pp = utf8_to_euc_3bytes[c2 - 0x80];
2202 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2204 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2206 #ifdef NUMCHAR_OPTION
2208 c1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2214 } else if (c2 == X0201) {
2227 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2235 w16w_conv(val, p2, p1, p0)
2243 }else if (val < 0x800){
2244 *p2 = 0xc0 | (val >> 6);
2245 *p1 = 0x80 | (val & 0x3f);
2248 *p2 = 0xe0 | (val >> 12);
2249 *p1 = 0x80 | ((val >> 6) & 0x3f);
2250 *p0 = 0x80 | (val & 0x3f);
2255 ww16_conv(c2, c1, c0)
2260 val = (c2 & 0x0f) << 12;
2261 val |= (c1 & 0x3f) << 6;
2263 }else if (c2 >= 0xc0){
2264 val = (c2 & 0x1f) << 6;
2265 val |= (c1 & 0x3f) << 6;
2273 w16e_conv(val, p2, p1)
2277 extern unsigned short * utf8_to_euc_2bytes[];
2278 extern unsigned short ** utf8_to_euc_3bytes[];
2280 unsigned short **pp;
2284 w16w_conv(val, &c2, &c1, &c0);
2287 pp = utf8_to_euc_3bytes[c2 - 0x80];
2288 psize = sizeof_utf8_to_euc_C2;
2289 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2291 pp = utf8_to_euc_2bytes;
2292 psize = sizeof_utf8_to_euc_2bytes;
2293 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2295 #ifdef NUMCHAR_OPTION
2298 *p1 = CLASS_UTF16 | val;
2307 w_iconv16(c2, c1, c0)
2312 if (c2==0376 && c1==0377){
2313 utf16_mode = UTF16_INPUT;
2315 } else if (c2==0377 && c1==0376){
2316 utf16_mode = UTF16BE_INPUT;
2319 if (utf16_mode == UTF16BE_INPUT) {
2321 tmp=c1; c1=c2; c2=tmp;
2323 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2327 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2328 if (ret) return ret;
2334 w_iconv_common(c1, c0, pp, psize, p2, p1)
2336 unsigned short **pp;
2344 if (pp == 0) return 1;
2347 if (c1 < 0 || psize <= c1) return 1;
2349 if (p == 0) return 1;
2352 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2354 if (val == 0) return 1;
2357 if (c2 == SO) c2 = X0201;
2366 #ifdef UTF8_OUTPUT_ENABLE
2371 extern unsigned short euc_to_utf8_1byte[];
2372 extern unsigned short * euc_to_utf8_2bytes[];
2376 p = euc_to_utf8_1byte;
2379 c2 = (c2&0x7f) - 0x21;
2380 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2381 p = euc_to_utf8_2bytes[c2];
2386 c1 = (c1 & 0x7f) - 0x21;
2387 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2398 #ifdef NUMCHAR_OPTION
2399 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2400 w16w_conv(c1, &c2, &c1, &c0);
2404 if (c0) (*o_putc)(c0);
2411 } else if (c2 == 0) {
2412 output_mode = ASCII;
2414 } else if (c2 == ISO8859_1) {
2415 output_mode = ISO8859_1;
2416 (*o_putc)(c1 | 0x080);
2419 w16w_conv((unsigned short)e2w_conv(c2, c1), &c2, &c1, &c0);
2423 if (c0) (*o_putc)(c0);
2438 if (w_oconv16_begin_f==2) {
2440 (*o_putc)((unsigned char)'\377');
2444 (*o_putc)((unsigned char)'\377');
2446 w_oconv16_begin_f=1;
2449 if (c2 == ISO8859_1) {
2452 #ifdef NUMCHAR_OPTION
2453 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2454 c2 = (c1 >> 8) & 0xff;
2458 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2459 c2 = (val >> 8) & 0xff;
2478 #ifdef NUMCHAR_OPTION
2479 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2480 w16e_conv(c1, &c2, &c1);
2486 } else if (c2 == 0) {
2487 output_mode = ASCII;
2489 } else if (c2 == X0201) {
2490 output_mode = JAPANESE_EUC;
2491 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2492 } else if (c2 == ISO8859_1) {
2493 output_mode = ISO8859_1;
2494 (*o_putc)(c1 | 0x080);
2496 if ((c1<0x21 || 0x7e<c1) ||
2497 (c2<0x21 || 0x7e<c2)) {
2498 set_iconv(FALSE, 0);
2499 return; /* too late to rescue this char */
2501 output_mode = JAPANESE_EUC;
2502 (*o_putc)(c2 | 0x080);
2503 (*o_putc)(c1 | 0x080);
2508 e2s_conv(c2, c1, p2, p1)
2509 int c2, c1, *p2, *p1;
2511 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2512 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2520 #ifdef NUMCHAR_OPTION
2521 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2522 w16e_conv(c1, &c2, &c1);
2528 } else if (c2 == 0) {
2529 output_mode = ASCII;
2531 } else if (c2 == X0201) {
2532 output_mode = SHIFT_JIS;
2534 } else if (c2 == ISO8859_1) {
2535 output_mode = ISO8859_1;
2536 (*o_putc)(c1 | 0x080);
2538 if ((c1<0x20 || 0x7e<c1) ||
2539 (c2<0x20 || 0x7e<c2)) {
2540 set_iconv(FALSE, 0);
2541 return; /* too late to rescue this char */
2543 output_mode = SHIFT_JIS;
2544 e2s_conv(c2, c1, &c2, &c1);
2555 #ifdef NUMCHAR_OPTION
2556 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2557 w16e_conv(c1, &c2, &c1);
2561 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2564 (*o_putc)(ascii_intro);
2565 output_mode = ASCII;
2568 } else if (c2==X0201) {
2569 if (output_mode!=X0201) {
2570 output_mode = X0201;
2576 } else if (c2==ISO8859_1) {
2577 /* iso8859 introduction, or 8th bit on */
2578 /* Can we convert in 7bit form using ESC-'-'-A ?
2580 output_mode = ISO8859_1;
2582 } else if (c2 == 0) {
2583 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2586 (*o_putc)(ascii_intro);
2587 output_mode = ASCII;
2591 if (output_mode != X0208) {
2592 output_mode = X0208;
2595 (*o_putc)(kanji_intro);
2597 if (c1<0x20 || 0x7e<c1)
2599 if (c2<0x20 || 0x7e<c2)
2611 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2613 } else if (base64_count>66 && mimeout_mode) {
2614 (*o_base64conv)(EOF,0);
2616 (*o_putc)('\t'); base64_count += 7;
2618 (*o_base64conv)(c2,c1);
2622 static int broken_buf[3];
2623 static int broken_counter = 0;
2624 static int broken_last = 0;
2631 if (broken_counter>0) {
2632 return broken_buf[--broken_counter];
2635 if (c=='$' && broken_last != ESC
2636 && (input_mode==ASCII || input_mode==X0201)) {
2639 if (c1=='@'|| c1=='B') {
2640 broken_buf[0]=c1; broken_buf[1]=c;
2647 } else if (c=='(' && broken_last != ESC
2648 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2651 if (c1=='J'|| c1=='B') {
2652 broken_buf[0]=c1; broken_buf[1]=c;
2670 if (broken_counter<2)
2671 broken_buf[broken_counter++]=c;
2675 static int prev_cr = 0;
2683 if (! (c2==0&&c1==NL) ) {
2689 } else if (c1=='\r') {
2691 } else if (c1=='\n') {
2692 if (crmode_f==CRLF) {
2693 (*o_crconv)(0,'\r');
2694 } else if (crmode_f==CR) {
2695 (*o_crconv)(0,'\r');
2699 } else if (c1!='\032' || crmode_f!=NL){
2705 Return value of fold_conv()
2707 \n add newline and output char
2708 \r add newline and output nothing
2711 1 (or else) normal output
2713 fold state in prev (previous character)
2715 >0x80 Japanese (X0208/X0201)
2720 This fold algorithm does not preserve leading space in a line.
2721 This is the main difference from fmt.
2724 #define char_size(c2,c1) (c2?2:1)
2733 if (c1== '\r' && !fold_preserve_f) {
2734 fold_state=0; /* ignore cr */
2735 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2737 fold_state=0; /* ignore cr */
2738 } else if (c1== BS) {
2739 if (f_line>0) f_line--;
2741 } else if (c2==EOF && f_line != 0) { /* close open last line */
2743 } else if ((c1=='\n' && !fold_preserve_f)
2744 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2745 && fold_preserve_f)) {
2747 if (fold_preserve_f) {
2751 } else if ((f_prev == c1 && !fold_preserve_f)
2752 || (f_prev == '\n' && fold_preserve_f)
2753 ) { /* duplicate newline */
2756 fold_state = '\n'; /* output two newline */
2762 if (f_prev&0x80) { /* Japanese? */
2764 fold_state = 0; /* ignore given single newline */
2765 } else if (f_prev==' ') {
2769 if (++f_line<=fold_len)
2773 fold_state = '\r'; /* fold and output nothing */
2777 } else if (c1=='\f') {
2782 fold_state = '\n'; /* output newline and clear */
2783 } else if ( (c2==0 && c1==' ')||
2784 (c2==0 && c1=='\t')||
2785 (c2=='!'&& c1=='!')) {
2786 /* X0208 kankaku or ascii space */
2787 if (f_prev == ' ') {
2788 fold_state = 0; /* remove duplicate spaces */
2791 if (++f_line<=fold_len)
2792 fold_state = ' '; /* output ASCII space only */
2794 f_prev = ' '; f_line = 0;
2795 fold_state = '\r'; /* fold and output nothing */
2799 prev0 = f_prev; /* we still need this one... , but almost done */
2801 if (c2 || c2==X0201)
2802 f_prev |= 0x80; /* this is Japanese */
2803 f_line += char_size(c2,c1);
2804 if (f_line<=fold_len) { /* normal case */
2807 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2808 f_line = char_size(c2,c1);
2809 fold_state = '\n'; /* We can't wait, do fold now */
2810 } else if (c2==X0201) {
2811 /* simple kinsoku rules return 1 means no folding */
2812 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2813 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2814 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2815 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2816 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2817 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2818 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2820 fold_state = '\n';/* add one new f_line before this character */
2823 fold_state = '\n';/* add one new f_line before this character */
2826 /* kinsoku point in ASCII */
2827 if ( c1==')'|| /* { [ ( */
2838 /* just after special */
2839 } else if (!is_alnum(prev0)) {
2840 f_line = char_size(c2,c1);
2842 } else if ((prev0==' ') || /* ignored new f_line */
2843 (prev0=='\n')|| /* ignored new f_line */
2844 (prev0&0x80)) { /* X0208 - ASCII */
2845 f_line = char_size(c2,c1);
2846 fold_state = '\n';/* add one new f_line before this character */
2848 fold_state = 1; /* default no fold in ASCII */
2852 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2853 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2854 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2855 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2856 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2857 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2858 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2859 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2860 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2861 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2862 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2863 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2864 /* default no fold in kinsoku */
2867 f_line = char_size(c2,c1);
2868 /* add one new f_line before this character */
2871 f_line = char_size(c2,c1);
2873 /* add one new f_line before this character */
2878 /* terminator process */
2879 switch(fold_state) {
2898 int z_prev2=0,z_prev1=0;
2905 /* if (c2) c1 &= 0x7f; assertion */
2907 if (x0201_f && z_prev2==X0201) { /* X0201 */
2908 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
2910 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2912 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
2914 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2918 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2927 if (x0201_f && c2==X0201) {
2928 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2929 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
2930 z_prev1 = c1; z_prev2 = c2;
2933 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
2938 /* JISX0208 Alphabet */
2939 if (alpha_f && c2 == 0x23 ) {
2941 } else if (alpha_f && c2 == 0x21 ) {
2942 /* JISX0208 Kigou */
2947 } else if (alpha_f&0x4) {
2952 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
2958 case '>': entity = ">"; break;
2959 case '<': entity = "<"; break;
2960 case '\"': entity = """; break;
2961 case '&': entity = "&"; break;
2964 while (*entity) (*o_zconv)(0, *entity++);
2974 #define rot13(c) ( \
2976 (c <= 'M') ? (c + 13): \
2977 (c <= 'Z') ? (c - 13): \
2979 (c <= 'm') ? (c + 13): \
2980 (c <= 'z') ? (c - 13): \
2984 #define rot47(c) ( \
2986 ( c <= 'O' ) ? (c + 47) : \
2987 ( c <= '~' ) ? (c - 47) : \
2995 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3001 (*o_rot_conv)(c2,c1);
3008 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3010 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3013 (*o_hira_conv)(c2,c1);
3018 iso2022jp_check_conv(c2,c1)
3021 static int range[RANGE_NUM_MAX][2] = {
3044 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3048 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3053 for (i = 0; i < RANGE_NUM_MAX; i++) {
3054 start = range[i][0];
3057 if (c >= start && c <= end) {
3062 (*o_iso2022jp_check_conv)(c2,c1);
3066 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3068 unsigned char *mime_pattern[] = {
3069 (unsigned char *)"\075?EUC-JP?B?",
3070 (unsigned char *)"\075?SHIFT_JIS?B?",
3071 (unsigned char *)"\075?ISO-8859-1?Q?",
3072 (unsigned char *)"\075?ISO-8859-1?B?",
3073 (unsigned char *)"\075?ISO-2022-JP?B?",
3074 (unsigned char *)"\075?ISO-2022-JP?Q?",
3075 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3076 (unsigned char *)"\075?UTF-8?B?",
3078 (unsigned char *)"\075?US-ASCII?Q?",
3083 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3084 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3085 e_iconv, s_iconv, 0, 0, 0, 0,
3086 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3092 int mime_encode[] = {
3093 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3094 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3101 int mime_encode_method[] = {
3102 'B', 'B','Q', 'B', 'B', 'Q',
3103 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3111 #define MAXRECOVER 20
/*
 * ASCII-only character helpers, used instead of the C library's toupper()
 * because its portability is not trusted.
 *
 *   nkf_toupper(c)  - 'a'..'z' mapped to 'A'..'Z'; anything else unchanged.
 *   nkf_isdigit(c)  - non-zero for '0'..'9'.
 *   nkf_isxdigit(c) - non-zero for '0'..'9', 'a'..'f', 'A'..'F'.
 *
 * Every use of the parameter is parenthesized so operator expressions
 * (e.g. `c & 0x7f`) expand correctly.  The argument is still evaluated
 * more than once, so do not pass expressions with side effects.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
3121 if (i_getc!=mime_getc) {
3122 i_mgetc = i_getc; i_getc = mime_getc;
3123 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3124 if(mime_f==STRICT_MIME) {
3125 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3126 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3132 unswitch_mime_getc()
3134 if(mime_f==STRICT_MIME) {
3135 i_mgetc = i_mgetc_buf;
3136 i_mungetc = i_mungetc_buf;
3139 i_ungetc = i_mungetc;
3143 mime_begin_strict(f)
3148 unsigned char *p,*q;
3149 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3151 mime_decode_mode = FALSE;
3152 /* =? has been checked */
3154 p = mime_pattern[j];
3157 for(i=2;p[i]>' ';i++) { /* start at =? */
3158 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3159 /* pattern fails, try next one */
3161 while ((p = mime_pattern[++j])) {
3162 for(k=2;k<i;k++) /* assume length(p) > i */
3163 if (p[k]!=q[k]) break;
3164 if (k==i && nkf_toupper(c1)==p[k]) break;
3166 if (p) continue; /* found next one, continue */
3167 /* all fails, output from recovery buffer */
3175 mime_decode_mode = p[i-2];
3177 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3179 if (mime_decode_mode=='B') {
3180 mimebuf_f = unbuf_f;
3182 /* do MIME integrity check */
3183 return mime_integrity(f,mime_pattern[j]);
3195 /* we don't keep eof of Fifo, becase it contains ?= as
3196 a terminator. It was checked in mime_integrity. */
3197 return ((mimebuf_f)?
3198 (*i_mgetc_buf)(f):Fifo(mime_input++));
3202 mime_ungetc_buf(c,f)
3207 (*i_mungetc_buf)(c,f);
3209 Fifo(--mime_input)=c;
3220 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3221 /* re-read and convert again from mime_buffer. */
3223 /* =? has been checked */
3225 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3226 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3227 /* We accept any character type even if it is breaked by new lines */
3228 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3229 if (c1=='\n'||c1==' '||c1=='\r'||
3230 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3232 /* Failed. But this could be another MIME preemble */
3240 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3241 if (!(++i<MAXRECOVER) || c1==EOF) break;
3242 if (c1=='b'||c1=='B') {
3243 mime_decode_mode = 'B';
3244 } else if (c1=='q'||c1=='Q') {
3245 mime_decode_mode = 'Q';
3249 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3250 if (!(++i<MAXRECOVER) || c1==EOF) break;
3252 mime_decode_mode = FALSE;
3258 if (!mime_decode_mode) {
3259 /* false MIME premble, restart from mime_buffer */
3260 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3261 /* Since we are in MIME mode until buffer becomes empty, */
3262 /* we never go into mime_begin again for a while. */
3265 /* discard mime preemble, and goto MIME mode */
3267 /* do no MIME integrity check */
3268 return c1; /* used only for checking EOF */
3283 fprintf(stderr, "%s\n", str);
3292 if (nkf_isdigit(x)) return x - '0';
3293 return nkf_toupper(x) - 'A' + 10;
3298 #ifdef ANSI_C_PROTOTYPE
3299 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3302 hex_getc(ch, f, g, u)
3315 if (!nkf_isxdigit(c2)){
3320 if (!nkf_isxdigit(c3)){
3325 return (hex2bin(c2) << 4) | hex2bin(c3);
3332 return hex_getc(':', f, i_cgetc, i_cungetc);
3340 return (*i_cungetc)(c, f);
3347 return hex_getc('%', f, i_ugetc, i_uungetc);
3355 return (*i_uungetc)(c, f);
3359 #ifdef NUMCHAR_OPTION
3364 int (*g)() = i_ngetc;
3365 int (*u)() = i_nungetc;
3376 if (buf[i] == 'x' || buf[i] == 'X'){
3377 for (j = 0; j < 5; j++){
3379 if (!nkf_isxdigit(buf[i])){
3386 c |= hex2bin(buf[i]);
3389 for (j = 0; j < 6; j++){
3393 if (!nkf_isdigit(buf[i])){
3400 c += hex2bin(buf[i]);
3406 return CLASS_UTF16 | c;
3416 numchar_ungetc(c, f)
3420 return (*i_nungetc)(c, f);
3429 int c1, c2, c3, c4, cc;
3430 int t1, t2, t3, t4, mode, exit_mode;
3432 if (mime_top != mime_last) { /* Something is in FIFO */
3433 return Fifo(mime_top++);
3435 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3436 mime_decode_mode=FALSE;
3437 unswitch_mime_getc();
3438 return (*i_getc)(f);
3441 if (mimebuf_f == FIXED_MIME)
3442 exit_mode = mime_decode_mode;
3445 if (mime_decode_mode == 'Q') {
3446 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3448 if (c1=='_') return ' ';
3449 if (c1!='=' && c1!='?') {
3453 mime_decode_mode = exit_mode; /* prepare for quit */
3454 if (c1<=' ') return c1;
3455 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3456 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3457 /* end Q encoding */
3458 input_mode = exit_mode;
3459 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3460 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3463 if (c1=='='&&c2<' ') { /* this is soft wrap */
3464 while((c1 = (*i_mgetc)(f)) <=' ') {
3465 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3467 mime_decode_mode = 'Q'; /* still in MIME */
3468 goto restart_mime_q;
3471 mime_decode_mode = 'Q'; /* still in MIME */
3475 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3476 if (c2<=' ') return c2;
3477 mime_decode_mode = 'Q'; /* still in MIME */
/* Value (0-15) of one hexadecimal digit character; any character that is
   not a hex digit maps to 0.  Parameter uses are parenthesized so that
   operator expressions expand correctly; the argument is evaluated more
   than once. */
#define hex(c)   (('0'<=(c)&&(c)<='9')?((c)-'0'):\
     ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
3480 return ((hex(c2)<<4) + hex(c3));
3483 if (mime_decode_mode != 'B') {
3484 mime_decode_mode = FALSE;
3485 return (*i_mgetc)(f);
3489 /* Base64 encoding */
3491 MIME allows line break in the middle of
3492 Base64, but we are very pessimistic in decoding
3493 in unbuf mode because MIME encoded code may broken by
3494 less or editor's control sequence (such as ESC-[-K in unbuffered
3495 mode. ignore incomplete MIME.
3497 mode = mime_decode_mode;
3498 mime_decode_mode = exit_mode; /* prepare for quit */
3500 while ((c1 = (*i_mgetc)(f))<=' ') {
3505 if ((c2 = (*i_mgetc)(f))<=' ') {
3508 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3509 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3512 if ((c1 == '?') && (c2 == '=')) {
3514 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3515 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3519 if ((c3 = (*i_mgetc)(f))<=' ') {
3522 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3523 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3527 if ((c4 = (*i_mgetc)(f))<=' ') {
3530 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3531 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3535 mime_decode_mode = mode; /* still in MIME sigh... */
3537 /* BASE 64 decoding */
3539 t1 = 0x3f & base64decode(c1);
3540 t2 = 0x3f & base64decode(c2);
3541 t3 = 0x3f & base64decode(c3);
3542 t4 = 0x3f & base64decode(c4);
3543 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3545 Fifo(mime_last++) = cc;
3546 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3548 Fifo(mime_last++) = cc;
3549 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3551 Fifo(mime_last++) = cc;
3556 return Fifo(mime_top++);
3564 Fifo(--mime_top) = c;
3575 /* In buffered mode, read until =? or NL or buffer full
3577 mime_input = mime_top;
3578 mime_last = mime_top;
3579 while(*p) Fifo(mime_input++) = *p++;
3582 while((c=(*i_getc)(f))!=EOF) {
3583 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3584 break; /* buffer full */
3586 if (c=='=' && d=='?') {
3587 /* checked. skip header, start decode */
3588 Fifo(mime_input++) = c;
3589 /* mime_last_input = mime_input; */
3594 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3596 /* Should we check length mod 4? */
3597 Fifo(mime_input++) = c;
3600 /* In case of Incomplete MIME, no MIME decode */
3601 Fifo(mime_input++) = c;
3602 mime_last = mime_input; /* point undecoded buffer */
3603 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3604 switch_mime_getc(); /* anyway we need buffered getc */
3615 i = c - 'A'; /* A..Z 0-25 */
3617 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3619 } else if (c > '/') {
3620 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3621 } else if (c == '+') {
3622 i = '>' /* 62 */ ; /* + 62 */
3624 i = '?' /* 63 */ ; /* / 63 */
/* Base64 alphabet: index i (0..63) holds the character that encodes the
   6-bit value i.  The table is only ever read, so it is declared const. */
static const char basis_64[] =
   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3640 p = mime_pattern[0];
3641 for(i=0;mime_encode[i];i++) {
3642 if (mode == mime_encode[i]) {
3643 p = mime_pattern[i];
3647 mimeout_mode = mime_encode_method[i];
3649 /* (*o_mputc)(' '); */
/* Upper-case hexadecimal digit character for a value in 0..15.
   The parameter is parenthesized so operator expressions such as
   `c & 0xf` expand correctly; the argument is evaluated more than once. */
#define itoh4(c)   ((c)>=10?(c)+'A'-10:(c)+'0')
3672 if (mimeout_f==FIXED_MIME) {
3673 if (base64_count>71) {
3681 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3682 && mimeout_f!=FIXED_MIME) {
3683 if (mimeout_mode=='Q') {
3690 if (mimeout_mode!='B' || c!=SPACE) {
3699 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3700 open_mime(output_mode);
3702 } else { /* c==EOF */
3703 switch(mimeout_mode) {
3708 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3714 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3720 if (mimeout_f!=FIXED_MIME) {
3722 } else if (mimeout_mode != 'Q')
3727 switch(mimeout_mode) {
3731 (*o_mputc)(itoh4(((c>>4)&0xf)));
3732 (*o_mputc)(itoh4((c&0xf)));
3739 (*o_mputc)(basis_64[c>>2]);
3744 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3750 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3751 (*o_mputc)(basis_64[c & 0x3F]);
3771 mime_f = STRICT_MIME;
3775 #if defined(MSDOS) || defined(__OS2__)
3780 iso2022jp_f = FALSE;
3782 kanji_intro = DEFAULT_J;
3783 ascii_intro = DEFAULT_R;
3785 output_conv = DEFAULT_CONV;
3786 oconv = DEFAULT_CONV;
3789 i_mungetc = std_ungetc;
3790 i_mgetc_buf = std_getc;
3791 i_mungetc_buf = std_ungetc;
3794 i_ungetc=std_ungetc;
3797 i_bungetc= std_ungetc;
3801 o_crconv = no_connection;
3802 o_rot_conv = no_connection;
3803 o_iso2022jp_check_conv = no_connection;
3804 o_hira_conv = no_connection;
3805 o_fconv = no_connection;
3806 o_zconv = no_connection;
3809 i_ungetc = std_ungetc;
3811 i_mungetc = std_ungetc;
3813 output_mode = ASCII;
3816 mime_decode_mode = FALSE;
3825 struct input_code *p = input_code_list;
3830 #ifdef UTF8_OUTPUT_ENABLE
3831 if (w_oconv16_begin_f) {
3832 w_oconv16_begin_f = 2;
3837 fold_preserve_f = FALSE;
3840 fold_margin = FOLD_MARGIN;
3843 z_prev2=0,z_prev1=0;
3845 input_codename = "";
3850 no_connection(c2,c1)
3853 no_connection2(c2,c1,0);
3857 no_connection2(c2,c1,c0)
3860 fprintf(stderr,"nkf internal module connection failure.\n");
3868 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3869 fprintf(stderr,"Flags:\n");
3870 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
3871 #ifdef DEFAULT_CODE_SJIS
3872 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3874 #ifdef DEFAULT_CODE_JIS
3875 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3877 #ifdef DEFAULT_CODE_EUC
3878 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3880 #ifdef DEFAULT_CODE_UTF8
3881 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3883 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3884 fprintf(stderr,"t no conversion\n");
3885 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3886 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3887 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
3888 fprintf(stderr,"v Show this usage. V: show version\n");
3889 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3890 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3891 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3892 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
3893 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
3894 fprintf(stderr," 3: Convert HTML Entity\n");
3895 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
3896 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
3898 fprintf(stderr,"T Text mode output\n");
3900 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
3901 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
3902 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
3903 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
3904 fprintf(stderr,"long name options\n");
3905 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
3906 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
3908 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
3910 fprintf(stderr," --help,--version\n");
3917 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
3918 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
3921 #if defined(MSDOS) && defined(__WIN16__)
3924 #if defined(MSDOS) && defined(__WIN32__)
3930 ,Version,Patchlevel);
3931 fprintf(stderr,"\n%s\n",CopyRight);
3936 ** Patch authors (パッチ制作者):
3937 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
3938 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
3939 ** ohta@src.ricoh.co.jp (Junn Ohta)
3940 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
3941 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
3942 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
3943 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
3944 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
3945 ** GHG00637@nifty-serve.or.jp (COW)