1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS)
113 #if defined(MSDOS) || defined(__OS2__)
120 #define setbinmode(fp) fsetbin(fp)
121 #else /* Microsoft C, Turbo C */
122 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
124 #else /* UNIX,OS/2 */
125 #define setbinmode(fp)
128 #ifdef _IOFBF /* SysV and MSDOS */
129 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
131 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
134 /*Borland C++ 4.5 EasyWin*/
135 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
141 /* added by satoru@isoternet.org */
142 #include <sys/stat.h>
152 /* state of output_mode and input_mode
169 /* Input Assumption */
173 #define LATIN1_INPUT 6
175 #define STRICT_MIME 8
180 #define JAPANESE_EUC 10
184 #define UTF8_INPUT 13
185 #define UTF16_INPUT 14
186 #define UTF16BE_INPUT 15
204 #define is_alnum(c) \
205 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
207 #define HOLD_SIZE 1024
208 #define IOBUF_SIZE 16384
210 #define DEFAULT_J 'B'
211 #define DEFAULT_R 'B'
213 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
214 #define SJ6394 0x0161 /* 63 - 94 ku offset */
216 #define RANGE_NUM_MAX 18
221 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
222 #define sizeof_euc_utf8 94
223 #define sizeof_euc_to_utf8_1byte 94
224 #define sizeof_euc_to_utf8_2bytes 94
225 #define sizeof_utf8_to_euc_C2 64
226 #define sizeof_utf8_to_euc_E5B8 64
227 #define sizeof_utf8_to_euc_2bytes 112
228 #define sizeof_utf8_to_euc_3bytes 112
231 /* MIME preprocessor */
234 #ifdef EASYWIN /*Easy Win */
235 extern POINT _BufferSize;
238 /* function prototype */
240 #ifdef ANSI_C_PROTOTYPE
242 #define STATIC static
254 void (*status_func)PROTO((struct input_code *, int));
255 int (*iconv_func)PROTO((int c2, int c1, int c0));
258 STATIC int noconvert PROTO((FILE *f));
259 STATIC int kanji_convert PROTO((FILE *f));
260 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
261 STATIC int push_hold_buf PROTO((int c2));
262 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
263 STATIC int s_iconv PROTO((int c2,int c1,int c0));
264 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
265 STATIC int e_iconv PROTO((int c2,int c1,int c0));
266 #ifdef UTF8_INPUT_ENABLE
267 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
268 STATIC int w_iconv PROTO((int c2,int c1,int c0));
269 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
270 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
272 #ifdef UTF8_OUTPUT_ENABLE
273 STATIC int e2w_conv PROTO((int c2,int c1));
274 STATIC void w_oconv PROTO((int c2,int c1));
275 STATIC void w_oconv16 PROTO((int c2,int c1));
277 STATIC void e_oconv PROTO((int c2,int c1));
278 STATIC void s_oconv PROTO((int c2,int c1));
279 STATIC void j_oconv PROTO((int c2,int c1));
280 STATIC void fold_conv PROTO((int c2,int c1));
281 STATIC void cr_conv PROTO((int c2,int c1));
282 STATIC void z_conv PROTO((int c2,int c1));
283 STATIC void rot_conv PROTO((int c2,int c1));
284 STATIC void hira_conv PROTO((int c2,int c1));
285 STATIC void base64_conv PROTO((int c2,int c1));
286 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
287 STATIC void no_connection PROTO((int c2,int c1));
288 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
290 STATIC void code_score PROTO((struct input_code *ptr));
291 STATIC void code_status PROTO((int c));
293 STATIC void std_putc PROTO((int c));
294 STATIC int std_getc PROTO((FILE *f));
295 STATIC int std_ungetc PROTO((int c,FILE *f));
297 STATIC int broken_getc PROTO((FILE *f));
298 STATIC int broken_ungetc PROTO((int c,FILE *f));
300 STATIC int mime_begin PROTO((FILE *f));
301 STATIC int mime_getc PROTO((FILE *f));
302 STATIC int mime_ungetc PROTO((int c,FILE *f));
304 STATIC int mime_begin_strict PROTO((FILE *f));
305 STATIC int mime_getc_buf PROTO((FILE *f));
306 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
307 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
309 STATIC int base64decode PROTO((int c));
310 STATIC void mime_putc PROTO((int c));
311 STATIC void open_mime PROTO((int c));
312 STATIC void close_mime PROTO(());
313 STATIC void usage PROTO(());
314 STATIC void version PROTO(());
315 STATIC void options PROTO((unsigned char *c));
317 STATIC void reinit PROTO(());
322 static unsigned char stdibuf[IOBUF_SIZE];
323 static unsigned char stdobuf[IOBUF_SIZE];
324 static unsigned char hold_buf[HOLD_SIZE*2];
325 static int hold_count;
327 /* MIME preprocessor fifo */
329 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
330 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
331 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
332 static unsigned char mime_buf[MIME_BUF_SIZE];
333 static unsigned int mime_top = 0;
334 static unsigned int mime_last = 0; /* decoded */
335 static unsigned int mime_input = 0; /* undecoded */
/* Run-time flags; several of them are set by the option-parsing switch. */
338 static int unbuf_f = FALSE;
339 static int estab_f = FALSE; /* input code has been established */
340 static int nop_f = FALSE;
341 static int binmode_f = TRUE; /* binary mode */
342 static int rot_f = FALSE; /* rot13/47 mode */
343 static int hira_f = FALSE; /* hiragana/katakana conversion */
344 static int input_f = FALSE; /* non fixed input code */
345 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
346 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
347 static int mimebuf_f = FALSE; /* MIME buffered input */
348 static int broken_f = FALSE; /* convert ESC-less broken JIS */
349 static int iso8859_f = FALSE; /* ISO8859 through */
350 static int mimeout_f = FALSE; /* base64 mode */
351 #if defined(MSDOS) || defined(__OS2__)
352 static int x0201_f = TRUE; /* Assume JISX0201 kana */
354 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
356 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
357 #ifdef UTF8_OUTPUT_ENABLE
358 static int w_oconv16_begin_f= 0; /* utf-16 header */
362 #ifdef CAP_URL_OPTION
363 static int cap_f = FALSE;
364 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
365 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
366 STATIC int cap_getc PROTO((FILE *f));
367 STATIC int cap_ungetc PROTO((int c,FILE *f));
369 static int url_f = FALSE;
370 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
371 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
372 STATIC int url_getc PROTO((FILE *f));
373 STATIC int url_ungetc PROTO((int c,FILE *f));
377 static int noout_f = FALSE;
378 STATIC void no_putc PROTO((int c));
379 static int debug_f = FALSE;
380 STATIC void debug PROTO((char *str));
383 STATIC void e_status PROTO((struct input_code *, int));
384 STATIC void s_status PROTO((struct input_code *, int));
386 #ifdef UTF8_INPUT_ENABLE
387 STATIC void w_status PROTO((struct input_code *, int));
388 static int utf16_mode = UTF16_INPUT;
391 struct input_code input_code_list[] = {
392 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv},
393 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv},
394 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv},
398 static int mimeout_mode = 0;
399 static int base64_count = 0;
401 /* X0208 -> ASCII converter */
404 static int f_line = 0; /* chars in line */
405 static int f_prev = 0;
406 static int fold_preserve_f = FALSE; /* preserve new lines */
407 static int fold_f = FALSE;
408 static int fold_len = 0;
411 static unsigned char kanji_intro = DEFAULT_J,
412 ascii_intro = DEFAULT_R;
416 #define FOLD_MARGIN 10
417 #define DEFAULT_FOLD 60
419 static int fold_margin = FOLD_MARGIN;
423 #ifdef DEFAULT_CODE_JIS
424 # define DEFAULT_CONV j_oconv
426 #ifdef DEFAULT_CODE_SJIS
427 # define DEFAULT_CONV s_oconv
429 #ifdef DEFAULT_CODE_EUC
430 # define DEFAULT_CONV e_oconv
432 #ifdef DEFAULT_CODE_UTF8
433 # define DEFAULT_CONV w_oconv
436 /* process default */
437 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
439 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
440 /* s_iconv or oconv */
441 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
443 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
444 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
445 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
446 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
447 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
448 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
449 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
451 /* static redirections */
453 static void (*o_putc)PROTO((int c)) = std_putc;
455 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
456 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
458 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc (broken_getc) */
459 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
461 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
463 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
464 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
466 /* for strict mime */
467 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
468 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
471 static int output_mode = ASCII, /* output kanji mode */
472 input_mode = ASCII, /* input kanji mode */
473 shift_mode = FALSE; /* TRUE shift out, or X0201 */
474 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
476 /* X0201 / X0208 conversion tables */
478 /* X0201 kana conversion table */
481 unsigned char cv[]= {
482 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
483 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
484 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
485 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
486 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
487 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
488 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
489 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
490 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
491 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
492 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
493 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
494 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
495 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
496 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
497 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
501 /* X0201 kana conversion table for dakuten */
504 unsigned char dv[]= {
505 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
506 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
507 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
510 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
511 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
512 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
513 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
514 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
515 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
516 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
517 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523 /* X0201 kana conversion table for han-dakuten */
526 unsigned char ev[]= {
527 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
528 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
529 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
530 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
531 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
532 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
534 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
535 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
536 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
537 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
538 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
539 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
540 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
542 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546 /* X0208 kigou conversion table */
547 /* 0x8140 - 0x819e */
549 unsigned char fv[] = {
551 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
552 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
553 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
554 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
555 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
556 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
557 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
558 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
559 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
561 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
562 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
568 static int file_out = FALSE;
570 static int overwrite = FALSE;
573 static int crmode_f = 0; /* CR, NL, CRLF */
574 #ifdef EASYWIN /*Easy Win */
575 static int end_check;
587 #ifdef EASYWIN /*Easy Win */
588 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
591 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
592 cp = (unsigned char *)*argv;
595 if(x0201_f == WISH_TRUE)
596 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
598 if (binmode_f == TRUE)
600 if (freopen("","wb",stdout) == NULL)
607 setbuf(stdout, (char *) NULL);
609 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
612 if (binmode_f == TRUE)
614 if (freopen("","rb",stdin) == NULL) return (-1);
618 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
622 kanji_convert(stdin);
628 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
637 /* reopen file for stdout */
638 if (file_out == TRUE) {
641 outfname = malloc(strlen(origfname)
642 + strlen(".nkftmpXXXXXX")
648 strcpy(outfname, origfname);
652 for (i = strlen(outfname); i; --i){
653 if (outfname[i - 1] == '/'
654 || outfname[i - 1] == '\\'){
660 strcat(outfname, "ntXXXXXX");
662 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
665 strcat(outfname, ".nkftmpXXXXXX");
666 fd = mkstemp(outfname);
669 || (fd_backup = dup(fileno(stdout))) < 0
670 || dup2(fd, fileno(stdout)) < 0
681 outfname = "nkf.out";
684 if(freopen(outfname, "w", stdout) == NULL) {
688 if (binmode_f == TRUE) {
690 if (freopen("","wb",stdout) == NULL)
697 if (binmode_f == TRUE)
699 if (freopen("","rb",fin) == NULL)
704 setvbuffer(fin, stdibuf, IOBUF_SIZE);
721 if (dup2(fd_backup, fileno(stdout)) < 0){
724 if (stat(origfname, &sb)) {
725 fprintf(stderr, "Can't stat %s\n", origfname);
727 /*
\e$B%Q!<%_%C%7%g%s$rI|85
\e(B */
728 if (chmod(outfname, sb.st_mode)) {
729 fprintf(stderr, "Can't set permission %s\n", outfname);
732 tb[0] = tb[1] = sb.st_mtime;
733 /*
\e$B%?%$%`%9%?%s%W$rI|85
\e(B */
734 if (utime(outfname, tb)) {
735 fprintf(stderr, "Can't set timestamp %s\n", outfname);
738 if (unlink(origfname)){
742 tb.actime = sb.st_atime;
743 tb.modtime = sb.st_mtime;
744 /*
\e$B%?%$%`%9%?%s%W$rI|85
\e(B */
745 if (utime(outfname, &tb)) {
746 fprintf(stderr, "Can't set timestamp %s\n", outfname);
749 if (rename(outfname, origfname)) {
751 fprintf(stderr, "Can't rename %s to %s\n",
752 outfname, origfname);
760 #ifdef EASYWIN /*Easy Win */
761 if (file_out == FALSE)
762 scanf("%d",&end_check);
765 #else /* for Other OS */
766 if (file_out == TRUE)
796 {"katakana-hiragana","h3"},
797 #ifdef UTF8_OUTPUT_ENABLE
801 #ifdef UTF8_INPUT_ENABLE
803 {"utf16-input", "W16"},
808 #ifdef CAP_URL_OPTION
818 static int option_mode;
833 case '-': /* literal options */
834 if (!*cp) { /* ignore the rest of arguments */
838 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
840 p = (unsigned char *)long_option[i].name;
841 for (j=0;*p && *p++ == cp[j];j++);
842 if (! *p && !cp[j]) break;
845 cp = (unsigned char *)long_option[i].alias;
848 if (strcmp(long_option[i].name, "overwrite") == 0){
854 #ifdef CAP_URL_OPTION
855 if (strcmp(long_option[i].name, "cap-input") == 0){
859 if (strcmp(long_option[i].name, "url-input") == 0){
865 if (strcmp(long_option[i].name, "no-output") == 0){
869 if (strcmp(long_option[i].name, "debug") == 0){
876 case 'b': /* buffered mode */
879 case 'u': /* non bufferd mode */
882 case 't': /* transparent mode */
885 case 'j': /* JIS output */
887 output_conv = j_oconv;
889 case 'e': /* AT&T EUC output */
890 output_conv = e_oconv;
892 case 's': /* SJIS output */
893 output_conv = s_oconv;
895 case 'l': /* ISO8859 Latin-1 support, no conversion */
896 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
897 input_f = LATIN1_INPUT;
899 case 'i': /* Kanji IN ESC-$-@/B */
900 if (*cp=='@'||*cp=='B')
903 case 'o': /* ASCII IN ESC-(-J/B */
904 if (*cp=='J'||*cp=='B'||*cp=='H')
912 if ('9'>= *cp && *cp>='0')
913 hira_f |= (*cp++ -'0');
920 #if defined(MSDOS) || defined(__OS2__)
935 #ifdef UTF8_OUTPUT_ENABLE
936 case 'w': /* UTF-8 output */
937 if ('1'== cp[0] && '6'==cp[1]) {
938 output_conv = w_oconv16; cp+=2;
940 w_oconv16_begin_f=2; cp++;
943 output_conv = w_oconv;
946 #ifdef UTF8_INPUT_ENABLE
947 case 'W': /* UTF-8 input */
948 if ('1'== cp[0] && '6'==cp[1]) {
949 input_f = UTF16_INPUT;
951 input_f = UTF8_INPUT;
954 /* Input code assumption */
955 case 'J': /* JIS input */
956 case 'E': /* AT&T EUC input */
959 case 'S': /* MS Kanji input */
960 input_f = SJIS_INPUT;
961 if (x0201_f==NO_X0201) x0201_f=TRUE;
963 case 'Z': /* Convert X0208 alphabet to asii */
964 /* bit:0 Convert X0208
965 bit:1 Convert Kankaku to one space
966 bit:2 Convert Kankaku to two spaces
967 bit:3 Convert HTML Entity
969 if ('9'>= *cp && *cp>='0')
970 alpha_f |= 1<<(*cp++ -'0');
974 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
975 x0201_f = FALSE; /* No X0201->X0208 conversion */
977 ESC-(-I in JIS, EUC, MS Kanji
978 SI/SO in JIS, EUC, MS Kanji
979 SSO in EUC, JIS, not in MS Kanji
982 ESC-(-I in JIS (0x20-0x5f)
983 SSO in EUC (0xa0-0xdf)
984 0xa0-0xdf in MS Kanji (0xa0-0xdf)
987 case 'X': /* Assume X0201 kana */
988 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
991 case 'F': /* prserve new lines */
992 fold_preserve_f = TRUE;
993 case 'f': /* folding -f60 or -f */
996 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
998 fold_len += *cp++ - '0';
1000 if (!(0<fold_len && fold_len<BUFSIZ))
1001 fold_len = DEFAULT_FOLD;
1005 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1007 fold_margin += *cp++ - '0';
1011 case 'm': /* MIME support */
1012 if (*cp=='B'||*cp=='Q') {
1013 mime_decode_mode = *cp++;
1014 mimebuf_f = FIXED_MIME;
1015 } else if (*cp=='N') {
1016 mime_f = TRUE; cp++;
1017 } else if (*cp=='S') {
1018 mime_f = STRICT_MIME; cp++;
1019 } else if (*cp=='0') {
1020 mime_f = FALSE; cp++;
1023 case 'M': /* MIME output */
1026 mimeout_f = FIXED_MIME; cp++;
1027 } else if (*cp=='Q') {
1029 mimeout_f = FIXED_MIME; cp++;
1034 case 'B': /* Broken JIS support */
1036 bit:1 allow any x on ESC-(-x or ESC-$-x
1037 bit:2 reset to ascii on NL
1039 if ('9'>= *cp && *cp>='0')
1040 broken_f |= 1<<(*cp++ -'0');
1045 case 'O':/* for Output file */
1049 case 'c':/* add cr code */
1052 case 'd':/* delete cr code */
1055 case 'I': /* ISO-2022-JP output */
1058 case 'L': /* line mode */
1059 if (*cp=='u') { /* unix */
1060 crmode_f = NL; cp++;
1061 } else if (*cp=='m') { /* mac */
1062 crmode_f = CR; cp++;
1063 } else if (*cp=='w') { /* windows */
1064 crmode_f = CRLF; cp++;
1065 } else if (*cp=='0') { /* no conversion */
1070 /* multiple options in one string are allowed for the Perl module */
1071 while(*cp && *cp!='-') cp++;
1075 /* bogus option but ignored */
1081 #ifdef ANSI_C_PROTOTYPE
1082 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1084 void set_iconv(f, iconv_func)
1086 int (*iconv_func)();
1090 static int (*iconv_for_check)() = 0;
1092 #ifdef INPUT_CODE_FIX
1100 #ifdef INPUT_CODE_FIX
1101 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1107 if (estab_f && iconv_for_check != iconv){
1108 #ifdef UTF8_INPUT_ENABLE
1109 if (iconv == w_iconv) debug("UTF-8\n");
1110 if (iconv == w_iconv16) debug("UTF-16\n");
1112 if (iconv == s_iconv) debug("Shift_JIS\n");
1113 if (iconv == e_iconv) debug("EUC-JP\n");
1114 iconv_for_check = iconv;
1119 #define SCORE_KANA (1) /*
\e$B$$$o$f$kH>3Q%+%J
\e(B */
1120 #define SCORE_DEPEND (SCORE_KANA << 1) /*
\e$B5!<o0MB8J8;z
\e(B */
1121 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /*
\e$BB8:_$7$J$$J8;z
\e(B */
1122 #define SCORE_ERROR (SCORE_NO_EXIST << 1) /*
\e$B%(%i!<
\e(B */
1123 int score_table_A0[] = {
1126 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1127 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1130 int score_table_F0[] = {
1132 0, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1133 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1134 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1137 void code_score(ptr)
1138 struct input_code *ptr;
1141 int c2 = ptr->buf[0];
1142 int c1 = ptr->buf[1];
1145 }else if ((c2 & 0xf0) == 0xa0){
1146 s |= score_table_A0[c2 & 0x0f];
1147 }else if ((c2 & 0xf0) == 0xf0){
1148 s |= score_table_F0[c2 & 0x0f];
1149 }else if (c2 == SSO){
1152 #ifdef UTF8_OUTPUT_ENABLE
1153 else if (!e2w_conv(c2, c1)){
1154 s |= SCORE_NO_EXIST;
1160 void status_disable(ptr)
1161 struct input_code *ptr;
1166 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1169 void status_push_ch(ptr, c)
1170 struct input_code *ptr;
1173 ptr->buf[ptr->index++] = c;
1176 void status_reset(ptr)
1177 struct input_code *ptr;
1184 void status_check(ptr, c)
1185 struct input_code *ptr;
1188 if (c <= DEL && estab_f){
1193 void s_status(ptr, c)
1194 struct input_code *ptr;
1199 status_check(ptr, c);
1204 }else if (0xa1 <= c && c <= 0xef){
1205 status_push_ch(ptr, SSO);
1206 status_push_ch(ptr, c);
1209 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
1211 status_push_ch(ptr, c);
1213 status_disable(ptr);
1217 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfd)){
1218 status_push_ch(ptr, c);
1219 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1223 status_disable(ptr);
1229 void e_status(ptr, c)
1230 struct input_code *ptr;
1235 status_check(ptr, c);
1240 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1242 status_push_ch(ptr, c);
1244 status_disable(ptr);
1248 if (0xa1 <= c && c <= 0xfe){
1249 status_push_ch(ptr, c);
1253 status_disable(ptr);
1259 #ifdef UTF8_INPUT_ENABLE
1260 void w_status(ptr, c)
1261 struct input_code *ptr;
1266 status_check(ptr, c);
1271 }else if (0xc0 <= c && c <= 0xdf){
1273 status_push_ch(ptr, c);
1274 }else if (0xe0 <= c && c <= 0xef){
1276 status_push_ch(ptr, c);
1278 status_disable(ptr);
1283 if (0x80 <= c && c <= 0xbf){
1284 status_push_ch(ptr, c);
1285 if (ptr->index > ptr->stat){
1286 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1287 &ptr->buf[0], &ptr->buf[1]);
1292 status_disable(ptr);
1303 int action_flag = 1;
1304 struct input_code *result = 0;
1305 struct input_code *p = input_code_list;
1307 (p->status_func)(p, c);
1310 }else if(p->stat == 0){
1322 set_iconv(TRUE, result->iconv_func);
1323 }else if (c <= DEL){
1324 struct input_code *ptr = input_code_list;
1362 while ((c = (*i_getc)(f)) != EOF)
1371 oconv = output_conv;
1374 /* replace continuation module, from output side */
1376 /* output redirection */
1385 if (mimeout_f == TRUE) {
1386 o_base64conv = oconv; oconv = base64_conv;
1388 /* base64_count = 0; */
1392 o_crconv = oconv; oconv = cr_conv;
1395 o_rot_conv = oconv; oconv = rot_conv;
1398 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1401 o_hira_conv = oconv; oconv = hira_conv;
1404 o_fconv = oconv; oconv = fold_conv;
1407 if (alpha_f || x0201_f) {
1408 o_zconv = oconv; oconv = z_conv;
1412 /* input redirection */
1413 #ifdef CAP_URL_OPTION
1415 i_cgetc = i_getc; i_getc = cap_getc;
1416 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1419 i_ugetc = i_getc; i_getc = url_getc;
1420 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1423 if (mime_f && mimebuf_f==FIXED_MIME) {
1424 i_mgetc = i_getc; i_getc = mime_getc;
1425 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1428 i_bgetc = i_getc; i_getc = broken_getc;
1429 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1431 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1432 set_iconv(-TRUE, e_iconv);
1433 } else if (input_f == SJIS_INPUT) {
1434 set_iconv(-TRUE, s_iconv);
1435 #ifdef UTF8_INPUT_ENABLE
1436 } else if (input_f == UTF8_INPUT) {
1437 set_iconv(-TRUE, w_iconv);
1438 } else if (input_f == UTF16_INPUT) {
1439 set_iconv(-TRUE, w_iconv16);
1442 set_iconv(FALSE, e_iconv);
1446 struct input_code *p = input_code_list;
1454 Conversion main loop. Code detection only.
1464 module_connection();
1469 output_mode = ASCII;
1472 #define NEXT continue /* no output, get next */
1473 #define SEND ; /* output c1 and c2, get next */
1474 #define LAST break /* end of loop, go closing */
1476 while ((c1 = (*i_getc)(f)) != EOF) {
1481 /* in case of 8th bit is on */
1483 /* in case of not established yet */
1484 /* It is still ambiguous */
1485 if (h_conv(f, c2, c1)==EOF)
1491 /* in case of already established */
1493 /* ignore bogus code */
1499 /* second byte, 7 bit code */
1500 /* it might be kanji shifted */
1501 if ((c1 == DEL) || (c1 <= SPACE)) {
1502 /* ignore bogus first code */
1510 #ifdef UTF8_INPUT_ENABLE
1519 } else if (c1 > DEL) {
1521 if (!estab_f && !iso8859_f) {
1522 /* not established yet */
1525 } else { /* estab_f==TRUE */
1530 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1531 /* SJIS X0201 Case... */
1532 if(iso2022jp_f && x0201_f==NO_X0201) {
1533 (*oconv)(GETA1, GETA2);
1540 } else if (c1==SSO && iconv != s_iconv) {
1541 /* EUC X0201 Case */
1542 c1 = (*i_getc)(f); /* skip SSO */
1544 if (SSP<=c1 && c1<0xe0) {
1545 if(iso2022jp_f && x0201_f==NO_X0201) {
1546 (*oconv)(GETA1, GETA2);
1553 } else { /* bogus code, skip SSO and one byte */
1557 /* already established */
1562 } else if ((c1 > SPACE) && (c1 != DEL)) {
1563 /* in case of Roman characters */
1565 /* output 1 shifted byte */
1569 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1570 /* output 1 shifted byte */
1571 if(iso2022jp_f && x0201_f==NO_X0201) {
1572 (*oconv)(GETA1, GETA2);
1579 /* look like bogus code */
1582 } else if (input_mode == X0208) {
1583 /* in case of Kanji shifted */
1586 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1587 /* Check MIME code */
1588 if ((c1 = (*i_getc)(f)) == EOF) {
1591 } else if (c1 == '?') {
1592 /* =? is mime conversion start sequence */
1593 if(mime_f == STRICT_MIME) {
1594 /* check in real detail */
1595 if (mime_begin_strict(f) == EOF)
1599 } else if (mime_begin(f) == EOF)
1609 /* normal ASCII code */
1612 } else if (c1 == SI) {
1615 } else if (c1 == SO) {
1618 } else if (c1 == ESC ) {
1619 if ((c1 = (*i_getc)(f)) == EOF) {
1620 /* (*oconv)(0, ESC); don't send bogus code */
1622 } else if (c1 == '$') {
1623 if ((c1 = (*i_getc)(f)) == EOF) {
1625 (*oconv)(0, ESC); don't send bogus code
1626 (*oconv)(0, '$'); */
1628 } else if (c1 == '@'|| c1 == 'B') {
1629 /* This is kanji introduction */
1633 } else if (c1 == '(') {
1634 if ((c1 = (*i_getc)(f)) == EOF) {
1635 /* don't send bogus code
1641 } else if (c1 == '@'|| c1 == 'B') {
1642 /* This is kanji introduction */
1647 /* could be some special code */
1654 } else if (broken_f&0x2) {
1655 /* accept any ESC-(-x as broken code ... */
1665 } else if (c1 == '(') {
1666 if ((c1 = (*i_getc)(f)) == EOF) {
1667 /* don't send bogus code
1669 (*oconv)(0, '('); */
1673 /* This is X0201 kana introduction */
1674 input_mode = X0201; shift_mode = X0201;
1676 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
1677 /* This is single-byte roman (ASCII) introduction */
1678 input_mode = ASCII; shift_mode = FALSE;
1680 } else if (broken_f&0x2) {
1681 input_mode = ASCII; shift_mode = FALSE;
1686 /* maintain various input_mode here */
1690 } else if ( c1 == 'N' || c1 == 'n' ){
1692 c1 = (*i_getc)(f); /* skip SS2 */
1693 if ( SPACE<=c1 && c1 < 0xe0 ) {
1702 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
1703 input_mode = ASCII; set_iconv(FALSE, 0);
1709 if (input_mode == X0208)
1710 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
1711 else if (input_mode)
1712 (*oconv)(input_mode, c1); /* other special case */
1713 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
1714 int c0 = (*i_getc)(f);
1717 (*iconv)(c2, c1, c0);
1723 /* goto next_word */
1727 (*iconv)(EOF, 0, 0);
1740 /** it must NOT be in the kanji shifted sequence */
1741 /** it must NOT be written in JIS7 */
1742 /** and it must be after 2 byte 8bit code */
1749 while ((c1 = (*i_getc)(f)) != EOF) {
1755 if (push_hold_buf(c1) == EOF || estab_f){
1761 struct input_code *p = input_code_list;
1762 struct input_code *result = p;
1764 if (p->score < result->score){
1769 set_iconv(FALSE, p->iconv_func);
1774 ** 1) EOF is detected, or
1775 ** 2) Code is established, or
1776 ** 3) Buffer is FULL (but last word is pushed)
1778 ** in 1) and 3) cases, we continue to use
1779 ** Kanji codes by oconv and leave estab_f unchanged.
1783 while (wc < hold_count){
1784 c2 = hold_buf[wc++];
1788 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
1789 (*iconv)(X0201, c2, 0);
1792 if (wc < hold_count){
1793 c1 = hold_buf[wc++];
1796 if (c1 == EOF) break;
1799 if ((*iconv)(c2, c1, 0) < 0){
1801 if (wc < hold_count){
1802 c0 = hold_buf[wc++];
1805 if (c0 == EOF) break;
1808 (*iconv)(c2, c1, c0);
1822 if (hold_count >= HOLD_SIZE*2)
1824 hold_buf[hold_count++] = c2;
1825 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
1828 int s2e_conv(c2, c1, p2, p1)
1832 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
1834 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
1841 return (c2 << 8) | c1;
1851 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1854 s2e_conv(c2, c1, &c2, &c1);
1867 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1877 #ifdef UTF8_INPUT_ENABLE
1879 w2e_conv(c2, c1, c0, p2, p1)
1883 extern unsigned short * utf8_to_euc_2bytes[];
1884 extern unsigned short ** utf8_to_euc_3bytes[];
1886 if (0xc0 <= c2 && c2 <= 0xef) {
1887 unsigned short **pp;
1890 if (c0 == 0) return -1;
1891 pp = utf8_to_euc_3bytes[c2 - 0x80];
1892 return w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
1894 return w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
1896 } else if (c2 == X0201) {
1909 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
1917 w_iconv16(c2, c1, c0)
1920 extern unsigned short * utf8_to_euc_2bytes[];
1921 extern unsigned short ** utf8_to_euc_3bytes[];
1922 unsigned short **pp;
1927 if (c2==0376 && c1==0377){
1928 utf16_mode = UTF16_INPUT;
1930 } else if (c2==0377 && c1==0376){
1931 utf16_mode = UTF16BE_INPUT;
1934 if (utf16_mode == UTF16BE_INPUT) {
1936 tmp=c1; c1=c2; c2=tmp;
1938 if (c2==0 || c2==EOF) {
1942 val = ((c2<<8)&0xff00) + c1;
1944 c0 = (0x80 | (c1 & 0x3f));
1945 c1 = (0xc0 | (val >> 6));
1946 pp = utf8_to_euc_2bytes;
1947 psize = sizeof_utf8_to_euc_2bytes;
1949 c0 = (0x80 | (c1 & 0x3f));
1950 c2 = (0xe0 | (val >> 12));
1951 c1 = (0x80 | ((val >> 6) & 0x3f));
1952 if (c0 == 0) return -1;
1953 if (0<=c2-0x80 && c2-0x80 <sizeof_utf8_to_euc_3bytes){
1954 pp = utf8_to_euc_3bytes[c2 - 0x80];
1955 psize = sizeof_utf8_to_euc_C2;
1960 ret = w_iconv_common(c1, c0, pp, psize, &c2, &c1);
1961 if (ret) return ret;
1967 w_iconv_common(c1, c0, pp, psize, p2, p1)
1969 unsigned short **pp;
1977 if (pp == 0) return 1;
1980 if (c1 < 0 || psize <= c1) return 1;
1982 if (p == 0) return 1;
1985 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
1987 if (val == 0) return 1;
1990 if (c2 == SO) c2 = X0201;
1999 #ifdef UTF8_OUTPUT_ENABLE
2004 extern unsigned short euc_to_utf8_1byte[];
2005 extern unsigned short * euc_to_utf8_2bytes[];
2009 p = euc_to_utf8_1byte;
2012 c2 = (c2&0x7f) - 0x21;
2013 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2014 p = euc_to_utf8_2bytes[c2];
2019 c1 = (c1 & 0x7f) - 0x21;
2020 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2033 } else if (c2 == 0) {
2034 output_mode = ASCII;
2036 } else if (c2 == ISO8859_1) {
2037 output_mode = ISO8859_1;
2038 (*o_putc)(c1 | 0x080);
2040 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2043 if (0 < val && val < 0x80){
2045 }else if (val < 0x800){
2046 (*o_putc)(0xc0 | (val >> 6));
2047 (*o_putc)(0x80 | (val & 0x3f));
2049 (*o_putc)(0xe0 | (val >> 12));
2050 (*o_putc)(0x80 | ((val >> 6) & 0x3f));
2051 (*o_putc)(0x80 | (val & 0x3f));
2062 if (w_oconv16_begin_f==2) {
2065 w_oconv16_begin_f=1;
2070 } else if (c2 == 0) {
2073 } else if (c2 == ISO8859_1) {
2075 (*o_putc)(c1 | 0x080);
2077 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2078 (*o_putc)((val&0xff00)>>8);
2079 (*o_putc)(val&0xff);
2093 } else if (c2 == 0) {
2094 output_mode = ASCII;
2096 } else if (c2 == X0201) {
2097 output_mode = JAPANESE_EUC;
2098 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2099 } else if (c2 == ISO8859_1) {
2100 output_mode = ISO8859_1;
2101 (*o_putc)(c1 | 0x080);
2103 if ((c1<0x20 || 0x7e<c1) ||
2104 (c2<0x20 || 0x7e<c2)) {
2105 set_iconv(FALSE, 0);
2106 return; /* too late to rescue this char */
2108 output_mode = JAPANESE_EUC;
2109 (*o_putc)(c2 | 0x080);
2110 (*o_putc)(c1 | 0x080);
2124 } else if (c2 == 0) {
2125 output_mode = ASCII;
2127 } else if (c2 == X0201) {
2128 output_mode = SHIFT_JIS;
2130 } else if (c2 == ISO8859_1) {
2131 output_mode = ISO8859_1;
2132 (*o_putc)(c1 | 0x080);
2134 if ((c1<0x20 || 0x7e<c1) ||
2135 (c2<0x20 || 0x7e<c2)) {
2136 set_iconv(FALSE, 0);
2137 return; /* too late to rescue this char */
2139 output_mode = SHIFT_JIS;
2140 (*o_putc)((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1)));
2141 (*o_putc)((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e)));
2151 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2154 (*o_putc)(ascii_intro);
2155 output_mode = ASCII;
2158 } else if (c2==X0201) {
2159 if (output_mode!=X0201) {
2160 output_mode = X0201;
2166 } else if (c2==ISO8859_1) {
2167 /* iso8859 introduction, or 8th bit on */
2168 /* Can we convert in 7bit form using ESC-'-'-A ?
2170 output_mode = ISO8859_1;
2172 } else if (c2 == 0) {
2173 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2176 (*o_putc)(ascii_intro);
2177 output_mode = ASCII;
2181 if (output_mode != X0208) {
2182 output_mode = X0208;
2185 (*o_putc)(kanji_intro);
2187 if (c1<0x20 || 0x7e<c1)
2189 if (c2<0x20 || 0x7e<c2)
2201 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2203 } else if (base64_count>66 && mimeout_mode) {
2204 (*o_base64conv)(EOF,0);
2206 (*o_putc)('\t'); base64_count += 7;
2208 (*o_base64conv)(c2,c1);
2212 static int broken_buf[3];
2213 static int broken_counter = 0;
2214 static int broken_last = 0;
2221 if (broken_counter>0) {
2222 return broken_buf[--broken_counter];
2225 if (c=='$' && broken_last != ESC
2226 && (input_mode==ASCII || input_mode==X0201)) {
2229 if (c1=='@'|| c1=='B') {
2230 broken_buf[0]=c1; broken_buf[1]=c;
2237 } else if (c=='(' && broken_last != ESC
2238 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2241 if (c1=='J'|| c1=='B') {
2242 broken_buf[0]=c1; broken_buf[1]=c;
2260 if (broken_counter<2)
2261 broken_buf[broken_counter++]=c;
2265 static int prev_cr = 0;
2273 if (! (c2==0&&c1==NL) ) {
2279 } else if (c1=='\r') {
2281 } else if (c1=='\n') {
2282 if (crmode_f==CRLF) {
2283 (*o_crconv)(0,'\r');
2284 } else if (crmode_f==CR) {
2285 (*o_crconv)(0,'\r');
2289 } else if (c1!='\032' || crmode_f!=NL){
2295 Return value of fold_conv()
2297 \n add newline and output char
2298 \r add newline and output nothing
2301 1 (or else) normal output
2303 fold state in prev (previous character)
2305 >0x80 Japanese (X0208/X0201)
2310 This fold algorithm does not preserve leading spaces in a line.
2311 This is the main difference from fmt.
/* Column width of one character: 2 when c2 is non-zero, otherwise 1.
   c1 is accepted so call sites can pass the (c2,c1) pair, but it is
   not used.  The argument is parenthesized so that an expression such
   as (a|b) is tested as a whole. */
#define char_size(c2,c1) ((c2)?2:1)
2323 if (c1== '\r' && !fold_preserve_f) {
2324 fold_state=0; /* ignore cr */
2325 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2327 fold_state=0; /* ignore cr */
2328 } else if (c1== BS) {
2329 if (f_line>0) f_line--;
2331 } else if (c2==EOF && f_line != 0) { /* close open last line */
2333 } else if ((c1=='\n' && !fold_preserve_f)
2334 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2335 && fold_preserve_f)) {
2337 if (fold_preserve_f) {
2341 } else if ((f_prev == c1 && !fold_preserve_f)
2342 || (f_prev == '\n' && fold_preserve_f)
2343 ) { /* duplicate newline */
2346 fold_state = '\n'; /* output two newline */
2352 if (f_prev&0x80) { /* Japanese? */
2354 fold_state = 0; /* ignore given single newline */
2355 } else if (f_prev==' ') {
2359 if (++f_line<=fold_len)
2363 fold_state = '\r'; /* fold and output nothing */
2367 } else if (c1=='\f') {
2372 fold_state = '\n'; /* output newline and clear */
2373 } else if ( (c2==0 && c1==' ')||
2374 (c2==0 && c1=='\t')||
2375 (c2=='!'&& c1=='!')) {
2376 /* X0208 kankaku or ascii space */
2377 if (f_prev == ' ') {
2378 fold_state = 0; /* remove duplicate spaces */
2381 if (++f_line<=fold_len)
2382 fold_state = ' '; /* output ASCII space only */
2384 f_prev = ' '; f_line = 0;
2385 fold_state = '\r'; /* fold and output nothing */
2389 prev0 = f_prev; /* we still need this one... , but almost done */
2391 if (c2 || c2==X0201)
2392 f_prev |= 0x80; /* this is Japanese */
2393 f_line += char_size(c2,c1);
2394 if (f_line<=fold_len) { /* normal case */
2397 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2398 f_line = char_size(c2,c1);
2399 fold_state = '\n'; /* We can't wait, do fold now */
2400 } else if (c2==X0201) {
2401 /* simple kinsoku rules return 1 means no folding */
2402 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2403 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2404 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2405 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2406 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2407 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2408 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2410 fold_state = '\n';/* add one new f_line before this character */
2413 fold_state = '\n';/* add one new f_line before this character */
2416 /* kinsoku point in ASCII */
2417 if ( c1==')'|| /* { [ ( */
2428 /* just after special */
2429 } else if (!is_alnum(prev0)) {
2430 f_line = char_size(c2,c1);
2432 } else if ((prev0==' ') || /* ignored new f_line */
2433 (prev0=='\n')|| /* ignored new f_line */
2434 (prev0&0x80)) { /* X0208 - ASCII */
2435 f_line = char_size(c2,c1);
2436 fold_state = '\n';/* add one new f_line before this character */
2438 fold_state = 1; /* default no fold in ASCII */
2442 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2443 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2444 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2445 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2446 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2447 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2448 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2449 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2450 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2451 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2452 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2453 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2454 /* default no fold in kinsoku */
2457 f_line = char_size(c2,c1);
2458 /* add one new f_line before this character */
2461 f_line = char_size(c2,c1);
2463 /* add one new f_line before this character */
2468 /* terminator process */
2469 switch(fold_state) {
2488 int z_prev2=0,z_prev1=0;
2495 /* if (c2) c1 &= 0x7f; assertion */
2497 if (x0201_f && z_prev2==X0201) { /* X0201 */
2498 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
2500 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2502 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
2504 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2508 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2517 if (x0201_f && c2==X0201) {
2518 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2519 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
2520 z_prev1 = c1; z_prev2 = c2;
2523 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
2528 /* JISX0208 Alphabet */
2529 if (alpha_f && c2 == 0x23 ) {
2531 } else if (alpha_f && c2 == 0x21 ) {
2532 /* JISX0208 Kigou */
2537 } else if (alpha_f&0x4) {
2542 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
2548 case '>': entity = ">"; break;
2549 case '<': entity = "<"; break;
2550 case '\"': entity = """; break;
2551 case '&': entity = "&"; break;
2554 while (*entity) (*o_zconv)(0, *entity++);
2564 #define rot13(c) ( \
2566 (c <= 'M') ? (c + 13): \
2567 (c <= 'Z') ? (c - 13): \
2569 (c <= 'm') ? (c + 13): \
2570 (c <= 'z') ? (c - 13): \
2574 #define rot47(c) ( \
2576 ( c <= 'O' ) ? (c + 47) : \
2577 ( c <= '~' ) ? (c - 47) : \
2585 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
2591 (*o_rot_conv)(c2,c1);
2598 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
2600 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
2603 (*o_hira_conv)(c2,c1);
2608 iso2022jp_check_conv(c2,c1)
2611 static int range[RANGE_NUM_MAX][2] = {
2634 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
2638 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
2643 for (i = 0; i < RANGE_NUM_MAX; i++) {
2644 start = range[i][0];
2647 if (c >= start && c <= end) {
2652 (*o_iso2022jp_check_conv)(c2,c1);
2656 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
2658 unsigned char *mime_pattern[] = {
2659 (unsigned char *)"\075?EUC-JP?B?",
2660 (unsigned char *)"\075?SHIFT_JIS?B?",
2661 (unsigned char *)"\075?ISO-8859-1?Q?",
2662 (unsigned char *)"\075?ISO-2022-JP?B?",
2663 (unsigned char *)"\075?ISO-2022-JP?Q?",
2664 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2665 (unsigned char *)"\075?UTF-8?B?",
2670 int mime_encode[] = {
2671 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, X0208, X0201,
2672 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2678 int mime_encode_method[] = {
2679 'B', 'B','Q', 'B', 'Q',
2680 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2687 #define MAXRECOVER 20
/* I don't trust portability of toupper */
/* ASCII-only character classification helpers.  Every use of the
   argument is parenthesized so that expression arguments keep their
   meaning.  The argument is still evaluated more than once, so do not
   pass expressions with side effects. */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
2697 if (i_getc!=mime_getc) {
2698 i_mgetc = i_getc; i_getc = mime_getc;
2699 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2700 if(mime_f==STRICT_MIME) {
2701 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
2702 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
2708 unswitch_mime_getc()
2710 if(mime_f==STRICT_MIME) {
2711 i_mgetc = i_mgetc_buf;
2712 i_mungetc = i_mungetc_buf;
2715 i_ungetc = i_mungetc;
2719 mime_begin_strict(f)
2724 unsigned char *p,*q;
2725 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
2727 mime_decode_mode = FALSE;
2728 /* =? has been checked */
2730 p = mime_pattern[j];
2733 for(i=2;p[i]>' ';i++) { /* start at =? */
2734 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
2735 /* pattern fails, try next one */
2737 while ((p = mime_pattern[++j])) {
2738 for(k=2;k<i;k++) /* assume length(p) > i */
2739 if (p[k]!=q[k]) break;
2740 if (k==i && nkf_toupper(c1)==p[k]) break;
2742 if (p) continue; /* found next one, continue */
2743 /* all fails, output from recovery buffer */
2751 mime_decode_mode = p[i-2];
2752 if (mime_decode_mode=='B') {
2753 mimebuf_f = unbuf_f;
2755 /* do MIME integrity check */
2756 return mime_integrity(f,mime_pattern[j]);
2768 /* we don't keep eof of Fifo, because it contains ?= as
2769 a terminator. It was checked in mime_integrity. */
2770 return ((mimebuf_f)?
2771 (*i_mgetc_buf)(f):Fifo(mime_input++));
2775 mime_ungetc_buf(c,f)
2780 (*i_mungetc_buf)(c,f);
2782 Fifo(--mime_input)=c;
2793 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
2794 /* re-read and convert again from mime_buffer. */
2796 /* =? has been checked */
2798 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
2799 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
2800 /* We accept any character type even if it is broken by newlines */
2801 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
2802 if (c1=='\n'||c1==' '||c1=='\r'||
2803 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
2805 /* Failed. But this could be another MIME preamble */
2813 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2814 if (!(++i<MAXRECOVER) || c1==EOF) break;
2815 if (c1=='b'||c1=='B') {
2816 mime_decode_mode = 'B';
2817 } else if (c1=='q'||c1=='Q') {
2818 mime_decode_mode = 'Q';
2822 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2823 if (!(++i<MAXRECOVER) || c1==EOF) break;
2825 mime_decode_mode = FALSE;
2831 if (!mime_decode_mode) {
2832 /* false MIME preamble, restart from mime_buffer */
2833 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
2834 /* Since we are in MIME mode until buffer becomes empty, */
2835 /* we never go into mime_begin again for a while. */
2838 /* discard mime preamble, and go to MIME mode */
2840 /* do no MIME integrity check */
2841 return c1; /* used only for checking EOF */
2861 #ifdef CAP_URL_OPTION
2866 if (nkf_isdigit(x)) return x - '0';
2867 return nkf_toupper(x) - 'A' + 10;
2870 #ifdef ANSI_C_PROTOTYPE
2871 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
2874 hex_getc(ch, f, g, u)
2887 if (!nkf_isxdigit(c2) == EOF){
2892 if (!nkf_isxdigit(c3) == EOF){
2897 return (hex2bin(c2) << 4) | hex2bin(c3);
2904 return hex_getc(':', f, i_cgetc, i_cungetc);
2912 return (*i_cungetc)(c, f);
2919 return hex_getc('%', f, i_ugetc, i_uungetc);
2927 return (*i_uungetc)(c, f);
2936 int c1, c2, c3, c4, cc;
2937 int t1, t2, t3, t4, mode, exit_mode;
2939 if (mime_top != mime_last) { /* Something is in FIFO */
2940 return Fifo(mime_top++);
2942 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
2943 mime_decode_mode=FALSE;
2944 unswitch_mime_getc();
2945 return (*i_getc)(f);
2948 if (mimebuf_f == FIXED_MIME)
2949 exit_mode = mime_decode_mode;
2952 if (mime_decode_mode == 'Q') {
2953 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
2955 if (c1=='_') return ' ';
2956 if (c1!='=' && c1!='?') {
2960 mime_decode_mode = exit_mode; /* prepare for quit */
2961 if (c1<=' ') return c1;
2962 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
2963 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
2964 /* end Q encoding */
2965 input_mode = exit_mode;
2966 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
2967 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
2970 if (c1=='='&&c2<' ') { /* this is soft wrap */
2971 while((c1 = (*i_mgetc)(f)) <=' ') {
2972 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
2974 mime_decode_mode = 'Q'; /* still in MIME */
2975 goto restart_mime_q;
2978 mime_decode_mode = 'Q'; /* still in MIME */
2982 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
2983 if (c2<=' ') return c2;
2984 mime_decode_mode = 'Q'; /* still in MIME */
/* Value of one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f');
   any other character yields 0.  The argument is parenthesized at every
   use so that expression arguments are handled correctly. */
#define hex(c)   (('0'<=(c)&&(c)<='9')?((c)-'0'):\
     ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
2987 return ((hex(c2)<<4) + hex(c3));
2990 if (mime_decode_mode != 'B') {
2991 mime_decode_mode = FALSE;
2992 return (*i_mgetc)(f);
2996 /* Base64 encoding */
2998 MIME allows line breaks in the middle of
2999 Base64, but we are very pessimistic in decoding
3000 in unbuf mode because MIME encoded code may be broken by
3001 less or an editor's control sequence (such as ESC-[-K) in unbuffered
3002 mode. Ignore incomplete MIME.
3004 mode = mime_decode_mode;
3005 mime_decode_mode = exit_mode; /* prepare for quit */
3007 while ((c1 = (*i_mgetc)(f))<=' ') {
3012 if ((c2 = (*i_mgetc)(f))<=' ') {
3015 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3016 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3019 if ((c1 == '?') && (c2 == '=')) {
3021 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3022 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3026 if ((c3 = (*i_mgetc)(f))<=' ') {
3029 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3030 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3034 if ((c4 = (*i_mgetc)(f))<=' ') {
3037 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3038 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3042 mime_decode_mode = mode; /* still in MIME sigh... */
3044 /* BASE 64 decoding */
3046 t1 = 0x3f & base64decode(c1);
3047 t2 = 0x3f & base64decode(c2);
3048 t3 = 0x3f & base64decode(c3);
3049 t4 = 0x3f & base64decode(c4);
3050 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3052 Fifo(mime_last++) = cc;
3053 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3055 Fifo(mime_last++) = cc;
3056 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3058 Fifo(mime_last++) = cc;
3063 return Fifo(mime_top++);
3071 Fifo(--mime_top) = c;
3082 /* In buffered mode, read until =? or NL or buffer full
3084 mime_input = mime_top;
3085 mime_last = mime_top;
3086 while(*p) Fifo(mime_input++) = *p++;
3089 while((c=(*i_getc)(f))!=EOF) {
3090 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3091 break; /* buffer full */
3093 if (c=='=' && d=='?') {
3094 /* checked. skip header, start decode */
3095 Fifo(mime_input++) = c;
3096 /* mime_last_input = mime_input; */
3101 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3103 /* Should we check length mod 4? */
3104 Fifo(mime_input++) = c;
3107 /* In case of Incomplete MIME, no MIME decode */
3108 Fifo(mime_input++) = c;
3109 mime_last = mime_input; /* point undecoded buffer */
3110 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3111 switch_mime_getc(); /* anyway we need buffered getc */
3122 i = c - 'A'; /* A..Z 0-25 */
3124 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3126 } else if (c > '/') {
3127 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3128 } else if (c == '+') {
3129 i = '>' /* 62 */ ; /* + 62 */
3131 i = '?' /* 63 */ ; /* / 63 */
3136 static char basis_64[] =
3137 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3147 p = mime_pattern[0];
3148 for(i=0;mime_encode[i];i++) {
3149 if (mode == mime_encode[i]) {
3150 p = mime_pattern[i];
3154 mimeout_mode = mime_encode_method[i];
3156 /* (*o_mputc)(' '); */
/* Map a value in the range 0..15 to its upper-case hexadecimal digit
   character.  The argument is parenthesized so that callers may pass
   an expression such as (c & 0xf) without extra parentheses. */
#define itoh4(c)   ((c)>=10?(c)+'A'-10:(c)+'0')
3179 if (mimeout_f==FIXED_MIME) {
3180 if (base64_count>71) {
3188 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3189 && mimeout_f!=FIXED_MIME) {
3190 if (mimeout_mode=='Q') {
3197 if (mimeout_mode!='B' || c!=SPACE) {
3206 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3207 open_mime(output_mode);
3209 } else { /* c==EOF */
3210 switch(mimeout_mode) {
3215 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3221 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3227 if (mimeout_f!=FIXED_MIME) {
3229 } else if (mimeout_mode != 'Q')
3234 switch(mimeout_mode) {
3238 (*o_mputc)(itoh4(((c>>4)&0xf)));
3239 (*o_mputc)(itoh4((c&0xf)));
3246 (*o_mputc)(basis_64[c>>2]);
3251 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3257 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3258 (*o_mputc)(basis_64[c & 0x3F]);
3278 mime_f = STRICT_MIME;
3282 #if defined(MSDOS) || defined(__OS2__)
3287 iso2022jp_f = FALSE;
3289 kanji_intro = DEFAULT_J;
3290 ascii_intro = DEFAULT_R;
3292 output_conv = DEFAULT_CONV;
3293 oconv = DEFAULT_CONV;
3296 i_mungetc = std_ungetc;
3297 i_mgetc_buf = std_getc;
3298 i_mungetc_buf = std_ungetc;
3301 i_ungetc=std_ungetc;
3304 i_bungetc= std_ungetc;
3308 o_crconv = no_connection;
3309 o_rot_conv = no_connection;
3310 o_iso2022jp_check_conv = no_connection;
3311 o_hira_conv = no_connection;
3312 o_fconv = no_connection;
3313 o_zconv = no_connection;
3316 i_ungetc = std_ungetc;
3318 i_mungetc = std_ungetc;
3320 output_mode = ASCII;
3323 mime_decode_mode = FALSE;
3332 struct input_code *p = input_code_list;
3337 #ifdef UTF8_OUTPUT_ENABLE
3338 if (w_oconv16_begin_f) {
3339 w_oconv16_begin_f = 2;
3344 fold_preserve_f = FALSE;
3347 fold_margin = FOLD_MARGIN;
3350 z_prev2=0,z_prev1=0;
3356 no_connection(c2,c1)
3359 no_connection2(c2,c1,0);
3363 no_connection2(c2,c1,c0)
3366 fprintf(stderr,"nkf internal module connection failure.\n");
3374 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3375 fprintf(stderr,"Flags:\n");
3376 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
3377 #ifdef DEFAULT_CODE_SJIS
3378 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3380 #ifdef DEFAULT_CODE_JIS
3381 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3383 #ifdef DEFAULT_CODE_EUC
3384 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3386 #ifdef DEFAULT_CODE_UTF8
3387 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3389 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3390 fprintf(stderr,"t no conversion\n");
3391 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3392 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3393 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
3394 fprintf(stderr,"v Show this usage. V: show version\n");
3395 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3396 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3397 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3398 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
3399 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
3400 fprintf(stderr," 3: Convert HTML Entity\n");
3401 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
3402 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
3404 fprintf(stderr,"T Text mode output\n");
3406 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
3407 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
3408 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
3409 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
3410 fprintf(stderr,"long name options\n");
3411 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
3412 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
3413 fprintf(stderr," --help,--version\n");
3420 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
3421 #if defined(MSDOS) && !defined(_Windows)
3424 #if !defined(__WIN32__) && defined(_Windows)
3427 #if defined(__WIN32__) && defined(_Windows)
3433 ,Version,Patchlevel);
3434 fprintf(stderr,"\n%s\n",CopyRight);
3439 ** Patch authors
3440 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
3441 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
3442 ** ohta@src.ricoh.co.jp (Junn Ohta)
3443 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
3444 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
3445 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
3446 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
3447 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
3448 ** GHG00637@nifty-serve.or.jp (COW)
3450 ** Last updated