1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be used as a drop-in replacement for the previous nkf.
32 ** When started as, e.g., nkf -e, input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as is.
35 ** There is still a high chance that bugs remain
36 ** (especially in auto-detection, mixed encodings, and error handling).
38 ** If you find any problems, please report them to
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2003 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (EUC) (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS)
113 #if defined(MSDOS) || defined(__OS2__)
120 #define setbinmode(fp) fsetbin(fp)
121 #else /* Microsoft C, Turbo C */
122 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
124 #else /* UNIX,OS/2 */
125 #define setbinmode(fp)
128 #ifdef _IOFBF /* SysV and MSDOS */
129 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
131 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
134 /*Borland C++ 4.5 EasyWin*/
135 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
141 /* added by satoru@isoternet.org */
142 #include <sys/stat.h>
152 /* state of output_mode and input_mode
169 /* Input Assumption */
173 #define LATIN1_INPUT 6
175 #define STRICT_MIME 8
180 #define JAPANESE_EUC 10
184 #define UTF8_INPUT 13
185 #define UTF16_INPUT 14
186 #define UTF16BE_INPUT 15
204 #define is_alnum(c) \
205 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
207 #define HOLD_SIZE 1024
208 #define IOBUF_SIZE 16384
210 #define DEFAULT_J 'B'
211 #define DEFAULT_R 'B'
213 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
214 #define SJ6394 0x0161 /* 63 - 94 ku offset */
216 #define RANGE_NUM_MAX 18
221 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
222 #define sizeof_euc_utf8 94
223 #define sizeof_euc_to_utf8_1byte 94
224 #define sizeof_euc_to_utf8_2bytes 94
225 #define sizeof_utf8_to_euc_C2 64
226 #define sizeof_utf8_to_euc_E5B8 64
227 #define sizeof_utf8_to_euc_2bytes 112
228 #define sizeof_utf8_to_euc_3bytes 112
231 /* MIME preprocessor */
234 #ifdef EASYWIN /*Easy Win */
235 extern POINT _BufferSize;
238 /* function prototype */
240 #ifdef ANSI_C_PROTOTYPE
242 #define STATIC static
254 void (*status_func)PROTO((struct input_code *, int));
255 int (*iconv_func)PROTO((int c2, int c1, int c0));
258 STATIC int noconvert PROTO((FILE *f));
259 STATIC int kanji_convert PROTO((FILE *f));
260 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
261 STATIC int push_hold_buf PROTO((int c2));
262 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
263 STATIC int s_iconv PROTO((int c2,int c1,int c0));
264 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
265 STATIC int e_iconv PROTO((int c2,int c1,int c0));
266 #ifdef UTF8_INPUT_ENABLE
267 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
268 STATIC int w_iconv PROTO((int c2,int c1,int c0));
269 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
270 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
272 #ifdef UTF8_OUTPUT_ENABLE
273 STATIC int e2w_conv PROTO((int c2,int c1));
274 STATIC void w_oconv PROTO((int c2,int c1));
275 STATIC void w_oconv16 PROTO((int c2,int c1));
277 STATIC void e_oconv PROTO((int c2,int c1));
278 STATIC void s_oconv PROTO((int c2,int c1));
279 STATIC void j_oconv PROTO((int c2,int c1));
280 STATIC void fold_conv PROTO((int c2,int c1));
281 STATIC void cr_conv PROTO((int c2,int c1));
282 STATIC void z_conv PROTO((int c2,int c1));
283 STATIC void rot_conv PROTO((int c2,int c1));
284 STATIC void hira_conv PROTO((int c2,int c1));
285 STATIC void base64_conv PROTO((int c2,int c1));
286 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
287 STATIC void no_connection PROTO((int c2,int c1));
288 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
290 STATIC void code_score PROTO((struct input_code *ptr));
291 STATIC void code_status PROTO((int c));
293 STATIC void std_putc PROTO((int c));
294 STATIC int std_getc PROTO((FILE *f));
295 STATIC int std_ungetc PROTO((int c,FILE *f));
297 STATIC int broken_getc PROTO((FILE *f));
298 STATIC int broken_ungetc PROTO((int c,FILE *f));
300 STATIC int mime_begin PROTO((FILE *f));
301 STATIC int mime_getc PROTO((FILE *f));
302 STATIC int mime_ungetc PROTO((int c,FILE *f));
304 STATIC int mime_begin_strict PROTO((FILE *f));
305 STATIC int mime_getc_buf PROTO((FILE *f));
306 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
307 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
309 STATIC int base64decode PROTO((int c));
310 STATIC void mime_putc PROTO((int c));
311 STATIC void open_mime PROTO((int c));
312 STATIC void close_mime PROTO(());
313 STATIC void usage PROTO(());
314 STATIC void version PROTO(());
315 STATIC void options PROTO((unsigned char *c));
317 STATIC void reinit PROTO(());
322 static unsigned char stdibuf[IOBUF_SIZE];
323 static unsigned char stdobuf[IOBUF_SIZE];
324 static unsigned char hold_buf[HOLD_SIZE*2];
325 static int hold_count;
327 /* MIME preprocessor fifo */
329 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
330 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
331 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
332 static unsigned char mime_buf[MIME_BUF_SIZE];
333 static unsigned int mime_top = 0;
334 static unsigned int mime_last = 0; /* decoded */
335 static unsigned int mime_input = 0; /* undecoded */
338 static int unbuf_f = FALSE;
339 static int estab_f = FALSE;
340 static int nop_f = FALSE;
341 static int binmode_f = TRUE; /* binary mode */
342 static int rot_f = FALSE; /* rot13/47 mode */
343 static int hira_f = FALSE; /* hira/kata henkan */
344 static int input_f = FALSE; /* non fixed input code */
345 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
346 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
347 static int mimebuf_f = FALSE; /* MIME buffered input */
348 static int broken_f = FALSE; /* convert ESC-less broken JIS */
349 static int iso8859_f = FALSE; /* ISO8859 through */
350 static int mimeout_f = FALSE; /* base64 mode */
351 #if defined(MSDOS) || defined(__OS2__)
352 static int x0201_f = TRUE; /* Assume JISX0201 kana */
354 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
356 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
357 #ifdef UTF8_OUTPUT_ENABLE
358 static int w_oconv16_begin_f= 0; /* utf-16 header */
362 #ifdef CAP_URL_OPTION
363 static int cap_f = FALSE;
364 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
365 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
366 STATIC int cap_getc PROTO((FILE *f));
367 STATIC int cap_ungetc PROTO((int c,FILE *f));
369 static int url_f = FALSE;
370 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
371 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
372 STATIC int url_getc PROTO((FILE *f));
373 STATIC int url_ungetc PROTO((int c,FILE *f));
377 static int noout_f = FALSE;
378 STATIC void no_putc PROTO((int c));
379 static int debug_f = FALSE;
380 STATIC void debug PROTO((char *str));
383 STATIC void e_status PROTO((struct input_code *, int));
384 STATIC void s_status PROTO((struct input_code *, int));
386 #ifdef UTF8_INPUT_ENABLE
387 STATIC void w_status PROTO((struct input_code *, int));
388 static int utf16_mode = UTF16_INPUT;
391 struct input_code input_code_list[] = {
392 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv},
393 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv},
394 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv},
398 static int mimeout_mode = 0;
399 static int base64_count = 0;
401 /* X0208 -> ASCII converter */
404 static int f_line = 0; /* chars in line */
405 static int f_prev = 0;
406 static int fold_preserve_f = FALSE; /* preserve new lines */
407 static int fold_f = FALSE;
408 static int fold_len = 0;
411 static unsigned char kanji_intro = DEFAULT_J,
412 ascii_intro = DEFAULT_R;
416 #define FOLD_MARGIN 10
417 #define DEFAULT_FOLD 60
419 static int fold_margin = FOLD_MARGIN;
423 #ifdef DEFAULT_CODE_JIS
424 # define DEFAULT_CONV j_oconv
426 #ifdef DEFAULT_CODE_SJIS
427 # define DEFAULT_CONV s_oconv
429 #ifdef DEFAULT_CODE_EUC
430 # define DEFAULT_CONV e_oconv
432 #ifdef DEFAULT_CODE_UTF8
433 # define DEFAULT_CONV w_oconv
436 /* process default */
437 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
439 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
440 /* s_iconv or oconv */
441 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
443 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
444 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
445 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
446 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
447 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
448 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
449 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
451 /* static redirections */
453 static void (*o_putc)PROTO((int c)) = std_putc;
455 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
456 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
458 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
459 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
461 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
463 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
464 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
466 /* for strict mime */
467 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
468 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
471 static int output_mode = ASCII, /* output kanji mode */
472 input_mode = ASCII, /* input kanji mode */
473 shift_mode = FALSE; /* TRUE shift out, or X0201 */
474 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
476 /* X0201 / X0208 conversion tables */
478 /* X0201 kana conversion table */
481 unsigned char cv[]= {
482 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
483 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
484 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
485 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
486 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
487 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
488 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
489 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
490 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
491 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
492 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
493 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
494 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
495 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
496 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
497 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
501 /* X0201 kana conversion table for dakuten */
504 unsigned char dv[]= {
505 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
506 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
507 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
510 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
511 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
512 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
513 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
514 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
515 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
516 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
517 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523 /* X0201 kana conversion table for han-dakuten */
526 unsigned char ev[]= {
527 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
528 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
529 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
530 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
531 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
532 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
534 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
535 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
536 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
537 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
538 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
539 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
540 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
542 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546 /* X0208 kigou conversion table */
547 /* 0x8140 - 0x819e */
549 unsigned char fv[] = {
551 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
552 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
553 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
554 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
555 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
556 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
557 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
558 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
559 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
561 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
562 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
568 static int file_out = FALSE;
570 static int overwrite = FALSE;
573 static int crmode_f = 0; /* CR, NL, CRLF */
574 #ifdef EASYWIN /*Easy Win */
575 static int end_check;
587 #ifdef EASYWIN /*Easy Win */
588 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
591 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
592 cp = (unsigned char *)*argv;
595 if(x0201_f == WISH_TRUE)
596 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
598 if (binmode_f == TRUE)
600 if (freopen("","wb",stdout) == NULL)
607 setbuf(stdout, (char *) NULL);
609 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
612 if (binmode_f == TRUE)
614 if (freopen("","rb",stdin) == NULL) return (-1);
618 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
622 kanji_convert(stdin);
628 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
637 /* reopen file for stdout */
638 if (file_out == TRUE) {
641 outfname = malloc(strlen(origfname)
642 + strlen(".nkftmpXXXXXX")
648 strcpy(outfname, origfname);
652 for (i = strlen(outfname); i; --i){
653 if (outfname[i - 1] == '/'
654 || outfname[i - 1] == '\\'){
660 strcat(outfname, "ntXXXXXX");
662 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
665 strcat(outfname, ".nkftmpXXXXXX");
666 fd = mkstemp(outfname);
669 || (fd_backup = dup(fileno(stdout))) < 0
670 || dup2(fd, fileno(stdout)) < 0
681 outfname = "nkf.out";
684 if(freopen(outfname, "w", stdout) == NULL) {
688 if (binmode_f == TRUE) {
690 if (freopen("","wb",stdout) == NULL)
697 if (binmode_f == TRUE)
699 if (freopen("","rb",fin) == NULL)
704 setvbuffer(fin, stdibuf, IOBUF_SIZE);
721 if (dup2(fd_backup, fileno(stdout)) < 0){
724 if (stat(origfname, &sb)) {
725 fprintf(stderr, "Can't stat %s\n", origfname);
727 /* restore the permissions */
728 if (chmod(outfname, sb.st_mode)) {
729 fprintf(stderr, "Can't set permission %s\n", outfname);
732 tb[0] = tb[1] = sb.st_mtime;
733 /* restore the timestamp */
734 if (utime(outfname, tb)) {
735 fprintf(stderr, "Can't set timestamp %s\n", outfname);
738 if (unlink(origfname)){
742 tb.actime = sb.st_atime;
743 tb.modtime = sb.st_mtime;
744 /* restore the timestamp */
745 if (utime(outfname, &tb)) {
746 fprintf(stderr, "Can't set timestamp %s\n", outfname);
749 if (rename(outfname, origfname)) {
751 fprintf(stderr, "Can't rename %s to %s\n",
752 outfname, origfname);
760 #ifdef EASYWIN /*Easy Win */
761 if (file_out == FALSE)
762 scanf("%d",&end_check);
765 #else /* for Other OS */
766 if (file_out == TRUE)
796 {"katakana-hiragana","h3"},
797 #ifdef UTF8_OUTPUT_ENABLE
801 #ifdef UTF8_INPUT_ENABLE
803 {"utf16-input", "W16"},
808 #ifdef CAP_URL_OPTION
818 static int option_mode;
833 case '-': /* literal options */
834 if (!*cp) { /* ignore the rest of arguments */
838 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
840 p = (unsigned char *)long_option[i].name;
841 for (j=0;*p && *p++ == cp[j];j++);
842 if (! *p && !cp[j]) break;
845 cp = (unsigned char *)long_option[i].alias;
848 if (strcmp(long_option[i].name, "overwrite") == 0){
854 #ifdef CAP_URL_OPTION
855 if (strcmp(long_option[i].name, "cap-input") == 0){
859 if (strcmp(long_option[i].name, "url-input") == 0){
865 if (strcmp(long_option[i].name, "no-output") == 0){
869 if (strcmp(long_option[i].name, "debug") == 0){
876 case 'b': /* buffered mode */
879 case 'u': /* non-buffered mode */
882 case 't': /* transparent mode */
885 case 'j': /* JIS output */
887 output_conv = j_oconv;
889 case 'e': /* AT&T EUC output */
890 output_conv = e_oconv;
892 case 's': /* SJIS output */
893 output_conv = s_oconv;
895 case 'l': /* ISO8859 Latin-1 support, no conversion */
896 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
897 input_f = LATIN1_INPUT;
899 case 'i': /* Kanji IN ESC-$-@/B */
900 if (*cp=='@'||*cp=='B')
903 case 'o': /* ASCII IN ESC-(-J/B */
904 if (*cp=='J'||*cp=='B'||*cp=='H')
912 if ('9'>= *cp && *cp>='0')
913 hira_f |= (*cp++ -'0');
920 #if defined(MSDOS) || defined(__OS2__)
935 #ifdef UTF8_OUTPUT_ENABLE
936 case 'w': /* UTF-8 output */
937 if ('1'== cp[0] && '6'==cp[1]) {
938 output_conv = w_oconv16; cp+=2;
940 w_oconv16_begin_f=2; cp++;
943 output_conv = w_oconv;
946 #ifdef UTF8_INPUT_ENABLE
947 case 'W': /* UTF-8 input */
948 if ('1'== cp[0] && '6'==cp[1]) {
949 input_f = UTF16_INPUT;
951 input_f = UTF8_INPUT;
954 /* Input code assumption */
955 case 'J': /* JIS input */
956 case 'E': /* AT&T EUC input */
959 case 'S': /* MS Kanji input */
960 input_f = SJIS_INPUT;
961 if (x0201_f==NO_X0201) x0201_f=TRUE;
963 case 'Z': /* Convert X0208 alphabet to ASCII */
964 /* bit:0 Convert X0208
965 bit:1 Convert Kankaku to one space
966 bit:2 Convert Kankaku to two spaces
967 bit:3 Convert HTML Entity
969 if ('9'>= *cp && *cp>='0')
970 alpha_f |= 1<<(*cp++ -'0');
974 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
975 x0201_f = FALSE; /* No X0201->X0208 conversion */
977 ESC-(-I in JIS, EUC, MS Kanji
978 SI/SO in JIS, EUC, MS Kanji
979 SSO in EUC, JIS, not in MS Kanji
982 ESC-(-I in JIS (0x20-0x5f)
983 SSO in EUC (0xa0-0xdf)
984 0xa0-0xd in MS Kanji (0xa0-0xdf)
987 case 'X': /* Assume X0201 kana */
988 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
991 case 'F': /* preserve new lines */
992 fold_preserve_f = TRUE;
993 case 'f': /* folding -f60 or -f */
996 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
998 fold_len += *cp++ - '0';
1000 if (!(0<fold_len && fold_len<BUFSIZ))
1001 fold_len = DEFAULT_FOLD;
1005 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1007 fold_margin += *cp++ - '0';
1011 case 'm': /* MIME support */
1012 if (*cp=='B'||*cp=='Q') {
1013 mime_decode_mode = *cp++;
1014 mimebuf_f = FIXED_MIME;
1015 } else if (*cp=='N') {
1016 mime_f = TRUE; cp++;
1017 } else if (*cp=='S') {
1018 mime_f = STRICT_MIME; cp++;
1019 } else if (*cp=='0') {
1020 mime_f = FALSE; cp++;
1023 case 'M': /* MIME output */
1026 mimeout_f = FIXED_MIME; cp++;
1027 } else if (*cp=='Q') {
1029 mimeout_f = FIXED_MIME; cp++;
1034 case 'B': /* Broken JIS support */
1036 bit:1 allow any x on ESC-(-x or ESC-$-x
1037 bit:2 reset to ascii on NL
1039 if ('9'>= *cp && *cp>='0')
1040 broken_f |= 1<<(*cp++ -'0');
1045 case 'O':/* for Output file */
1049 case 'c':/* add cr code */
1052 case 'd':/* delete cr code */
1055 case 'I': /* ISO-2022-JP output */
1058 case 'L': /* line mode */
1059 if (*cp=='u') { /* unix */
1060 crmode_f = NL; cp++;
1061 } else if (*cp=='m') { /* mac */
1062 crmode_f = CR; cp++;
1063 } else if (*cp=='w') { /* windows */
1064 crmode_f = CRLF; cp++;
1065 } else if (*cp=='0') { /* no conversion */
1070 /* multiple options in one string are allowed for the Perl module */
1071 while(*cp && *cp!='-') cp++;
1075 /* bogus option but ignored */
1081 #ifdef ANSI_C_PROTOTYPE
1082 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1084 void set_iconv(f, iconv_func)
1086 int (*iconv_func)();
1090 static int (*iconv_for_check)() = 0;
1092 #ifdef INPUT_CODE_FIX
1100 #ifdef INPUT_CODE_FIX
1101 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1107 if (estab_f && iconv_for_check != iconv){
1108 #ifdef UTF8_INPUT_ENABLE
1109 if (iconv == w_iconv) debug("UTF-8\n");
1110 if (iconv == w_iconv16) debug("UTF-16\n");
1112 if (iconv == s_iconv) debug("Shift_JIS\n");
1113 if (iconv == e_iconv) debug("EUC-JP\n");
1114 iconv_for_check = iconv;
1119 #define SCORE_KANA (1) /* so-called half-width kana */
1120 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1121 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* characters that do not exist */
1122 #define SCORE_ERROR (SCORE_NO_EXIST << 1) /* error */
1123 int score_table_A0[] = {
1126 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1127 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1130 int score_table_F0[] = {
1132 0, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1133 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1134 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1137 void code_score(ptr)
1138 struct input_code *ptr;
1141 int c2 = ptr->buf[0];
1142 int c1 = ptr->buf[1];
1145 }else if ((c2 & 0xf0) == 0xa0){
1146 s |= score_table_A0[c2 & 0x0f];
1147 }else if ((c2 & 0xf0) == 0xf0){
1148 s |= score_table_F0[c2 & 0x0f];
1149 }else if (c2 == SSO){
1152 #ifdef UTF8_OUTPUT_ENABLE
1153 else if (!e2w_conv(c2, c1)){
1154 s |= SCORE_NO_EXIST;
1160 void status_disable(ptr)
1161 struct input_code *ptr;
1166 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1169 void status_push_ch(ptr, c)
1170 struct input_code *ptr;
1173 ptr->buf[ptr->index++] = c;
1176 void status_reset(ptr)
1177 struct input_code *ptr;
1184 void status_check(ptr, c)
1185 struct input_code *ptr;
1188 if (c <= DEL && estab_f){
1193 void s_status(ptr, c)
1194 struct input_code *ptr;
1199 status_check(ptr, c);
1204 }else if (0xa1 <= c && c <= 0xdf){
1205 status_push_ch(ptr, SSO);
1206 status_push_ch(ptr, c);
1209 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
1211 status_push_ch(ptr, c);
1213 status_disable(ptr);
1217 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1218 status_push_ch(ptr, c);
1219 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1223 status_disable(ptr);
1229 void e_status(ptr, c)
1230 struct input_code *ptr;
1235 status_check(ptr, c);
1240 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1242 status_push_ch(ptr, c);
1244 status_disable(ptr);
1248 if (0xa1 <= c && c <= 0xfe){
1249 status_push_ch(ptr, c);
1253 status_disable(ptr);
1259 #ifdef UTF8_INPUT_ENABLE
1260 void w_status(ptr, c)
1261 struct input_code *ptr;
1266 status_check(ptr, c);
1271 }else if (0xc0 <= c && c <= 0xdf){
1273 status_push_ch(ptr, c);
1274 }else if (0xe0 <= c && c <= 0xef){
1276 status_push_ch(ptr, c);
1278 status_disable(ptr);
1283 if (0x80 <= c && c <= 0xbf){
1284 status_push_ch(ptr, c);
1285 if (ptr->index > ptr->stat){
1286 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1287 &ptr->buf[0], &ptr->buf[1]);
1292 status_disable(ptr);
1303 int action_flag = 1;
1304 struct input_code *result = 0;
1305 struct input_code *p = input_code_list;
1307 (p->status_func)(p, c);
1310 }else if(p->stat == 0){
1322 set_iconv(TRUE, result->iconv_func);
1323 }else if (c <= DEL){
1324 struct input_code *ptr = input_code_list;
1362 while ((c = (*i_getc)(f)) != EOF)
1371 oconv = output_conv;
1374 /* replace continuation module, from output side */
1376 /* output redirection */
1385 if (mimeout_f == TRUE) {
1386 o_base64conv = oconv; oconv = base64_conv;
1388 /* base64_count = 0; */
1392 o_crconv = oconv; oconv = cr_conv;
1395 o_rot_conv = oconv; oconv = rot_conv;
1398 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1401 o_hira_conv = oconv; oconv = hira_conv;
1404 o_fconv = oconv; oconv = fold_conv;
1407 if (alpha_f || x0201_f) {
1408 o_zconv = oconv; oconv = z_conv;
1412 /* input redirection */
1413 #ifdef CAP_URL_OPTION
1415 i_cgetc = i_getc; i_getc = cap_getc;
1416 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1419 i_ugetc = i_getc; i_getc = url_getc;
1420 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1423 if (mime_f && mimebuf_f==FIXED_MIME) {
1424 i_mgetc = i_getc; i_getc = mime_getc;
1425 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1428 i_bgetc = i_getc; i_getc = broken_getc;
1429 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1431 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1432 set_iconv(-TRUE, e_iconv);
1433 } else if (input_f == SJIS_INPUT) {
1434 set_iconv(-TRUE, s_iconv);
1435 #ifdef UTF8_INPUT_ENABLE
1436 } else if (input_f == UTF8_INPUT) {
1437 set_iconv(-TRUE, w_iconv);
1438 } else if (input_f == UTF16_INPUT) {
1439 set_iconv(-TRUE, w_iconv16);
1442 set_iconv(FALSE, e_iconv);
1446 struct input_code *p = input_code_list;
1454 Conversion main loop. Code detection only.
1464 module_connection();
1469 output_mode = ASCII;
1472 #define NEXT continue /* no output, get next */
1473 #define SEND ; /* output c1 and c2, get next */
1474 #define LAST break /* end of loop, go closing */
1476 while ((c1 = (*i_getc)(f)) != EOF) {
1481 /* in case of 8th bit is on */
1483 /* in case of not established yet */
1484 /* It is still ambiguous */
1485 if (h_conv(f, c2, c1)==EOF)
1491 /* in case of already established */
1493 /* ignore bogus code */
1499 /* second byte, 7 bit code */
1500 /* it might be kanji shifted */
1501 if ((c1 == DEL) || (c1 <= SPACE)) {
1502 /* ignore bogus first code */
1510 #ifdef UTF8_INPUT_ENABLE
1519 } else if (c1 > DEL) {
1521 if (!estab_f && !iso8859_f) {
1522 /* not established yet */
1525 } else { /* estab_f==TRUE */
1530 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1531 /* SJIS X0201 Case... */
1532 if(iso2022jp_f && x0201_f==NO_X0201) {
1533 (*oconv)(GETA1, GETA2);
1540 } else if (c1==SSO && iconv != s_iconv) {
1541 /* EUC X0201 Case */
1542 c1 = (*i_getc)(f); /* skip SSO */
1544 if (SSP<=c1 && c1<0xe0) {
1545 if(iso2022jp_f && x0201_f==NO_X0201) {
1546 (*oconv)(GETA1, GETA2);
1553 } else { /* bogus code, skip SSO and one byte */
1557 /* already established */
1562 } else if ((c1 > SPACE) && (c1 != DEL)) {
1563 /* in case of Roman characters */
1565 /* output 1 shifted byte */
1569 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1570 /* output 1 shifted byte */
1571 if(iso2022jp_f && x0201_f==NO_X0201) {
1572 (*oconv)(GETA1, GETA2);
1579 /* look like bogus code */
1582 } else if (input_mode == X0208) {
1583 /* in case of Kanji shifted */
1586 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1587 /* Check MIME code */
1588 if ((c1 = (*i_getc)(f)) == EOF) {
1591 } else if (c1 == '?') {
1592 /* =? is mime conversion start sequence */
1593 if(mime_f == STRICT_MIME) {
1594 /* check in real detail */
1595 if (mime_begin_strict(f) == EOF)
1599 } else if (mime_begin(f) == EOF)
1609 /* normal ASCII code */
1612 } else if (c1 == SI) {
1615 } else if (c1 == SO) {
1618 } else if (c1 == ESC ) {
1619 if ((c1 = (*i_getc)(f)) == EOF) {
1620 /* (*oconv)(0, ESC); don't send bogus code */
1622 } else if (c1 == '$') {
1623 if ((c1 = (*i_getc)(f)) == EOF) {
1625 (*oconv)(0, ESC); don't send bogus code
1626 (*oconv)(0, '$'); */
1628 } else if (c1 == '@'|| c1 == 'B') {
1629 /* This is kanji introduction */
1633 } else if (c1 == '(') {
1634 if ((c1 = (*i_getc)(f)) == EOF) {
1635 /* don't send bogus code
1641 } else if (c1 == '@'|| c1 == 'B') {
1642 /* This is kanji introduction */
1647 /* could be some special code */
1654 } else if (broken_f&0x2) {
1655 /* accept any ESC-(-x as broken code ... */
1665 } else if (c1 == '(') {
1666 if ((c1 = (*i_getc)(f)) == EOF) {
1667 /* don't send bogus code
1669 (*oconv)(0, '('); */
1673 /* This is X0201 kana introduction */
1674 input_mode = X0201; shift_mode = X0201;
1676 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
1677 /* This is ASCII / X0201 roman introduction */
1678 input_mode = ASCII; shift_mode = FALSE;
1680 } else if (broken_f&0x2) {
1681 input_mode = ASCII; shift_mode = FALSE;
1686 /* maintain various input_mode here */
1690 } else if ( c1 == 'N' || c1 == 'n' ){
1692 c1 = (*i_getc)(f); /* skip SS2 */
1693 if ( SPACE<=c1 && c1 < 0xe0 ) {
1702 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
1703 input_mode = ASCII; set_iconv(FALSE, 0);
1709 if (input_mode == X0208)
1710 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
1711 else if (input_mode)
1712 (*oconv)(input_mode, c1); /* other special case */
1713 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
1714 int c0 = (*i_getc)(f);
1717 (*iconv)(c2, c1, c0);
1723 /* goto next_word */
1727 (*iconv)(EOF, 0, 0);
1740 /** it must NOT be in the kanji shifted sequence */
1741 /** it must NOT be written in JIS7 */
1742 /** and it must be after 2 byte 8bit code */
1749 while ((c1 = (*i_getc)(f)) != EOF) {
1755 if (push_hold_buf(c1) == EOF || estab_f){
1761 struct input_code *p = input_code_list;
1762 struct input_code *result = p;
1764 if (p->score < result->score){
1769 set_iconv(FALSE, p->iconv_func);
1774 ** 1) EOF is detected, or
1775 ** 2) Code is established, or
1776 ** 3) Buffer is FULL (but last word is pushed)
1778 ** in 1) and 3) cases, we continue to use
1779 ** Kanji codes by oconv and leave estab_f unchanged.
1783 while (wc < hold_count){
1784 c2 = hold_buf[wc++];
1788 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
1789 (*iconv)(X0201, c2, 0);
1792 if (wc < hold_count){
1793 c1 = hold_buf[wc++];
1796 if (c1 == EOF) break;
1799 if ((*iconv)(c2, c1, 0) < 0){
1801 if (wc < hold_count){
1802 c0 = hold_buf[wc++];
1805 if (c0 == EOF) break;
1808 (*iconv)(c2, c1, c0);
1822 if (hold_count >= HOLD_SIZE*2)
1824 hold_buf[hold_count++] = c2;
1825 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
1828 int s2e_conv(c2, c1, p2, p1)
1832 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
1834 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
1841 return (c2 << 8) | c1;
1851 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1854 s2e_conv(c2, c1, &c2, &c1);
1867 } else if (c2 == SSO){
1870 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1880 #ifdef UTF8_INPUT_ENABLE
1882 w2e_conv(c2, c1, c0, p2, p1)
1886 extern unsigned short * utf8_to_euc_2bytes[];
1887 extern unsigned short ** utf8_to_euc_3bytes[];
1889 if (0xc0 <= c2 && c2 <= 0xef) {
1890 unsigned short **pp;
1893 if (c0 == 0) return -1;
1894 pp = utf8_to_euc_3bytes[c2 - 0x80];
1895 return w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
1897 return w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
1899 } else if (c2 == X0201) {
1912 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
1920 w_iconv16(c2, c1, c0)
1923 extern unsigned short * utf8_to_euc_2bytes[];
1924 extern unsigned short ** utf8_to_euc_3bytes[];
1925 unsigned short **pp;
1930 if (c2==0376 && c1==0377){
1931 utf16_mode = UTF16_INPUT;
1933 } else if (c2==0377 && c1==0376){
1934 utf16_mode = UTF16BE_INPUT;
1937 if (utf16_mode == UTF16BE_INPUT) {
1939 tmp=c1; c1=c2; c2=tmp;
1941 if (c2==0 || c2==EOF) {
1945 val = ((c2<<8)&0xff00) + c1;
1947 c0 = (0x80 | (c1 & 0x3f));
1948 c1 = (0xc0 | (val >> 6));
1949 pp = utf8_to_euc_2bytes;
1950 psize = sizeof_utf8_to_euc_2bytes;
1952 c0 = (0x80 | (c1 & 0x3f));
1953 c2 = (0xe0 | (val >> 12));
1954 c1 = (0x80 | ((val >> 6) & 0x3f));
1955 if (c0 == 0) return -1;
1956 if (0<=c2-0x80 && c2-0x80 <sizeof_utf8_to_euc_3bytes){
1957 pp = utf8_to_euc_3bytes[c2 - 0x80];
1958 psize = sizeof_utf8_to_euc_C2;
1963 ret = w_iconv_common(c1, c0, pp, psize, &c2, &c1);
1964 if (ret) return ret;
1970 w_iconv_common(c1, c0, pp, psize, p2, p1)
1972 unsigned short **pp;
1980 if (pp == 0) return 1;
1983 if (c1 < 0 || psize <= c1) return 1;
1985 if (p == 0) return 1;
1988 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
1990 if (val == 0) return 1;
1993 if (c2 == SO) c2 = X0201;
2002 #ifdef UTF8_OUTPUT_ENABLE
2007 extern unsigned short euc_to_utf8_1byte[];
2008 extern unsigned short * euc_to_utf8_2bytes[];
2012 p = euc_to_utf8_1byte;
2015 c2 = (c2&0x7f) - 0x21;
2016 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2017 p = euc_to_utf8_2bytes[c2];
2022 c1 = (c1 & 0x7f) - 0x21;
2023 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2036 } else if (c2 == 0) {
2037 output_mode = ASCII;
2039 } else if (c2 == ISO8859_1) {
2040 output_mode = ISO8859_1;
2041 (*o_putc)(c1 | 0x080);
2043 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2046 if (0 < val && val < 0x80){
2048 }else if (val < 0x800){
2049 (*o_putc)(0xc0 | (val >> 6));
2050 (*o_putc)(0x80 | (val & 0x3f));
2052 (*o_putc)(0xe0 | (val >> 12));
2053 (*o_putc)(0x80 | ((val >> 6) & 0x3f));
2054 (*o_putc)(0x80 | (val & 0x3f));
2065 if (w_oconv16_begin_f==2) {
2068 w_oconv16_begin_f=1;
2073 } else if (c2 == 0) {
2076 } else if (c2 == ISO8859_1) {
2078 (*o_putc)(c1 | 0x080);
2080 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2081 (*o_putc)((val&0xff00)>>8);
2082 (*o_putc)(val&0xff);
2096 } else if (c2 == 0) {
2097 output_mode = ASCII;
2099 } else if (c2 == X0201) {
2100 output_mode = JAPANESE_EUC;
2101 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2102 } else if (c2 == ISO8859_1) {
2103 output_mode = ISO8859_1;
2104 (*o_putc)(c1 | 0x080);
2106 if ((c1<0x20 || 0x7e<c1) ||
2107 (c2<0x20 || 0x7e<c2)) {
2108 set_iconv(FALSE, 0);
2109 return; /* too late to rescue this char */
2111 output_mode = JAPANESE_EUC;
2112 (*o_putc)(c2 | 0x080);
2113 (*o_putc)(c1 | 0x080);
2127 } else if (c2 == 0) {
2128 output_mode = ASCII;
2130 } else if (c2 == X0201) {
2131 output_mode = SHIFT_JIS;
2133 } else if (c2 == ISO8859_1) {
2134 output_mode = ISO8859_1;
2135 (*o_putc)(c1 | 0x080);
2137 if ((c1<0x20 || 0x7e<c1) ||
2138 (c2<0x20 || 0x7e<c2)) {
2139 set_iconv(FALSE, 0);
2140 return; /* too late to rescue this char */
2142 output_mode = SHIFT_JIS;
2143 (*o_putc)((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1)));
2144 (*o_putc)((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e)));
2154 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2157 (*o_putc)(ascii_intro);
2158 output_mode = ASCII;
2161 } else if (c2==X0201) {
2162 if (output_mode!=X0201) {
2163 output_mode = X0201;
2169 } else if (c2==ISO8859_1) {
2170 /* iso8859 introduction, or 8th bit on */
2171 /* Can we convert in 7bit form using ESC-'-'-A ?
2173 output_mode = ISO8859_1;
2175 } else if (c2 == 0) {
2176 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2179 (*o_putc)(ascii_intro);
2180 output_mode = ASCII;
2184 if (output_mode != X0208) {
2185 output_mode = X0208;
2188 (*o_putc)(kanji_intro);
2190 if (c1<0x20 || 0x7e<c1)
2192 if (c2<0x20 || 0x7e<c2)
2204 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2206 } else if (base64_count>66 && mimeout_mode) {
2207 (*o_base64conv)(EOF,0);
2209 (*o_putc)('\t'); base64_count += 7;
2211 (*o_base64conv)(c2,c1);
2215 static int broken_buf[3];
2216 static int broken_counter = 0;
2217 static int broken_last = 0;
2224 if (broken_counter>0) {
2225 return broken_buf[--broken_counter];
2228 if (c=='$' && broken_last != ESC
2229 && (input_mode==ASCII || input_mode==X0201)) {
2232 if (c1=='@'|| c1=='B') {
2233 broken_buf[0]=c1; broken_buf[1]=c;
2240 } else if (c=='(' && broken_last != ESC
2241 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2244 if (c1=='J'|| c1=='B') {
2245 broken_buf[0]=c1; broken_buf[1]=c;
2263 if (broken_counter<2)
2264 broken_buf[broken_counter++]=c;
2268 static int prev_cr = 0;
2276 if (! (c2==0&&c1==NL) ) {
2282 } else if (c1=='\r') {
2284 } else if (c1=='\n') {
2285 if (crmode_f==CRLF) {
2286 (*o_crconv)(0,'\r');
2287 } else if (crmode_f==CR) {
2288 (*o_crconv)(0,'\r');
2292 } else if (c1!='\032' || crmode_f!=NL){
2298 Return value of fold_conv()
2300 \n add newline and output char
2301 \r add newline and output nothing
2304 1 (or else) normal output
2306 fold state in prev (previous character)
2308 >0x80 Japanese (X0208/X0201)
2313 This fold algorithm does not preserve leading spaces in a line.
2314 This is the main difference from fmt.
2317 #define char_size(c2,c1) (c2?2:1)
2326 if (c1== '\r' && !fold_preserve_f) {
2327 fold_state=0; /* ignore cr */
2328 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2330 fold_state=0; /* ignore cr */
2331 } else if (c1== BS) {
2332 if (f_line>0) f_line--;
2334 } else if (c2==EOF && f_line != 0) { /* close open last line */
2336 } else if ((c1=='\n' && !fold_preserve_f)
2337 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2338 && fold_preserve_f)) {
2340 if (fold_preserve_f) {
2344 } else if ((f_prev == c1 && !fold_preserve_f)
2345 || (f_prev == '\n' && fold_preserve_f)
2346 ) { /* duplicate newline */
2349 fold_state = '\n'; /* output two newline */
2355 if (f_prev&0x80) { /* Japanese? */
2357 fold_state = 0; /* ignore given single newline */
2358 } else if (f_prev==' ') {
2362 if (++f_line<=fold_len)
2366 fold_state = '\r'; /* fold and output nothing */
2370 } else if (c1=='\f') {
2375 fold_state = '\n'; /* output newline and clear */
2376 } else if ( (c2==0 && c1==' ')||
2377 (c2==0 && c1=='\t')||
2378 (c2=='!'&& c1=='!')) {
2379 /* X0208 kankaku or ascii space */
2380 if (f_prev == ' ') {
2381 fold_state = 0; /* remove duplicate spaces */
2384 if (++f_line<=fold_len)
2385 fold_state = ' '; /* output ASCII space only */
2387 f_prev = ' '; f_line = 0;
2388 fold_state = '\r'; /* fold and output nothing */
2392 prev0 = f_prev; /* we still need this one... , but almost done */
2394 if (c2 || c2==X0201)
2395 f_prev |= 0x80; /* this is Japanese */
2396 f_line += char_size(c2,c1);
2397 if (f_line<=fold_len) { /* normal case */
2400 if (f_line>=fold_len+fold_margin) { /* too many kinsoku suspensions */
2401 f_line = char_size(c2,c1);
2402 fold_state = '\n'; /* We can't wait, do fold now */
2403 } else if (c2==X0201) {
2404 /* simple kinsoku rules return 1 means no folding */
2405 if (c1==(0xde&0x7f)) fold_state = 1; /* ゛ */
2406 else if (c1==(0xdf&0x7f)) fold_state = 1; /* ゜ */
2407 else if (c1==(0xa4&0x7f)) fold_state = 1; /* 。 */
2408 else if (c1==(0xa3&0x7f)) fold_state = 1; /* ， */
2409 else if (c1==(0xa1&0x7f)) fold_state = 1; /* 」 */
2410 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2411 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2413 fold_state = '\n';/* add one new f_line before this character */
2416 fold_state = '\n';/* add one new f_line before this character */
2419 /* kinsoku point in ASCII */
2420 if ( c1==')'|| /* { [ ( */
2431 /* just after special */
2432 } else if (!is_alnum(prev0)) {
2433 f_line = char_size(c2,c1);
2435 } else if ((prev0==' ') || /* ignored new f_line */
2436 (prev0=='\n')|| /* ignored new f_line */
2437 (prev0&0x80)) { /* X0208 - ASCII */
2438 f_line = char_size(c2,c1);
2439 fold_state = '\n';/* add one new f_line before this character */
2441 fold_state = 1; /* default no fold in ASCII */
2445 if (c1=='"') fold_state = 1; /* 、 */
2446 else if (c1=='#') fold_state = 1; /* 。 */
2447 else if (c1=='W') fold_state = 1; /* 」 */
2448 else if (c1=='K') fold_state = 1; /* ） */
2449 else if (c1=='$') fold_state = 1; /* ， */
2450 else if (c1=='%') fold_state = 1; /* ． */
2451 else if (c1=='\'') fold_state = 1; /* ＋ */
2452 else if (c1=='(') fold_state = 1; /* ； */
2453 else if (c1==')') fold_state = 1; /* ？ */
2454 else if (c1=='*') fold_state = 1; /* ！ */
2455 else if (c1=='+') fold_state = 1; /* ゛ */
2456 else if (c1==',') fold_state = 1; /* ゜ */
2457 /* default no fold in kinsoku */
2460 f_line = char_size(c2,c1);
2461 /* add one new f_line before this character */
2464 f_line = char_size(c2,c1);
2466 /* add one new f_line before this character */
2471 /* terminator process */
2472 switch(fold_state) {
2491 int z_prev2=0,z_prev1=0;
2498 /* if (c2) c1 &= 0x7f; assertion */
2500 if (x0201_f && z_prev2==X0201) { /* X0201 */
2501 if (c1==(0xde&0x7f)) { /* dakuten (濁点) */
2503 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2505 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /* handakuten (半濁点) */
2507 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2511 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2520 if (x0201_f && c2==X0201) {
2521 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2522 /* wait for dakuten (濁点) or handakuten (半濁点) */
2523 z_prev1 = c1; z_prev2 = c2;
2526 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
2531 /* JISX0208 Alphabet */
2532 if (alpha_f && c2 == 0x23 ) {
2534 } else if (alpha_f && c2 == 0x21 ) {
2535 /* JISX0208 Kigou */
2540 } else if (alpha_f&0x4) {
2545 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
2551 case '>': entity = ">"; break;
2552 case '<': entity = "<"; break;
2553 case '\"': entity = """; break;
2554 case '&': entity = "&"; break;
2557 while (*entity) (*o_zconv)(0, *entity++);
2567 #define rot13(c) ( \
2569 (c <= 'M') ? (c + 13): \
2570 (c <= 'Z') ? (c - 13): \
2572 (c <= 'm') ? (c + 13): \
2573 (c <= 'z') ? (c - 13): \
2577 #define rot47(c) ( \
2579 ( c <= 'O' ) ? (c + 47) : \
2580 ( c <= '~' ) ? (c - 47) : \
2588 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
2594 (*o_rot_conv)(c2,c1);
2601 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
2603 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
2606 (*o_hira_conv)(c2,c1);
2611 iso2022jp_check_conv(c2,c1)
2614 static int range[RANGE_NUM_MAX][2] = {
2637 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
2641 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
2646 for (i = 0; i < RANGE_NUM_MAX; i++) {
2647 start = range[i][0];
2650 if (c >= start && c <= end) {
2655 (*o_iso2022jp_check_conv)(c2,c1);
2659 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
2661 unsigned char *mime_pattern[] = {
2662 (unsigned char *)"\075?EUC-JP?B?",
2663 (unsigned char *)"\075?SHIFT_JIS?B?",
2664 (unsigned char *)"\075?ISO-8859-1?Q?",
2665 (unsigned char *)"\075?ISO-2022-JP?B?",
2666 (unsigned char *)"\075?ISO-2022-JP?Q?",
2667 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2668 (unsigned char *)"\075?UTF-8?B?",
2673 int mime_encode[] = {
2674 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, X0208, X0201,
2675 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2681 int mime_encode_method[] = {
2682 'B', 'B','Q', 'B', 'Q',
2683 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2690 #define MAXRECOVER 20
2692 /* I don't trust portability of toupper */
2693 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
2694 #define nkf_isdigit(c) ('0'<=c && c<='9')
2695 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
2700 if (i_getc!=mime_getc) {
2701 i_mgetc = i_getc; i_getc = mime_getc;
2702 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2703 if(mime_f==STRICT_MIME) {
2704 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
2705 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
2711 unswitch_mime_getc()
2713 if(mime_f==STRICT_MIME) {
2714 i_mgetc = i_mgetc_buf;
2715 i_mungetc = i_mungetc_buf;
2718 i_ungetc = i_mungetc;
2722 mime_begin_strict(f)
2727 unsigned char *p,*q;
2728 int r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
2730 mime_decode_mode = FALSE;
2731 /* =? has been checked */
2733 p = mime_pattern[j];
2736 for(i=2;p[i]>' ';i++) { /* start at =? */
2737 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
2738 /* pattern fails, try next one */
2740 while ((p = mime_pattern[++j])) {
2741 for(k=2;k<i;k++) /* assume length(p) > i */
2742 if (p[k]!=q[k]) break;
2743 if (k==i && nkf_toupper(c1)==p[k]) break;
2745 if (p) continue; /* found next one, continue */
2746 /* all fails, output from recovery buffer */
2754 mime_decode_mode = p[i-2];
2755 if (mime_decode_mode=='B') {
2756 mimebuf_f = unbuf_f;
2758 /* do MIME integrity check */
2759 return mime_integrity(f,mime_pattern[j]);
2771 /* we don't keep eof of Fifo, because it contains ?= as
2772 a terminator. It was checked in mime_integrity. */
2773 return ((mimebuf_f)?
2774 (*i_mgetc_buf)(f):Fifo(mime_input++));
2778 mime_ungetc_buf(c,f)
2783 (*i_mungetc_buf)(c,f);
2785 Fifo(--mime_input)=c;
2796 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
2797 /* re-read and convert again from mime_buffer. */
2799 /* =? has been checked */
2801 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
2802 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
2803 /* We accept any character type even if it is broken by new lines */
2804 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
2805 if (c1=='\n'||c1==' '||c1=='\r'||
2806 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
2808 /* Failed. But this could be another MIME preamble */
2816 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2817 if (!(++i<MAXRECOVER) || c1==EOF) break;
2818 if (c1=='b'||c1=='B') {
2819 mime_decode_mode = 'B';
2820 } else if (c1=='q'||c1=='Q') {
2821 mime_decode_mode = 'Q';
2825 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2826 if (!(++i<MAXRECOVER) || c1==EOF) break;
2828 mime_decode_mode = FALSE;
2834 if (!mime_decode_mode) {
2835 /* false MIME preamble, restart from mime_buffer */
2836 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
2837 /* Since we are in MIME mode until buffer becomes empty, */
2838 /* we never go into mime_begin again for a while. */
2841 /* discard mime preamble, and goto MIME mode */
2843 /* do no MIME integrity check */
2844 return c1; /* used only for checking EOF */
2864 #ifdef CAP_URL_OPTION
2869 if (nkf_isdigit(x)) return x - '0';
2870 return nkf_toupper(x) - 'A' + 10;
2873 #ifdef ANSI_C_PROTOTYPE
2874 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
2877 hex_getc(ch, f, g, u)
2890 if (!nkf_isxdigit(c2) == EOF){
2895 if (!nkf_isxdigit(c3) == EOF){
2900 return (hex2bin(c2) << 4) | hex2bin(c3);
2907 return hex_getc(':', f, i_cgetc, i_cungetc);
2915 return (*i_cungetc)(c, f);
2922 return hex_getc('%', f, i_ugetc, i_uungetc);
2930 return (*i_uungetc)(c, f);
2939 int c1, c2, c3, c4, cc;
2940 int t1, t2, t3, t4, mode, exit_mode;
2942 if (mime_top != mime_last) { /* Something is in FIFO */
2943 return Fifo(mime_top++);
2945 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
2946 mime_decode_mode=FALSE;
2947 unswitch_mime_getc();
2948 return (*i_getc)(f);
2951 if (mimebuf_f == FIXED_MIME)
2952 exit_mode = mime_decode_mode;
2955 if (mime_decode_mode == 'Q') {
2956 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
2958 if (c1=='_') return ' ';
2959 if (c1!='=' && c1!='?') {
2963 mime_decode_mode = exit_mode; /* prepare for quit */
2964 if (c1<=' ') return c1;
2965 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
2966 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
2967 /* end Q encoding */
2968 input_mode = exit_mode;
2969 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
2970 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
2973 if (c1=='='&&c2<' ') { /* this is soft wrap */
2974 while((c1 = (*i_mgetc)(f)) <=' ') {
2975 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
2977 mime_decode_mode = 'Q'; /* still in MIME */
2978 goto restart_mime_q;
2981 mime_decode_mode = 'Q'; /* still in MIME */
2985 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
2986 if (c2<=' ') return c2;
2987 mime_decode_mode = 'Q'; /* still in MIME */
2988 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
2989 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
2990 return ((hex(c2)<<4) + hex(c3));
2993 if (mime_decode_mode != 'B') {
2994 mime_decode_mode = FALSE;
2995 return (*i_mgetc)(f);
2999 /* Base64 encoding */
3001 MIME allows line break in the middle of
3002 Base64, but we are very pessimistic in decoding
3003 in unbuf mode because MIME encoded code may be broken by
3004 less or editor's control sequence (such as ESC-[-K) in unbuffered
3005 mode. ignore incomplete MIME.
3007 mode = mime_decode_mode;
3008 mime_decode_mode = exit_mode; /* prepare for quit */
3010 while ((c1 = (*i_mgetc)(f))<=' ') {
3015 if ((c2 = (*i_mgetc)(f))<=' ') {
3018 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3019 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3022 if ((c1 == '?') && (c2 == '=')) {
3024 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3025 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3029 if ((c3 = (*i_mgetc)(f))<=' ') {
3032 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3033 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3037 if ((c4 = (*i_mgetc)(f))<=' ') {
3040 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3041 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3045 mime_decode_mode = mode; /* still in MIME sigh... */
3047 /* BASE 64 decoding */
3049 t1 = 0x3f & base64decode(c1);
3050 t2 = 0x3f & base64decode(c2);
3051 t3 = 0x3f & base64decode(c3);
3052 t4 = 0x3f & base64decode(c4);
3053 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3055 Fifo(mime_last++) = cc;
3056 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3058 Fifo(mime_last++) = cc;
3059 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3061 Fifo(mime_last++) = cc;
3066 return Fifo(mime_top++);
3074 Fifo(--mime_top) = c;
3085 /* In buffered mode, read until =? or NL or buffer full
3087 mime_input = mime_top;
3088 mime_last = mime_top;
3089 while(*p) Fifo(mime_input++) = *p++;
3092 while((c=(*i_getc)(f))!=EOF) {
3093 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3094 break; /* buffer full */
3096 if (c=='=' && d=='?') {
3097 /* checked. skip header, start decode */
3098 Fifo(mime_input++) = c;
3099 /* mime_last_input = mime_input; */
3104 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3106 /* Should we check length mod 4? */
3107 Fifo(mime_input++) = c;
3110 /* In case of Incomplete MIME, no MIME decode */
3111 Fifo(mime_input++) = c;
3112 mime_last = mime_input; /* point undecoded buffer */
3113 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3114 switch_mime_getc(); /* anyway we need buffered getc */
3125 i = c - 'A'; /* A..Z 0-25 */
3127 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3129 } else if (c > '/') {
3130 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3131 } else if (c == '+') {
3132 i = '>' /* 62 */ ; /* + 62 */
3134 i = '?' /* 63 */ ; /* / 63 */
3139 static char basis_64[] =
3140 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3150 p = mime_pattern[0];
3151 for(i=0;mime_encode[i];i++) {
3152 if (mode == mime_encode[i]) {
3153 p = mime_pattern[i];
3157 mimeout_mode = mime_encode_method[i];
3159 /* (*o_mputc)(' '); */
3176 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
3182 if (mimeout_f==FIXED_MIME) {
3183 if (base64_count>71) {
3191 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3192 && mimeout_f!=FIXED_MIME) {
3193 if (mimeout_mode=='Q') {
3200 if (mimeout_mode!='B' || c!=SPACE) {
3209 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3210 open_mime(output_mode);
3212 } else { /* c==EOF */
3213 switch(mimeout_mode) {
3218 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3224 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3230 if (mimeout_f!=FIXED_MIME) {
3232 } else if (mimeout_mode != 'Q')
3237 switch(mimeout_mode) {
3241 (*o_mputc)(itoh4(((c>>4)&0xf)));
3242 (*o_mputc)(itoh4((c&0xf)));
3249 (*o_mputc)(basis_64[c>>2]);
3254 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3260 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3261 (*o_mputc)(basis_64[c & 0x3F]);
3281 mime_f = STRICT_MIME;
3285 #if defined(MSDOS) || defined(__OS2__)
3290 iso2022jp_f = FALSE;
3292 kanji_intro = DEFAULT_J;
3293 ascii_intro = DEFAULT_R;
3295 output_conv = DEFAULT_CONV;
3296 oconv = DEFAULT_CONV;
3299 i_mungetc = std_ungetc;
3300 i_mgetc_buf = std_getc;
3301 i_mungetc_buf = std_ungetc;
3304 i_ungetc=std_ungetc;
3307 i_bungetc= std_ungetc;
3311 o_crconv = no_connection;
3312 o_rot_conv = no_connection;
3313 o_iso2022jp_check_conv = no_connection;
3314 o_hira_conv = no_connection;
3315 o_fconv = no_connection;
3316 o_zconv = no_connection;
3319 i_ungetc = std_ungetc;
3321 i_mungetc = std_ungetc;
3323 output_mode = ASCII;
3326 mime_decode_mode = FALSE;
3335 struct input_code *p = input_code_list;
3340 #ifdef UTF8_OUTPUT_ENABLE
3341 if (w_oconv16_begin_f) {
3342 w_oconv16_begin_f = 2;
3347 fold_preserve_f = FALSE;
3350 fold_margin = FOLD_MARGIN;
3353 z_prev2=0,z_prev1=0;
3359 no_connection(c2,c1)
3362 no_connection2(c2,c1,0);
3366 no_connection2(c2,c1,c0)
3369 fprintf(stderr,"nkf internal module connection failure.\n");
3377 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3378 fprintf(stderr,"Flags:\n");
3379 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
3380 #ifdef DEFAULT_CODE_SJIS
3381 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3383 #ifdef DEFAULT_CODE_JIS
3384 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3386 #ifdef DEFAULT_CODE_EUC
3387 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3389 #ifdef DEFAULT_CODE_UTF8
3390 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3392 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3393 fprintf(stderr,"t no conversion\n");
3394 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3395 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3396 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
3397 fprintf(stderr,"v Show this usage. V: show version\n");
3398 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3399 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3400 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3401 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
3402 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
3403 fprintf(stderr," 3: Convert HTML Entity\n");
3404 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
3405 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
3407 fprintf(stderr,"T Text mode output\n");
3409 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
3410 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
3411 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
3412 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
3413 fprintf(stderr,"long name options\n");
3414 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
3415 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
3416 fprintf(stderr," --help,--version\n");
3423 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
3424 #if defined(MSDOS) && !defined(_Windows)
3427 #if !defined(__WIN32__) && defined(_Windows)
3430 #if defined(__WIN32__) && defined(_Windows)
3436 ,Version,Patchlevel);
3437 fprintf(stderr,"\n%s\n",CopyRight);
3442 ** Patch authors (パッチ制作者)
3443 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
3444 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
3445 ** ohta@src.ricoh.co.jp (Junn Ohta)
3446 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
3447 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
3448 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
3449 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
3450 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
3451 ** GHG00637@nifty-serve.or.jp (COW)