1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS)
113 #if defined(MSDOS) || defined(__OS2__)
120 #define setbinmode(fp) fsetbin(fp)
121 #else /* Microsoft C, Turbo C */
122 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
124 #else /* UNIX,OS/2 */
125 #define setbinmode(fp)
128 #ifdef _IOFBF /* SysV and MSDOS */
129 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
131 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
134 /*Borland C++ 4.5 EasyWin*/
135 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
141 /* added by satoru@isoternet.org */
142 #include <sys/stat.h>
152 /* state of output_mode and input_mode
169 /* Input Assumption */
173 #define LATIN1_INPUT 6
175 #define STRICT_MIME 8
180 #define JAPANESE_EUC 10
184 #define UTF8_INPUT 13
185 #define UTF16_INPUT 14
186 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): nonzero when c is an ASCII letter ('a'-'z', 'A'-'Z')
 * or an ASCII digit ('0'-'9'), zero otherwise.
 *
 * Every use of the parameter is parenthesized so that an expression
 * argument (for example a conditional expression) is classified as a
 * whole instead of being re-associated with the comparison operators.
 * NOTE: c is still evaluated more than once, so do not pass an
 * argument that has side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
207 #define HOLD_SIZE 1024
208 #define IOBUF_SIZE 16384
210 #define DEFAULT_J 'B'
211 #define DEFAULT_R 'B'
213 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
214 #define SJ6394 0x0161 /* 63 - 94 ku offset */
216 #define RANGE_NUM_MAX 18
221 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
222 #define sizeof_euc_utf8 94
223 #define sizeof_euc_to_utf8_1byte 94
224 #define sizeof_euc_to_utf8_2bytes 94
225 #define sizeof_utf8_to_euc_C2 64
226 #define sizeof_utf8_to_euc_E5B8 64
227 #define sizeof_utf8_to_euc_2bytes 112
228 #define sizeof_utf8_to_euc_3bytes 112
231 /* MIME preprocessor */
234 #ifdef EASYWIN /*Easy Win */
235 extern POINT _BufferSize;
238 /* function prototype */
240 #ifdef ANSI_C_PROTOTYPE
242 #define STATIC static
248 STATIC int noconvert PROTO((FILE *f));
249 STATIC int kanji_convert PROTO((FILE *f));
250 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
251 STATIC int push_hold_buf PROTO((int c2));
252 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
253 STATIC int s_iconv PROTO((int c2,int c1,int c0));
254 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
255 STATIC int e_iconv PROTO((int c2,int c1,int c0));
256 #ifdef UTF8_INPUT_ENABLE
257 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
258 STATIC int w_iconv PROTO((int c2,int c1,int c0));
259 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
260 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
262 #ifdef UTF8_OUTPUT_ENABLE
263 STATIC int e2w_conv PROTO((int c2,int c1));
264 STATIC void w_oconv PROTO((int c2,int c1));
265 STATIC void w_oconv16 PROTO((int c2,int c1));
267 STATIC void e_oconv PROTO((int c2,int c1));
268 STATIC void s_oconv PROTO((int c2,int c1));
269 STATIC void j_oconv PROTO((int c2,int c1));
270 STATIC void fold_conv PROTO((int c2,int c1));
271 STATIC void cr_conv PROTO((int c2,int c1));
272 STATIC void z_conv PROTO((int c2,int c1));
273 STATIC void rot_conv PROTO((int c2,int c1));
274 STATIC void hira_conv PROTO((int c2,int c1));
275 STATIC void base64_conv PROTO((int c2,int c1));
276 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
277 STATIC void no_connection PROTO((int c2,int c1));
278 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
280 STATIC int code_score PROTO((int c2,int c1,int s));
281 STATIC void code_status PROTO((int c));
283 STATIC void std_putc PROTO((int c));
284 STATIC int std_getc PROTO((FILE *f));
285 STATIC int std_ungetc PROTO((int c,FILE *f));
287 STATIC int broken_getc PROTO((FILE *f));
288 STATIC int broken_ungetc PROTO((int c,FILE *f));
290 STATIC int mime_begin PROTO((FILE *f));
291 STATIC int mime_getc PROTO((FILE *f));
292 STATIC int mime_ungetc PROTO((int c,FILE *f));
294 STATIC int mime_begin_strict PROTO((FILE *f));
295 STATIC int mime_getc_buf PROTO((FILE *f));
296 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
297 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
299 STATIC int base64decode PROTO((int c));
300 STATIC void mime_putc PROTO((int c));
301 STATIC void open_mime PROTO((int c));
302 STATIC void close_mime PROTO(());
303 STATIC void usage PROTO(());
304 STATIC void version PROTO(());
305 STATIC void options PROTO((unsigned char *c));
307 STATIC void reinit PROTO(());
312 static unsigned char stdibuf[IOBUF_SIZE];
313 static unsigned char stdobuf[IOBUF_SIZE];
314 static unsigned char hold_buf[HOLD_SIZE*2];
315 static int hold_count;
317 /* MIME preprocessor fifo */
319 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
320 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
321 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
322 static unsigned char mime_buf[MIME_BUF_SIZE];
323 static unsigned int mime_top = 0;
324 static unsigned int mime_last = 0; /* decoded */
325 static unsigned int mime_input = 0; /* undecoded */
328 static int unbuf_f = FALSE;
329 static int estab_f = FALSE;
330 static int nop_f = FALSE;
331 static int binmode_f = TRUE; /* binary mode */
332 static int rot_f = FALSE; /* rot13/47 mode */
333 static int hira_f = FALSE; /* hiragana/katakana conversion */
334 static int input_f = FALSE; /* non fixed input code */
335 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
336 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
337 static int mimebuf_f = FALSE; /* MIME buffered input */
338 static int broken_f = FALSE; /* convert ESC-less broken JIS */
339 static int iso8859_f = FALSE; /* ISO8859 through */
340 static int mimeout_f = FALSE; /* base64 mode */
341 #if defined(MSDOS) || defined(__OS2__)
342 static int x0201_f = TRUE; /* Assume JISX0201 kana */
344 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
346 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
347 #ifdef UTF8_OUTPUT_ENABLE
348 static int w_oconv16_begin_f= 0; /* utf-16 header */
352 #ifdef CAP_URL_OPTION
353 static int cap_f = FALSE;
354 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
355 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
356 STATIC int cap_getc PROTO((FILE *f));
357 STATIC int cap_ungetc PROTO((int c,FILE *f));
359 static int url_f = FALSE;
360 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
361 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
362 STATIC int url_getc PROTO((FILE *f));
363 STATIC int url_ungetc PROTO((int c,FILE *f));
367 static int noout_f = FALSE;
368 STATIC void no_putc PROTO((int c));
369 static int debug_f = FALSE;
370 STATIC void debug PROTO((char *str));
373 static int e_stat = 0;
374 static int e_score = 0;
376 static int s_stat = 0;
377 static int s_score = 0;
379 #ifdef UTF8_INPUT_ENABLE
380 static int w_stat = 0;
381 static int w_score = 0;
383 static int utf16_mode = UTF16_INPUT;
385 static int w_stat = -1;
388 static int mimeout_mode = 0;
389 static int base64_count = 0;
391 /* X0208 -> ASCII converter */
394 static int f_line = 0; /* chars in line */
395 static int f_prev = 0;
396 static int fold_preserve_f = FALSE; /* preserve new lines */
397 static int fold_f = FALSE;
398 static int fold_len = 0;
401 static unsigned char kanji_intro = DEFAULT_J,
402 ascii_intro = DEFAULT_R;
406 #define FOLD_MARGIN 10
407 #define DEFAULT_FOLD 60
409 static int fold_margin = FOLD_MARGIN;
413 #ifdef DEFAULT_CODE_JIS
414 # define DEFAULT_CONV j_oconv
416 #ifdef DEFAULT_CODE_SJIS
417 # define DEFAULT_CONV s_oconv
419 #ifdef DEFAULT_CODE_EUC
420 # define DEFAULT_CONV e_oconv
422 #ifdef DEFAULT_CODE_UTF8
423 # define DEFAULT_CONV w_oconv
426 /* process default */
427 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
429 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
430 /* s_iconv or oconv */
431 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
433 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
434 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
435 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
436 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
437 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
438 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
439 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
441 /* static redirections */
443 static void (*o_putc)PROTO((int c)) = std_putc;
445 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
446 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
448 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
449 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
451 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
453 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
454 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
456 /* for strict mime */
457 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
458 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
461 static int output_mode = ASCII, /* output kanji mode */
462 input_mode = ASCII, /* input kanji mode */
463 shift_mode = FALSE; /* TRUE shift out, or X0201 */
464 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
466 /* X0201 / X0208 conversion tables */
468 /* X0201 kana conversion table */
471 unsigned char cv[]= {
472 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
473 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
474 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
475 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
476 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
477 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
478 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
479 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
480 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
481 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
482 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
483 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
484 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
485 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
486 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
487 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
491 /* X0201 kana conversion table for daguten */
494 unsigned char dv[]= {
495 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
496 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
497 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
499 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
500 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
501 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
502 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
503 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
504 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
505 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
506 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
507 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
510 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513 /* X0201 kana conversion table for han-daguten */
516 unsigned char ev[]= {
517 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
522 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
524 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
525 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
526 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
527 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
528 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
529 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
530 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
531 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
532 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
536 /* X0208 kigou conversion table */
537 /* 0x8140 - 0x819e */
539 unsigned char fv[] = {
541 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
542 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
543 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
544 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
545 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
546 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
547 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
548 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
549 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
550 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
552 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
558 static int file_out = FALSE;
560 static int overwrite = FALSE;
563 static int crmode_f = 0; /* CR, NL, CRLF */
564 #ifdef EASYWIN /*Easy Win */
565 static int end_check;
577 #ifdef EASYWIN /*Easy Win */
578 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
581 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
582 cp = (unsigned char *)*argv;
585 if(x0201_f == WISH_TRUE)
586 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
588 if (binmode_f == TRUE)
590 if (freopen("","wb",stdout) == NULL)
597 setbuf(stdout, (char *) NULL);
599 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
602 if (binmode_f == TRUE)
604 if (freopen("","rb",stdin) == NULL) return (-1);
608 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
612 kanji_convert(stdin);
618 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
627 /* reopen file for stdout */
628 if (file_out == TRUE) {
631 outfname = malloc(strlen(origfname)
632 + strlen(".nkftmpXXXXXX")
638 strcpy(outfname, origfname);
642 for (i = strlen(outfname); i; --i){
643 if (outfname[i - 1] == '/'
644 || outfname[i - 1] == '\\'){
650 strcat(outfname, "ntXXXXXX");
652 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
655 strcat(outfname, ".nkftmpXXXXXX");
656 fd = mkstemp(outfname);
659 || (fd_backup = dup(fileno(stdout))) < 0
660 || dup2(fd, fileno(stdout)) < 0
671 outfname = "nkf.out";
674 if(freopen(outfname, "w", stdout) == NULL) {
678 if (binmode_f == TRUE) {
680 if (freopen("","wb",stdout) == NULL)
687 if (binmode_f == TRUE)
689 if (freopen("","rb",fin) == NULL)
694 setvbuffer(fin, stdibuf, IOBUF_SIZE);
711 if (dup2(fd_backup, fileno(stdout)) < 0){
714 if (stat(origfname, &sb)) {
715 fprintf(stderr, "Can't stat %s\n", origfname);
717 /* restore the permissions */
718 if (chmod(outfname, sb.st_mode)) {
719 fprintf(stderr, "Can't set permission %s\n", outfname);
722 tb[0] = tb[1] = sb.st_mtime;
723 /* restore the timestamp */
724 if (utime(outfname, tb)) {
725 fprintf(stderr, "Can't set timestamp %s\n", outfname);
728 if (unlink(origfname)){
732 tb.actime = sb.st_atime;
733 tb.modtime = sb.st_mtime;
734 /* restore the timestamp */
735 if (utime(outfname, &tb)) {
736 fprintf(stderr, "Can't set timestamp %s\n", outfname);
739 if (rename(outfname, origfname)) {
741 fprintf(stderr, "Can't rename %s to %s\n",
742 outfname, origfname);
750 #ifdef EASYWIN /*Easy Win */
751 if (file_out == FALSE)
752 scanf("%d",&end_check);
755 #else /* for Other OS */
756 if (file_out == TRUE)
786 {"katakana-hiragana","h3"},
787 #ifdef UTF8_OUTPUT_ENABLE
791 #ifdef UTF8_INPUT_ENABLE
793 {"utf16-input", "W16"},
798 #ifdef CAP_URL_OPTION
808 static int option_mode;
823 case '-': /* literal options */
824 if (!*cp) { /* ignore the rest of arguments */
828 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
830 p = (unsigned char *)long_option[i].name;
831 for (j=0;*p && *p++ == cp[j];j++);
832 if (! *p && !cp[j]) break;
835 cp = (unsigned char *)long_option[i].alias;
838 if (strcmp(long_option[i].name, "overwrite") == 0){
844 #ifdef CAP_URL_OPTION
845 if (strcmp(long_option[i].name, "cap-input") == 0){
849 if (strcmp(long_option[i].name, "url-input") == 0){
855 if (strcmp(long_option[i].name, "no-output") == 0){
859 if (strcmp(long_option[i].name, "debug") == 0){
866 case 'b': /* buffered mode */
869 case 'u': /* non buffered mode */
872 case 't': /* transparent mode */
875 case 'j': /* JIS output */
877 output_conv = j_oconv;
879 case 'e': /* AT&T EUC output */
880 output_conv = e_oconv;
882 case 's': /* SJIS output */
883 output_conv = s_oconv;
885 case 'l': /* ISO8859 Latin-1 support, no conversion */
886 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
887 input_f = LATIN1_INPUT;
889 case 'i': /* Kanji IN ESC-$-@/B */
890 if (*cp=='@'||*cp=='B')
893 case 'o': /* ASCII IN ESC-(-J/B */
894 if (*cp=='J'||*cp=='B'||*cp=='H')
902 if ('9'>= *cp && *cp>='0')
903 hira_f |= (*cp++ -'0');
910 #if defined(MSDOS) || defined(__OS2__)
925 #ifdef UTF8_OUTPUT_ENABLE
926 case 'w': /* UTF-8 output */
927 if ('1'== cp[0] && '6'==cp[1]) {
928 output_conv = w_oconv16; cp+=2;
930 w_oconv16_begin_f=2; cp++;
933 output_conv = w_oconv;
936 #ifdef UTF8_INPUT_ENABLE
937 case 'W': /* UTF-8 input */
938 if ('1'== cp[0] && '6'==cp[1]) {
939 input_f = UTF16_INPUT;
941 input_f = UTF8_INPUT;
944 /* Input code assumption */
945 case 'J': /* JIS input */
946 case 'E': /* AT&T EUC input */
949 case 'S': /* MS Kanji input */
950 input_f = SJIS_INPUT;
951 if (x0201_f==NO_X0201) x0201_f=TRUE;
953 case 'Z': /* Convert X0208 alphabet to ASCII */
954 /* bit:0 Convert X0208
955 bit:1 Convert Kankaku to one space
956 bit:2 Convert Kankaku to two spaces
957 bit:3 Convert HTML Entity
959 if ('9'>= *cp && *cp>='0')
960 alpha_f |= 1<<(*cp++ -'0');
964 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
965 x0201_f = FALSE; /* No X0201->X0208 conversion */
967 ESC-(-I in JIS, EUC, MS Kanji
968 SI/SO in JIS, EUC, MS Kanji
969 SSO in EUC, JIS, not in MS Kanji
972 ESC-(-I in JIS (0x20-0x5f)
973 SSO in EUC (0xa0-0xdf)
974 0xa0-0xd in MS Kanji (0xa0-0xdf)
977 case 'X': /* Assume X0201 kana */
978 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
981 case 'F': /* preserve new lines */
982 fold_preserve_f = TRUE;
983 case 'f': /* folding -f60 or -f */
986 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
988 fold_len += *cp++ - '0';
990 if (!(0<fold_len && fold_len<BUFSIZ))
991 fold_len = DEFAULT_FOLD;
995 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
997 fold_margin += *cp++ - '0';
1001 case 'm': /* MIME support */
1002 if (*cp=='B'||*cp=='Q') {
1003 mime_decode_mode = *cp++;
1004 mimebuf_f = FIXED_MIME;
1005 } else if (*cp=='N') {
1006 mime_f = TRUE; cp++;
1007 } else if (*cp=='S') {
1008 mime_f = STRICT_MIME; cp++;
1009 } else if (*cp=='0') {
1010 mime_f = FALSE; cp++;
1013 case 'M': /* MIME output */
1016 mimeout_f = FIXED_MIME; cp++;
1017 } else if (*cp=='Q') {
1019 mimeout_f = FIXED_MIME; cp++;
1024 case 'B': /* Broken JIS support */
1026 bit:1 allow any x on ESC-(-x or ESC-$-x
1027 bit:2 reset to ascii on NL
1029 if ('9'>= *cp && *cp>='0')
1030 broken_f |= 1<<(*cp++ -'0');
1035 case 'O':/* for Output file */
1039 case 'c':/* add cr code */
1042 case 'd':/* delete cr code */
1045 case 'I': /* ISO-2022-JP output */
1048 case 'L': /* line mode */
1049 if (*cp=='u') { /* unix */
1050 crmode_f = NL; cp++;
1051 } else if (*cp=='m') { /* mac */
1052 crmode_f = CR; cp++;
1053 } else if (*cp=='w') { /* windows */
1054 crmode_f = CRLF; cp++;
1055 } else if (*cp=='0') { /* no conversion */
1060 /* multiple options in a single string are allowed for the Perl module */
1061 while(*cp && *cp!='-') cp++;
1065 /* bogus option but ignored */
1071 #ifdef ANSI_C_PROTOTYPE
1072 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1074 void set_iconv(f, iconv_func)
1076 int (*iconv_func)();
1080 static int (*iconv_for_check)() = 0;
1082 #ifdef INPUT_CODE_FIX
1090 #ifdef INPUT_CODE_FIX
1091 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1097 if (estab_f && iconv_for_check != iconv){
1098 #ifdef UTF8_INPUT_ENABLE
1099 if (iconv == w_iconv) debug("UTF-8\n");
1100 if (iconv == w_iconv16) debug("UTF-16\n");
1102 if (iconv == s_iconv) debug("Shift_JIS\n");
1103 if (iconv == e_iconv) debug("EUC-JP\n");
1104 iconv_for_check = iconv;
1109 #define SCORE_DEPEND (1) /* machine-dependent character */
1110 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* character that does not exist */
1111 #define SCORE_ERROR (SCORE_NO_EXIST << 1) /* error */
1112 int score_table_A0[] = {
1115 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1116 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1119 int score_table_F0[] = {
1121 0, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1122 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1123 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1126 int code_score(c2, c1, s)
1131 }else if ((c2 & 0xf0) == 0xa0){
1132 s |= score_table_A0[c2 & 0x0f];
1133 }else if ((c2 & 0xf0) == 0xf0){
1134 s |= score_table_F0[c2 & 0x0f];
1136 #ifdef UTF8_OUTPUT_ENABLE
1137 else if (!e2w_conv(c2, c1)){
1138 s |= SCORE_NO_EXIST;
1150 if (c <= DEL && estab_f){
1157 || (0xa1 <= c && c <= 0xef && iconv == s_iconv)){
1159 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
1164 s_score = code_score(-1, 0, s_score);
1165 if (iconv == s_iconv) set_iconv(FALSE, 0);
1169 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfd)){
1172 s2e_conv(s_buf[1], s_buf[0], &s_buf[1], &s_buf[0]);
1173 s_score = code_score(s_buf[1], s_buf[0], s_score);
1176 s_score = code_score(-1, 0, s_score);
1177 if (iconv == s_iconv) set_iconv(FALSE, 0);
1183 if (c <= DEL && estab_f){
1191 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1196 e_score = code_score(-1, 0, e_score);
1197 if (iconv == e_iconv) set_iconv(FALSE, 0);
1201 if (0xa1 <= c && c <= 0xfe){
1204 e_score = code_score(e_buf[1], e_buf[0], e_score);
1207 if (iconv == e_iconv) set_iconv(FALSE, 0);
1208 e_score = code_score(-1, 0, e_score);
1212 #ifdef UTF8_INPUT_ENABLE
1215 if (c <= DEL && estab_f){
1223 }else if (0xc0 <= c && c <= 0xdf){
1227 }else if (0xe0 <= c && c <= 0xef){
1232 w_score = code_score(-1, 0, w_score);
1233 if (iconv == w_iconv) set_iconv(FALSE, 0);
1238 if (0x80 <= c && c <= 0xbf){
1243 w2e_conv(w_buf[2], w_buf[1], w_buf[0],
1244 &w_buf[1], &w_buf[0]);
1246 w2e_conv(w_buf[1], w_buf[0], 0,
1247 &w_buf[1], &w_buf[0]);
1249 w_score = code_score(w_buf[1], w_buf[0], w_score);
1253 w_score = code_score(-1, 0, w_score);
1254 if (iconv == w_iconv) set_iconv(FALSE, 0);
1259 if (s_stat < 0 && e_stat < 0 && w_stat == 0){
1260 set_iconv(TRUE, w_iconv);
1263 if (s_stat == 0 && e_stat < 0 && w_stat < 0){
1264 set_iconv(TRUE, s_iconv);
1266 if (s_stat < 0 && e_stat == 0 && w_stat < 0){
1267 set_iconv(TRUE, e_iconv);
1269 if (s_stat < 0 && e_stat < 0 && w_stat < 0){
1271 s_stat = e_stat = 0;
1272 #ifdef UTF8_INPUT_ENABLE
1308 while ((c = (*i_getc)(f)) != EOF)
1317 oconv = output_conv;
1320 /* replace continuation module, from output side */
1322 /* output redirection */
1331 if (mimeout_f == TRUE) {
1332 o_base64conv = oconv; oconv = base64_conv;
1334 /* base64_count = 0; */
1338 o_crconv = oconv; oconv = cr_conv;
1341 o_rot_conv = oconv; oconv = rot_conv;
1344 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1347 o_hira_conv = oconv; oconv = hira_conv;
1350 o_fconv = oconv; oconv = fold_conv;
1353 if (alpha_f || x0201_f) {
1354 o_zconv = oconv; oconv = z_conv;
1358 /* input redirection */
1359 #ifdef CAP_URL_OPTION
1361 i_cgetc = i_getc; i_getc = cap_getc;
1362 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1365 i_ugetc = i_getc; i_getc = url_getc;
1366 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1369 if (mime_f && mimebuf_f==FIXED_MIME) {
1370 i_mgetc = i_getc; i_getc = mime_getc;
1371 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1374 i_bgetc = i_getc; i_getc = broken_getc;
1375 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1377 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1378 set_iconv(-TRUE, e_iconv);
1379 } else if (input_f == SJIS_INPUT) {
1380 set_iconv(-TRUE, s_iconv);
1381 #ifdef UTF8_INPUT_ENABLE
1382 } else if (input_f == UTF8_INPUT) {
1383 set_iconv(-TRUE, w_iconv);
1384 } else if (input_f == UTF16_INPUT) {
1385 set_iconv(-TRUE, w_iconv16);
1388 set_iconv(FALSE, e_iconv);
1393 #ifdef UTF8_INPUT_ENABLE
1401 Conversion main loop. Code detection only.
1411 module_connection();
1416 output_mode = ASCII;
1419 #define NEXT continue /* no output, get next */
1420 #define SEND ; /* output c1 and c2, get next */
1421 #define LAST break /* end of loop, go closing */
1423 while ((c1 = (*i_getc)(f)) != EOF) {
1428 /* in case of 8th bit is on */
1430 /* in case of not established yet */
1431 /* It is still ambiguous */
1432 if (h_conv(f, c2, c1)==EOF)
1438 /* in case of already established */
1440 /* ignore bogus code */
1446 /* second byte, 7 bit code */
1447 /* it might be shifted kanji */
1448 if ((c1 == DEL) || (c1 <= SPACE)) {
1449 /* ignore bogus first code */
1457 #ifdef UTF8_INPUT_ENABLE
1466 } else if (c1 > DEL) {
1468 if (!estab_f && !iso8859_f) {
1469 /* not established yet */
1472 } else { /* estab_f==TRUE */
1477 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1478 /* SJIS X0201 Case... */
1479 if(iso2022jp_f && x0201_f==NO_X0201) {
1480 (*oconv)(GETA1, GETA2);
1487 } else if (c1==SSO && iconv != s_iconv) {
1488 /* EUC X0201 Case */
1489 c1 = (*i_getc)(f); /* skip SSO */
1491 if (SSP<=c1 && c1<0xe0) {
1492 if(iso2022jp_f && x0201_f==NO_X0201) {
1493 (*oconv)(GETA1, GETA2);
1500 } else { /* bogus code, skip SSO and one byte */
1504 /* already established */
1509 } else if ((c1 > SPACE) && (c1 != DEL)) {
1510 /* in case of Roman characters */
1512 /* output 1 shifted byte */
1516 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1517 /* output 1 shifted byte */
1518 if(iso2022jp_f && x0201_f==NO_X0201) {
1519 (*oconv)(GETA1, GETA2);
1526 /* look like bogus code */
1529 } else if (input_mode == X0208) {
1530 /* in case of Kanji shifted */
1533 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1534 /* Check MIME code */
1535 if ((c1 = (*i_getc)(f)) == EOF) {
1538 } else if (c1 == '?') {
1539 /* =? is mime conversion start sequence */
1540 if(mime_f == STRICT_MIME) {
1541 /* check in real detail */
1542 if (mime_begin_strict(f) == EOF)
1546 } else if (mime_begin(f) == EOF)
1556 /* normal ASCII code */
1559 } else if (c1 == SI) {
1562 } else if (c1 == SO) {
1565 } else if (c1 == ESC ) {
1566 if ((c1 = (*i_getc)(f)) == EOF) {
1567 /* (*oconv)(0, ESC); don't send bogus code */
1569 } else if (c1 == '$') {
1570 if ((c1 = (*i_getc)(f)) == EOF) {
1572 (*oconv)(0, ESC); don't send bogus code
1573 (*oconv)(0, '$'); */
1575 } else if (c1 == '@'|| c1 == 'B') {
1576 /* This is kanji introduction */
1580 } else if (c1 == '(') {
1581 if ((c1 = (*i_getc)(f)) == EOF) {
1582 /* don't send bogus code
1588 } else if (c1 == '@'|| c1 == 'B') {
1589 /* This is kanji introduction */
1594 /* could be some special code */
1601 } else if (broken_f&0x2) {
1602 /* accept any ESC-(-x as broken code ... */
1612 } else if (c1 == '(') {
1613 if ((c1 = (*i_getc)(f)) == EOF) {
1614 /* don't send bogus code
1616 (*oconv)(0, '('); */
1620 /* This is X0201 kana introduction */
1621 input_mode = X0201; shift_mode = X0201;
1623 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
1624 /* This is ASCII / JIS X0201 Roman introduction */
1625 input_mode = ASCII; shift_mode = FALSE;
1627 } else if (broken_f&0x2) {
1628 input_mode = ASCII; shift_mode = FALSE;
1633 /* maintain various input_mode here */
1637 } else if ( c1 == 'N' || c1 == 'n' ){
1639 c1 = (*i_getc)(f); /* skip SS2 */
1640 if ( SPACE<=c1 && c1 < 0xe0 ) {
1649 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
1650 input_mode = ASCII; set_iconv(FALSE, 0);
1656 if (input_mode == X0208)
1657 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
1658 else if (input_mode)
1659 (*oconv)(input_mode, c1); /* other special case */
1660 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
1661 int c0 = (*i_getc)(f);
1664 (*iconv)(c2, c1, c0);
1670 /* goto next_word */
1674 (*iconv)(EOF, 0, 0);
1687 /** it must NOT be in the kanji shift sequence */
1688 /** it must NOT be written in JIS7 */
1689 /** and it must be after 2 byte 8bit code */
1696 while ((c1 = (*i_getc)(f)) != EOF) {
1702 if (push_hold_buf(c1) == EOF || estab_f){
1708 if (e_score <= s_score
1709 #ifdef UTF8_INPUT_ENABLE
1710 && e_score <= w_score
1713 set_iconv(FALSE, e_iconv);
1715 else if (s_score <= e_score
1716 #ifdef UTF8_INPUT_ENABLE
1717 && s_score <= w_score
1720 set_iconv(FALSE, s_iconv);
1722 #ifdef UTF8_INPUT_ENABLE
1724 set_iconv(FALSE, w_iconv);
1731 ** 1) EOF is detected, or
1732 ** 2) Code is established, or
1733 ** 3) Buffer is FULL (but last word is pushed)
1735 ** in 1) and 3) cases, we continue to use
1736 ** Kanji codes by oconv and leave estab_f unchanged.
1740 while (wc < hold_count){
1741 c2 = hold_buf[wc++];
1745 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
1746 (*iconv)(X0201, c2, 0);
1749 if (wc < hold_count){
1750 c1 = hold_buf[wc++];
1753 if (c1 == EOF) break;
1756 if ((*iconv)(c2, c1, 0) < 0){
1758 if (wc < hold_count){
1759 c0 = hold_buf[wc++];
1762 if (c0 == EOF) break;
1765 (*iconv)(c2, c1, c0);
1779 if (hold_count >= HOLD_SIZE*2)
1781 hold_buf[hold_count++] = c2;
1782 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
1785 int s2e_conv(c2, c1, p2, p1)
1789 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
1791 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
1798 return (c2 << 8) | c1;
1808 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1811 s2e_conv(c2, c1, &c2, &c1);
1824 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1834 #ifdef UTF8_INPUT_ENABLE
1836 w2e_conv(c2, c1, c0, p2, p1)
1840 extern unsigned short * utf8_to_euc_2bytes[];
1841 extern unsigned short ** utf8_to_euc_3bytes[];
1843 if (0xc0 <= c2 && c2 <= 0xef) {
1844 unsigned short **pp;
1847 if (c0 == 0) return -1;
1848 pp = utf8_to_euc_3bytes[c2 - 0x80];
1849 return w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
1851 return w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
1853 } else if (c2 == X0201) {
1866 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
1874 w_iconv16(c2, c1, c0)
1877 extern unsigned short * utf8_to_euc_2bytes[];
1878 extern unsigned short ** utf8_to_euc_3bytes[];
1879 unsigned short **pp;
1884 if (c2==0376 && c1==0377){
1885 utf16_mode = UTF16_INPUT;
1887 } else if (c2==0377 && c1==0376){
1888 utf16_mode = UTF16BE_INPUT;
1891 if (utf16_mode == UTF16BE_INPUT) {
1893 tmp=c1; c1=c2; c2=tmp;
1895 if (c2==0 || c2==EOF) {
1899 val = ((c2<<8)&0xff00) + c1;
1901 c0 = (0x80 | (c1 & 0x3f));
1902 c1 = (0xc0 | (val >> 6));
1903 pp = utf8_to_euc_2bytes;
1904 psize = sizeof_utf8_to_euc_2bytes;
1906 c0 = (0x80 | (c1 & 0x3f));
1907 c2 = (0xe0 | (val >> 12));
1908 c1 = (0x80 | ((val >> 6) & 0x3f));
1909 if (c0 == 0) return -1;
1910 if (0<=c2-0x80 && c2-0x80 <sizeof_utf8_to_euc_3bytes){
1911 pp = utf8_to_euc_3bytes[c2 - 0x80];
1912 psize = sizeof_utf8_to_euc_C2;
1917 ret = w_iconv_common(c1, c0, pp, psize, &c2, &c1);
1918 if (ret) return ret;
1924 w_iconv_common(c1, c0, pp, psize, p2, p1)
1926 unsigned short **pp;
1934 if (pp == 0) return 1;
1937 if (c1 < 0 || psize <= c1) return 1;
1939 if (p == 0) return 1;
1942 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
1944 if (val == 0) return 1;
1947 if (c2 == SO) c2 = X0201;
1956 #ifdef UTF8_OUTPUT_ENABLE
1961 extern unsigned short euc_to_utf8_1byte[];
1962 extern unsigned short * euc_to_utf8_2bytes[];
1966 p = euc_to_utf8_1byte;
1969 c2 = (c2&0x7f) - 0x21;
1970 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1971 p = euc_to_utf8_2bytes[c2];
1976 c1 = (c1 & 0x7f) - 0x21;
1977 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1990 } else if (c2 == 0) {
1991 output_mode = ASCII;
1993 } else if (c2 == ISO8859_1) {
1994 output_mode = ISO8859_1;
1995 (*o_putc)(c1 | 0x080);
1997 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2000 if (0 < val && val < 0x80){
2002 }else if (val < 0x800){
2003 (*o_putc)(0xc0 | (val >> 6));
2004 (*o_putc)(0x80 | (val & 0x3f));
2006 (*o_putc)(0xe0 | (val >> 12));
2007 (*o_putc)(0x80 | ((val >> 6) & 0x3f));
2008 (*o_putc)(0x80 | (val & 0x3f));
2019 if (w_oconv16_begin_f==2) {
2022 w_oconv16_begin_f=1;
2027 } else if (c2 == 0) {
2030 } else if (c2 == ISO8859_1) {
2032 (*o_putc)(c1 | 0x080);
2034 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2035 (*o_putc)((val&0xff00)>>8);
2036 (*o_putc)(val&0xff);
2050 } else if (c2 == 0) {
2051 output_mode = ASCII;
2053 } else if (c2 == X0201) {
2054 output_mode = JAPANESE_EUC;
2055 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2056 } else if (c2 == ISO8859_1) {
2057 output_mode = ISO8859_1;
2058 (*o_putc)(c1 | 0x080);
2060 if ((c1<0x20 || 0x7e<c1) ||
2061 (c2<0x20 || 0x7e<c2)) {
2062 set_iconv(FALSE, 0);
2063 return; /* too late to rescue this char */
2065 output_mode = JAPANESE_EUC;
2066 (*o_putc)(c2 | 0x080);
2067 (*o_putc)(c1 | 0x080);
2081 } else if (c2 == 0) {
2082 output_mode = ASCII;
2084 } else if (c2 == X0201) {
2085 output_mode = SHIFT_JIS;
2087 } else if (c2 == ISO8859_1) {
2088 output_mode = ISO8859_1;
2089 (*o_putc)(c1 | 0x080);
2091 if ((c1<0x20 || 0x7e<c1) ||
2092 (c2<0x20 || 0x7e<c2)) {
2093 set_iconv(FALSE, 0);
2094 return; /* too late to rescue this char */
2096 output_mode = SHIFT_JIS;
2097 (*o_putc)((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1)));
2098 (*o_putc)((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e)));
2108 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2111 (*o_putc)(ascii_intro);
2112 output_mode = ASCII;
2115 } else if (c2==X0201) {
2116 if (output_mode!=X0201) {
2117 output_mode = X0201;
2123 } else if (c2==ISO8859_1) {
2124 /* iso8859 introduction, or 8th bit on */
2125 /* Can we convert in 7bit form using ESC-'-'-A ?
2127 output_mode = ISO8859_1;
2129 } else if (c2 == 0) {
2130 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2133 (*o_putc)(ascii_intro);
2134 output_mode = ASCII;
2138 if (output_mode != X0208) {
2139 output_mode = X0208;
2142 (*o_putc)(kanji_intro);
2144 if (c1<0x20 || 0x7e<c1)
2146 if (c2<0x20 || 0x7e<c2)
2158 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2160 } else if (base64_count>66 && mimeout_mode) {
2161 (*o_base64conv)(EOF,0);
2163 (*o_putc)('\t'); base64_count += 7;
2165 (*o_base64conv)(c2,c1);
2169 static int broken_buf[3];
2170 static int broken_counter = 0;
2171 static int broken_last = 0;
2178 if (broken_counter>0) {
2179 return broken_buf[--broken_counter];
2182 if (c=='$' && broken_last != ESC
2183 && (input_mode==ASCII || input_mode==X0201)) {
2186 if (c1=='@'|| c1=='B') {
2187 broken_buf[0]=c1; broken_buf[1]=c;
2194 } else if (c=='(' && broken_last != ESC
2195 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2198 if (c1=='J'|| c1=='B') {
2199 broken_buf[0]=c1; broken_buf[1]=c;
2217 if (broken_counter<2)
2218 broken_buf[broken_counter++]=c;
2222 static int prev_cr = 0;
2230 if (! (c2==0&&c1==NL) ) {
2236 } else if (c1=='\r') {
2238 } else if (c1=='\n') {
2239 if (crmode_f==CRLF) {
2240 (*o_crconv)(0,'\r');
2241 } else if (crmode_f==CR) {
2242 (*o_crconv)(0,'\r');
2246 } else if (c1!='\032' || crmode_f!=NL){
2252 Return value of fold_conv()
2254 \n add newline and output char
2255 \r add newline and output nothing
2258 1 (or else) normal output
2260 fold state in prev (previous character)
2262 >0x80 Japanese (X0208/X0201)
2267 This fold algorithm does not preserve leading spaces in a line.
2268 This is the main difference from fmt.
/*
 * Column width that the folding code adds for one character:
 * 2 when c2 is non-zero, otherwise 1.  c1 is accepted for symmetry with
 * the converter signatures but is not examined.  The argument is
 * parenthesized so that expression arguments expand as a single operand.
 */
#define char_size(c2,c1) ((c2)?2:1)
2281 fold_state=0; /* ignroe cr */
2282 } else if (c1== BS) {
2283 if (f_line>0) f_line--;
2285 } else if (c2==EOF && f_line != 0) { /* close open last line */
2287 } else if (c1=='\n') {
2289 if (fold_preserve_f) {
2292 } else if (f_prev == c1) { /* duplicate newline */
2295 fold_state = '\n'; /* output two newline */
2301 if (f_prev&0x80) { /* Japanese? */
2303 fold_state = 0; /* ignore given single newline */
2304 } else if (f_prev==' ') {
2308 if (++f_line<=fold_len)
2312 fold_state = '\r'; /* fold and output nothing */
2316 } else if (c1=='\f') {
2321 fold_state = '\n'; /* output newline and clear */
2322 } else if ( (c2==0 && c1==' ')||
2323 (c2==0 && c1=='\t')||
2324 (c2=='!'&& c1=='!')) {
2325 /* X0208 kankaku (ideographic space) or ASCII space */
2326 if (f_prev == ' ') {
2327 fold_state = 0; /* remove duplicate spaces */
2330 if (++f_line<=fold_len)
2331 fold_state = ' '; /* output ASCII space only */
2333 f_prev = ' '; f_line = 0;
2334 fold_state = '\r'; /* fold and output nothing */
2338 prev0 = f_prev; /* we still need this one... , but almost done */
2340 if (c2 || c2==X0201)
2341 f_prev |= 0x80; /* this is Japanese */
2342 f_line += char_size(c2,c1);
2343 if (f_line<=fold_len) { /* normal case */
2346 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2347 f_line = char_size(c2,c1);
2348 fold_state = '\n'; /* We can't wait, do fold now */
2349 } else if (c2==X0201) {
2350 /* simple kinsoku rules return 1 means no folding */
2351 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2352 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2353 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2354 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2355 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2356 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2357 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2359 fold_state = '\n';/* add one new f_line before this character */
2362 fold_state = '\n';/* add one new f_line before this character */
2365 /* kinsoku point in ASCII */
2366 if ( c1==')'|| /* { [ ( */
2377 /* just after special */
2378 } else if (!is_alnum(prev0)) {
2379 f_line = char_size(c2,c1);
2381 } else if ((prev0==' ') || /* ignored new f_line */
2382 (prev0=='\n')|| /* ignored new f_line */
2383 (prev0&0x80)) { /* X0208 - ASCII */
2384 f_line = char_size(c2,c1);
2385 fold_state = '\n';/* add one new f_line before this character */
2387 fold_state = 1; /* default no fold in ASCII */
2391 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2392 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2393 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2394 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2395 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2396 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2397 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2398 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2399 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2400 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2401 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2402 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2403 /* default no fold in kinsoku */
2406 f_line = char_size(c2,c1);
2407 /* add one new f_line before this character */
2410 f_line = char_size(c2,c1);
2412 /* add one new f_line before this character */
2417 /* terminator process */
2418 switch(fold_state) {
2437 int z_prev2=0,z_prev1=0;
2444 /* if (c2) c1 &= 0x7f; assertion */
2446 if (x0201_f && z_prev2==X0201) { /* X0201 */
2447 if (c1==(0xde&0x7f)) { /*
dakuten (voiced sound mark) */
2449 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2451 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
handakuten (semi-voiced sound mark) */
2453 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2457 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2466 if (x0201_f && c2==X0201) {
2467 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2468 /* wait for dakuten or handakuten */
2469 z_prev1 = c1; z_prev2 = c2;
2472 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
2477 /* JISX0208 Alphabet */
2478 if (alpha_f && c2 == 0x23 ) {
2480 } else if (alpha_f && c2 == 0x21 ) {
2481 /* JIS X 0208 kigou (symbols) */
2486 } else if (alpha_f&0x4) {
2491 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
2497 case '>': entity = ">"; break;
2498 case '<': entity = "<"; break;
2499 case '\"': entity = """; break;
2500 case '&': entity = "&"; break;
2503 while (*entity) (*o_zconv)(0, *entity++);
2513 #define rot13(c) ( \
2515 (c <= 'M') ? (c + 13): \
2516 (c <= 'Z') ? (c - 13): \
2518 (c <= 'm') ? (c + 13): \
2519 (c <= 'z') ? (c - 13): \
2523 #define rot47(c) ( \
2525 ( c <= 'O' ) ? (c + 47) : \
2526 ( c <= '~' ) ? (c - 47) : \
2534 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
2540 (*o_rot_conv)(c2,c1);
2547 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
2549 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
2552 (*o_hira_conv)(c2,c1);
2557 iso2022jp_check_conv(c2,c1)
2560 static int range[RANGE_NUM_MAX][2] = {
2583 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
2587 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
2592 for (i = 0; i < RANGE_NUM_MAX; i++) {
2593 start = range[i][0];
2596 if (c >= start && c <= end) {
2601 (*o_iso2022jp_check_conv)(c2,c1);
2605 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
2607 unsigned char *mime_pattern[] = {
2608 (unsigned char *)"\075?EUC-JP?B?",
2609 (unsigned char *)"\075?SHIFT_JIS?B?",
2610 (unsigned char *)"\075?ISO-8859-1?Q?",
2611 (unsigned char *)"\075?ISO-2022-JP?B?",
2612 (unsigned char *)"\075?ISO-2022-JP?Q?",
2613 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2614 (unsigned char *)"\075?UTF-8?B?",
2619 int mime_encode[] = {
2620 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, X0208, X0201,
2621 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2627 int mime_encode_method[] = {
2628 'B', 'B','Q', 'B', 'Q',
2629 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2636 #define MAXRECOVER 20
/*
 * ASCII-only character helpers (the original author did not trust the
 * portability of toupper).
 *
 *   nkf_toupper(c)  - maps 'a'..'z' to 'A'..'Z'; any other value is unchanged
 *   nkf_isdigit(c)  - non-zero for '0'..'9'
 *   nkf_isxdigit(c) - non-zero for '0'..'9', 'a'..'f', 'A'..'F'
 *
 * Every use of the argument is parenthesized so that expression arguments
 * (e.g. `x | 0`) expand correctly.  The argument is still evaluated more
 * than once, so do not pass expressions with side effects.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
2646 if (i_getc!=mime_getc) {
2647 i_mgetc = i_getc; i_getc = mime_getc;
2648 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2649 if(mime_f==STRICT_MIME) {
2650 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
2651 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
2657 unswitch_mime_getc()
2659 if(mime_f==STRICT_MIME) {
2660 i_mgetc = i_mgetc_buf;
2661 i_mungetc = i_mungetc_buf;
2664 i_ungetc = i_mungetc;
2668 mime_begin_strict(f)
2673 unsigned char *p,*q;
2674 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
2676 mime_decode_mode = FALSE;
2677 /* =? has been checked */
2679 p = mime_pattern[j];
2682 for(i=2;p[i]>' ';i++) { /* start at =? */
2683 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
2684 /* pattern fails, try next one */
2686 while ((p = mime_pattern[++j])) {
2687 for(k=2;k<i;k++) /* assume length(p) > i */
2688 if (p[k]!=q[k]) break;
2689 if (k==i && nkf_toupper(c1)==p[k]) break;
2691 if (p) continue; /* found next one, continue */
2692 /* all fails, output from recovery buffer */
2700 mime_decode_mode = p[i-2];
2701 if (mime_decode_mode=='B') {
2702 mimebuf_f = unbuf_f;
2704 /* do MIME integrity check */
2705 return mime_integrity(f,mime_pattern[j]);
2717 /* we don't keep eof of Fifo, because it contains ?= as
2718 a terminator. It was checked in mime_integrity. */
2719 return ((mimebuf_f)?
2720 (*i_mgetc_buf)(f):Fifo(mime_input++));
2724 mime_ungetc_buf(c,f)
2729 (*i_mungetc_buf)(c,f);
2731 Fifo(--mime_input)=c;
2742 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
2743 /* re-read and convert again from mime_buffer. */
2745 /* =? has been checked */
2747 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
2748 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
2749 /* We accept any character type even if it is broken by new lines */
2750 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
2751 if (c1=='\n'||c1==' '||c1=='\r'||
2752 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
2754 /* Failed. But this could be another MIME preamble */
2762 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2763 if (!(++i<MAXRECOVER) || c1==EOF) break;
2764 if (c1=='b'||c1=='B') {
2765 mime_decode_mode = 'B';
2766 } else if (c1=='q'||c1=='Q') {
2767 mime_decode_mode = 'Q';
2771 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2772 if (!(++i<MAXRECOVER) || c1==EOF) break;
2774 mime_decode_mode = FALSE;
2780 if (!mime_decode_mode) {
2781 /* false MIME preamble, restart from mime_buffer */
2782 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
2783 /* Since we are in MIME mode until buffer becomes empty, */
2784 /* we never go into mime_begin again for a while. */
2787 /* discard mime preamble, and goto MIME mode */
2789 /* do no MIME integrity check */
2790 return c1; /* used only for checking EOF */
2810 #ifdef CAP_URL_OPTION
2815 if (nkf_isdigit(x)) return x - '0';
2816 return nkf_toupper(x) - 'A' + 10;
2819 #ifdef ANSI_C_PROTOTYPE
2820 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
2823 hex_getc(ch, f, g, u)
2836 if (!nkf_isxdigit(c2) == EOF){
2841 if (!nkf_isxdigit(c3) == EOF){
2846 return (hex2bin(c2) << 4) | hex2bin(c3);
2853 return hex_getc(':', f, i_cgetc, i_cungetc);
2861 return (*i_cungetc)(c, f);
2868 return hex_getc('%', f, i_ugetc, i_uungetc);
2876 return (*i_uungetc)(c, f);
2885 int c1, c2, c3, c4, cc;
2886 int t1, t2, t3, t4, mode, exit_mode;
2888 if (mime_top != mime_last) { /* Something is in FIFO */
2889 return Fifo(mime_top++);
2891 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
2892 mime_decode_mode=FALSE;
2893 unswitch_mime_getc();
2894 return (*i_getc)(f);
2897 if (mimebuf_f == FIXED_MIME)
2898 exit_mode = mime_decode_mode;
2901 if (mime_decode_mode == 'Q') {
2902 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
2904 if (c1=='_') return ' ';
2905 if (c1!='=' && c1!='?') {
2909 mime_decode_mode = exit_mode; /* prepare for quit */
2910 if (c1<=' ') return c1;
2911 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
2912 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
2913 /* end Q encoding */
2914 input_mode = exit_mode;
2915 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
2916 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
2919 if (c1=='='&&c2<' ') { /* this is soft wrap */
2920 while((c1 = (*i_mgetc)(f)) <=' ') {
2921 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
2923 mime_decode_mode = 'Q'; /* still in MIME */
2924 goto restart_mime_q;
2927 mime_decode_mode = 'Q'; /* still in MIME */
2931 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
2932 if (c2<=' ') return c2;
2933 mime_decode_mode = 'Q'; /* still in MIME */
/*
 * Value of a single hexadecimal digit character ('0'..'9', 'A'..'F',
 * 'a'..'f'); any other character yields 0.  The argument is parenthesized
 * at every use so expression arguments expand correctly; it is evaluated
 * more than once, so avoid side effects.
 */
#define hex(c) (('0'<=(c)&&(c)<='9')?((c)-'0'):\
    ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
2936 return ((hex(c2)<<4) + hex(c3));
2939 if (mime_decode_mode != 'B') {
2940 mime_decode_mode = FALSE;
2941 return (*i_mgetc)(f);
2945 /* Base64 encoding */
2947 MIME allows line breaks in the middle of
2948 Base64, but we are very pessimistic in decoding
2949 in unbuf mode because MIME encoded code may be broken by
2950 less or an editor's control sequence (such as ESC-[-K) in unbuffered
2951 mode. Ignore incomplete MIME.
2953 mode = mime_decode_mode;
2954 mime_decode_mode = exit_mode; /* prepare for quit */
2956 while ((c1 = (*i_mgetc)(f))<=' ') {
2961 if ((c2 = (*i_mgetc)(f))<=' ') {
2964 if (mime_f != STRICT_MIME) goto mime_c2_retry;
2965 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
2968 if ((c1 == '?') && (c2 == '=')) {
2970 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
2971 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
2975 if ((c3 = (*i_mgetc)(f))<=' ') {
2978 if (mime_f != STRICT_MIME) goto mime_c3_retry;
2979 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
2983 if ((c4 = (*i_mgetc)(f))<=' ') {
2986 if (mime_f != STRICT_MIME) goto mime_c4_retry;
2987 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
2991 mime_decode_mode = mode; /* still in MIME sigh... */
2993 /* BASE 64 decoding */
2995 t1 = 0x3f & base64decode(c1);
2996 t2 = 0x3f & base64decode(c2);
2997 t3 = 0x3f & base64decode(c3);
2998 t4 = 0x3f & base64decode(c4);
2999 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3001 Fifo(mime_last++) = cc;
3002 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3004 Fifo(mime_last++) = cc;
3005 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3007 Fifo(mime_last++) = cc;
3012 return Fifo(mime_top++);
3020 Fifo(--mime_top) = c;
3031 /* In buffered mode, read until =? or NL or buffer full
3033 mime_input = mime_top;
3034 mime_last = mime_top;
3035 while(*p) Fifo(mime_input++) = *p++;
3038 while((c=(*i_getc)(f))!=EOF) {
3039 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3040 break; /* buffer full */
3042 if (c=='=' && d=='?') {
3043 /* checked. skip header, start decode */
3044 Fifo(mime_input++) = c;
3045 /* mime_last_input = mime_input; */
3050 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3052 /* Should we check length mod 4? */
3053 Fifo(mime_input++) = c;
3056 /* In case of Incomplete MIME, no MIME decode */
3057 Fifo(mime_input++) = c;
3058 mime_last = mime_input; /* point undecoded buffer */
3059 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3060 switch_mime_getc(); /* anyway we need buffered getc */
3071 i = c - 'A'; /* A..Z 0-25 */
3073 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3075 } else if (c > '/') {
3076 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3077 } else if (c == '+') {
3078 i = '>' /* 62 */ ; /* + 62 */
3080 i = '?' /* 63 */ ; /* / 63 */
3085 static char basis_64[] =
3086 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3096 p = mime_pattern[0];
3097 for(i=0;mime_encode[i];i++) {
3098 if (mode == mime_encode[i]) {
3099 p = mime_pattern[i];
3103 mimeout_mode = mime_encode_method[i];
3105 /* (*o_mputc)(' '); */
/*
 * Convert a 4-bit value to its uppercase hexadecimal digit character:
 * values below 10 map to '0'..'9', values from 10 map to 'A' upward.
 * The argument is parenthesized so that masked expressions such as
 * `v & 0xf` expand as one operand; it is evaluated more than once.
 */
#define itoh4(c) ((c)>=10?(c)+'A'-10:(c)+'0')
3128 if (mimeout_f==FIXED_MIME) {
3129 if (base64_count>71) {
3137 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3138 && mimeout_f!=FIXED_MIME) {
3139 if (mimeout_mode=='Q') {
3146 if (mimeout_mode!='B' || c!=SPACE) {
3155 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3156 open_mime(output_mode);
3158 } else { /* c==EOF */
3159 switch(mimeout_mode) {
3164 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3170 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3176 if (mimeout_f!=FIXED_MIME) {
3178 } else if (mimeout_mode != 'Q')
3183 switch(mimeout_mode) {
3187 (*o_mputc)(itoh4(((c>>4)&0xf)));
3188 (*o_mputc)(itoh4((c&0xf)));
3195 (*o_mputc)(basis_64[c>>2]);
3200 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3206 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3207 (*o_mputc)(basis_64[c & 0x3F]);
3227 mime_f = STRICT_MIME;
3231 #if defined(MSDOS) || defined(__OS2__)
3236 iso2022jp_f = FALSE;
3238 kanji_intro = DEFAULT_J;
3239 ascii_intro = DEFAULT_R;
3241 output_conv = DEFAULT_CONV;
3242 oconv = DEFAULT_CONV;
3245 i_mungetc = std_ungetc;
3246 i_mgetc_buf = std_getc;
3247 i_mungetc_buf = std_ungetc;
3250 i_ungetc=std_ungetc;
3253 i_bungetc= std_ungetc;
3257 o_crconv = no_connection;
3258 o_rot_conv = no_connection;
3259 o_iso2022jp_check_conv = no_connection;
3260 o_hira_conv = no_connection;
3261 o_fconv = no_connection;
3262 o_zconv = no_connection;
3265 i_ungetc = std_ungetc;
3267 i_mungetc = std_ungetc;
3269 output_mode = ASCII;
3272 mime_decode_mode = FALSE;
3282 #ifdef UTF8_INPUT_ENABLE
3287 #ifdef UTF8_OUTPUT_ENABLE
3288 if (w_oconv16_begin_f) {
3289 w_oconv16_begin_f = 2;
3294 fold_preserve_f = FALSE;
3297 fold_margin = FOLD_MARGIN;
3300 z_prev2=0,z_prev1=0;
3306 no_connection(c2,c1)
3309 no_connection2(c2,c1,0);
3313 no_connection2(c2,c1,c0)
3316 fprintf(stderr,"nkf internal module connection failure.\n");
3324 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3325 fprintf(stderr,"Flags:\n");
3326 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
3327 #ifdef DEFAULT_CODE_SJIS
3328 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3330 #ifdef DEFAULT_CODE_JIS
3331 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3333 #ifdef DEFAULT_CODE_EUC
3334 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3336 #ifdef DEFAULT_CODE_UTF8
3337 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3339 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3340 fprintf(stderr,"t no conversion\n");
3341 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3342 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3343 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
3344 fprintf(stderr,"v Show this usage. V: show version\n");
3345 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3346 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3347 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3348 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
3349 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
3350 fprintf(stderr," 3: Convert HTML Entity\n");
3351 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
3352 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
3354 fprintf(stderr,"T Text mode output\n");
3356 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
3357 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
3358 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
3359 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
3360 fprintf(stderr,"long name options\n");
3361 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
3362 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
3363 fprintf(stderr," --help,--version\n");
3370 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
3371 #if defined(MSDOS) && !defined(_Windows)
3374 #if !defined(__WIN32__) && defined(_Windows)
3377 #if defined(__WIN32__) && defined(_Windows)
3383 ,Version,Patchlevel);
3384 fprintf(stderr,"\n%s\n",CopyRight);
3389 ** Patch authors
3390 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
3391 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
3392 ** ohta@src.ricoh.co.jp (Junn Ohta)
3393 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
3394 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
3395 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
3396 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
3397 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
3398 ** GHG00637@nifty-serve.or.jp (COW)
3400 ** Last updated