1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8
\e$B%5%]!<%H$K$D$$$F
\e(B
31 **
\e$B=>Mh$N
\e(B nkf
\e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9
\e(B
32 ** nkf -e
\e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G
\e(B UTF-8
\e$B$HH=Dj$5$l$l$P!"
\e(B
33 **
\e$B$=$N$^$^
\e(B euc-jp
\e$B$KJQ49$5$l$^$9
\e(B
35 **
\e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#
\e(B
36 ** (
\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O
\e(B)
38 **
\e$B2?$+LdBj$r8+$D$1$?$i!"
\e(B
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
\e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#
\e(B
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2003 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS)
113 #if defined(MSDOS) || defined(__OS2__)
120 #define setbinmode(fp) fsetbin(fp)
121 #else /* Microsoft C, Turbo C */
122 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
124 #else /* UNIX,OS/2 */
125 #define setbinmode(fp)
128 #ifdef _IOFBF /* SysV and MSDOS */
129 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
131 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
134 /*Borland C++ 4.5 EasyWin*/
135 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
141 /* added by satoru@isoternet.org */
142 #include <sys/stat.h>
152 /* state of output_mode and input_mode
169 /* Input Assumption */
173 #define LATIN1_INPUT 6
175 #define STRICT_MIME 8
180 #define JAPANESE_EUC 10
184 #define UTF8_INPUT 13
185 #define UTF16_INPUT 14
186 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): non-zero when c is an ASCII letter ('a'-'z', 'A'-'Z')
 * or an ASCII digit ('0'-'9'), zero otherwise.
 *
 * Every use of the parameter is parenthesised so that compound arguments
 * (e.g. a conditional expression) expand with the intended precedence.
 * The argument may still be evaluated several times, so do not pass an
 * expression with side effects.
 */
#define is_alnum(c) \
    (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z') || ('0' <= (c) && (c) <= '9'))
207 #define HOLD_SIZE 1024
208 #define IOBUF_SIZE 16384
210 #define DEFAULT_J 'B'
211 #define DEFAULT_R 'B'
213 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
214 #define SJ6394 0x0161 /* 63 - 94 ku offset */
216 #define RANGE_NUM_MAX 18
221 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
222 #define sizeof_euc_utf8 94
223 #define sizeof_euc_to_utf8_1byte 94
224 #define sizeof_euc_to_utf8_2bytes 94
225 #define sizeof_utf8_to_euc_C2 64
226 #define sizeof_utf8_to_euc_E5B8 64
227 #define sizeof_utf8_to_euc_2bytes 112
228 #define sizeof_utf8_to_euc_3bytes 112
231 /* MIME preprocessor */
234 #ifdef EASYWIN /*Easy Win */
235 extern POINT _BufferSize;
238 /* function prototype */
240 #ifdef ANSI_C_PROTOTYPE
242 #define STATIC static
254 void (*status_func)PROTO((struct input_code *, int));
255 int (*iconv_func)PROTO((int c2, int c1, int c0));
258 STATIC int noconvert PROTO((FILE *f));
259 STATIC int kanji_convert PROTO((FILE *f));
260 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
261 STATIC int push_hold_buf PROTO((int c2));
262 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
263 STATIC int s_iconv PROTO((int c2,int c1,int c0));
264 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
265 STATIC int e_iconv PROTO((int c2,int c1,int c0));
266 #ifdef UTF8_INPUT_ENABLE
267 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
268 STATIC int w_iconv PROTO((int c2,int c1,int c0));
269 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
270 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
272 #ifdef UTF8_OUTPUT_ENABLE
273 STATIC int e2w_conv PROTO((int c2,int c1));
274 STATIC void w_oconv PROTO((int c2,int c1));
275 STATIC void w_oconv16 PROTO((int c2,int c1));
277 STATIC void e_oconv PROTO((int c2,int c1));
278 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
279 STATIC void s_oconv PROTO((int c2,int c1));
280 STATIC void j_oconv PROTO((int c2,int c1));
281 STATIC void fold_conv PROTO((int c2,int c1));
282 STATIC void cr_conv PROTO((int c2,int c1));
283 STATIC void z_conv PROTO((int c2,int c1));
284 STATIC void rot_conv PROTO((int c2,int c1));
285 STATIC void hira_conv PROTO((int c2,int c1));
286 STATIC void base64_conv PROTO((int c2,int c1));
287 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
288 STATIC void no_connection PROTO((int c2,int c1));
289 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
291 STATIC void code_score PROTO((struct input_code *ptr));
292 STATIC void code_status PROTO((int c));
294 STATIC void std_putc PROTO((int c));
295 STATIC int std_getc PROTO((FILE *f));
296 STATIC int std_ungetc PROTO((int c,FILE *f));
298 STATIC int broken_getc PROTO((FILE *f));
299 STATIC int broken_ungetc PROTO((int c,FILE *f));
301 STATIC int mime_begin PROTO((FILE *f));
302 STATIC int mime_getc PROTO((FILE *f));
303 STATIC int mime_ungetc PROTO((int c,FILE *f));
305 STATIC int mime_begin_strict PROTO((FILE *f));
306 STATIC int mime_getc_buf PROTO((FILE *f));
307 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
308 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
310 STATIC int base64decode PROTO((int c));
311 STATIC void mime_putc PROTO((int c));
312 STATIC void open_mime PROTO((int c));
313 STATIC void close_mime PROTO(());
314 STATIC void usage PROTO(());
315 STATIC void version PROTO(());
316 STATIC void options PROTO((unsigned char *c));
318 STATIC void reinit PROTO(());
323 static unsigned char stdibuf[IOBUF_SIZE];
324 static unsigned char stdobuf[IOBUF_SIZE];
325 static unsigned char hold_buf[HOLD_SIZE*2];
326 static int hold_count;
328 /* MIME preprocessor fifo */
330 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
331 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
332 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
333 static unsigned char mime_buf[MIME_BUF_SIZE];
334 static unsigned int mime_top = 0;
335 static unsigned int mime_last = 0; /* decoded */
336 static unsigned int mime_input = 0; /* undecoded */
339 static int unbuf_f = FALSE;
340 static int estab_f = FALSE;
341 static int nop_f = FALSE;
342 static int binmode_f = TRUE; /* binary mode */
343 static int rot_f = FALSE; /* ROT13/47 mode */
344 static int hira_f = FALSE; /* hira/kata henkan */
345 static int input_f = FALSE; /* non fixed input code */
346 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
347 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
348 static int mimebuf_f = FALSE; /* MIME buffered input */
349 static int broken_f = FALSE; /* convert ESC-less broken JIS */
350 static int iso8859_f = FALSE; /* ISO8859 through */
351 static int mimeout_f = FALSE; /* base64 mode */
352 #if defined(MSDOS) || defined(__OS2__)
353 static int x0201_f = TRUE; /* Assume JISX0201 kana */
355 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
357 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
358 #ifdef UTF8_OUTPUT_ENABLE
359 static int w_oconv16_begin_f= 0; /* utf-16 header */
364 static int cap_f = FALSE;
365 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
366 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
367 STATIC int cap_getc PROTO((FILE *f));
368 STATIC int cap_ungetc PROTO((int c,FILE *f));
370 static int url_f = FALSE;
371 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
372 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
373 STATIC int url_getc PROTO((FILE *f));
374 STATIC int url_ungetc PROTO((int c,FILE *f));
376 static int numchar_f = FALSE;
377 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
378 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
379 STATIC int numchar_getc PROTO((FILE *f));
380 STATIC int numchar_ungetc PROTO((int c,FILE *f));
384 static int noout_f = FALSE;
385 STATIC void no_putc PROTO((int c));
386 static int debug_f = FALSE;
387 STATIC void debug PROTO((char *str));
390 #ifdef SHIFTJIS_CP932
391 STATIC int cp932_f = FALSE;
392 #define CP932_TABLE_BEGIN (0xfa)
393 #define CP932_TABLE_END (0xfc)
395 #endif /* SHIFTJIS_CP932 */
397 STATIC void e_status PROTO((struct input_code *, int));
398 STATIC void s_status PROTO((struct input_code *, int));
400 #ifdef UTF8_INPUT_ENABLE
401 STATIC void w_status PROTO((struct input_code *, int));
402 static int utf16_mode = UTF16_INPUT;
405 struct input_code input_code_list[] = {
406 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv},
407 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv},
408 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv},
412 static int mimeout_mode = 0;
413 static int base64_count = 0;
415 /* X0208 -> ASCII converter */
418 static int f_line = 0; /* chars in line */
419 static int f_prev = 0;
420 static int fold_preserve_f = FALSE; /* preserve new lines */
421 static int fold_f = FALSE;
422 static int fold_len = 0;
425 static unsigned char kanji_intro = DEFAULT_J,
426 ascii_intro = DEFAULT_R;
430 #define FOLD_MARGIN 10
431 #define DEFAULT_FOLD 60
433 static int fold_margin = FOLD_MARGIN;
437 #ifdef DEFAULT_CODE_JIS
438 # define DEFAULT_CONV j_oconv
440 #ifdef DEFAULT_CODE_SJIS
441 # define DEFAULT_CONV s_oconv
443 #ifdef DEFAULT_CODE_EUC
444 # define DEFAULT_CONV e_oconv
446 #ifdef DEFAULT_CODE_UTF8
447 # define DEFAULT_CONV w_oconv
450 /* process default */
451 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
453 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
454 /* s_iconv or oconv */
455 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
457 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
458 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
459 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
460 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
461 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
462 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
463 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
465 /* static redirections */
467 static void (*o_putc)PROTO((int c)) = std_putc;
469 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
470 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
472 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
473 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
475 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
477 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
478 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
480 /* for strict mime */
481 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
482 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
485 static int output_mode = ASCII, /* output kanji mode */
486 input_mode = ASCII, /* input kanji mode */
487 shift_mode = FALSE; /* TRUE shift out, or X0201 */
488 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
490 /* X0201 / X0208 conversion tables */
492 /* X0201 kana conversion table */
495 unsigned char cv[]= {
496 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
497 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
498 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
499 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
500 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
501 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
502 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
503 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
504 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
505 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
506 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
507 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
508 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
509 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
510 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
511 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
515 /* X0201 kana conversion table for dakuten */
518 unsigned char dv[]= {
519 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
520 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
521 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
522 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
524 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
525 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
526 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
527 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
528 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
529 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
530 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
531 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
532 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
534 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
537 /* X0201 kana conversion table for han-dakuten */
540 unsigned char ev[]= {
541 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
542 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
543 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
544 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
545 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
547 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
548 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
549 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
550 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
552 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
553 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
554 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
555 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
560 /* X0208 kigou conversion table */
561 /* 0x8140 - 0x819e */
563 unsigned char fv[] = {
565 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
566 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
567 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
568 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
569 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
570 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
571 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
572 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
573 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
574 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
575 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
582 static int file_out = FALSE;
584 static int overwrite = FALSE;
587 static int crmode_f = 0; /* CR, NL, CRLF */
588 #ifdef EASYWIN /*Easy Win */
589 static int end_check;
601 #ifdef EASYWIN /*Easy Win */
602 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
605 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
606 cp = (unsigned char *)*argv;
609 if(x0201_f == WISH_TRUE)
610 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
612 if (binmode_f == TRUE)
614 if (freopen("","wb",stdout) == NULL)
621 setbuf(stdout, (char *) NULL);
623 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
626 if (binmode_f == TRUE)
628 if (freopen("","rb",stdin) == NULL) return (-1);
632 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
636 kanji_convert(stdin);
642 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
651 /* reopen file for stdout */
652 if (file_out == TRUE) {
655 outfname = malloc(strlen(origfname)
656 + strlen(".nkftmpXXXXXX")
662 strcpy(outfname, origfname);
666 for (i = strlen(outfname); i; --i){
667 if (outfname[i - 1] == '/'
668 || outfname[i - 1] == '\\'){
674 strcat(outfname, "ntXXXXXX");
676 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
679 strcat(outfname, ".nkftmpXXXXXX");
680 fd = mkstemp(outfname);
683 || (fd_backup = dup(fileno(stdout))) < 0
684 || dup2(fd, fileno(stdout)) < 0
695 outfname = "nkf.out";
698 if(freopen(outfname, "w", stdout) == NULL) {
702 if (binmode_f == TRUE) {
704 if (freopen("","wb",stdout) == NULL)
711 if (binmode_f == TRUE)
713 if (freopen("","rb",fin) == NULL)
718 setvbuffer(fin, stdibuf, IOBUF_SIZE);
735 if (dup2(fd_backup, fileno(stdout)) < 0){
738 if (stat(origfname, &sb)) {
739 fprintf(stderr, "Can't stat %s\n", origfname);
741 /* restore the file permissions */
742 if (chmod(outfname, sb.st_mode)) {
743 fprintf(stderr, "Can't set permission %s\n", outfname);
746 tb[0] = tb[1] = sb.st_mtime;
747 /* restore the timestamp */
748 if (utime(outfname, tb)) {
749 fprintf(stderr, "Can't set timestamp %s\n", outfname);
752 if (unlink(origfname)){
756 tb.actime = sb.st_atime;
757 tb.modtime = sb.st_mtime;
758 /* restore the timestamp */
759 if (utime(outfname, &tb)) {
760 fprintf(stderr, "Can't set timestamp %s\n", outfname);
763 if (rename(outfname, origfname)) {
765 fprintf(stderr, "Can't rename %s to %s\n",
766 outfname, origfname);
774 #ifdef EASYWIN /*Easy Win */
775 if (file_out == FALSE)
776 scanf("%d",&end_check);
779 #else /* for Other OS */
780 if (file_out == TRUE)
810 {"katakana-hiragana","h3"},
811 #ifdef UTF8_OUTPUT_ENABLE
815 #ifdef UTF8_INPUT_ENABLE
817 {"utf16-input", "W16"},
825 {"numchar-input", ""},
831 #ifdef SHIFTJIS_CP932
836 static int option_mode;
851 case '-': /* literal options */
852 if (!*cp) { /* ignore the rest of arguments */
856 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
858 p = (unsigned char *)long_option[i].name;
859 for (j=0;*p && *p++ == cp[j];j++);
860 if (! *p && !cp[j]) break;
863 cp = (unsigned char *)long_option[i].alias;
866 if (strcmp(long_option[i].name, "overwrite") == 0){
873 if (strcmp(long_option[i].name, "cap-input") == 0){
877 if (strcmp(long_option[i].name, "url-input") == 0){
881 if (strcmp(long_option[i].name, "numchar-input") == 0){
887 if (strcmp(long_option[i].name, "no-output") == 0){
891 if (strcmp(long_option[i].name, "debug") == 0){
896 #ifdef SHIFTJIS_CP932
897 if (strcmp(long_option[i].name, "cp932") == 0){
904 case 'b': /* buffered mode */
907 case 'u': /* unbuffered mode */
910 case 't': /* transparent mode */
913 case 'j': /* JIS output */
915 output_conv = j_oconv;
917 case 'e': /* AT&T EUC output */
918 output_conv = e_oconv;
920 case 's': /* SJIS output */
921 output_conv = s_oconv;
923 case 'l': /* ISO8859 Latin-1 support, no conversion */
924 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
925 input_f = LATIN1_INPUT;
927 case 'i': /* Kanji IN ESC-$-@/B */
928 if (*cp=='@'||*cp=='B')
931 case 'o': /* ASCII IN ESC-(-J/B */
932 if (*cp=='J'||*cp=='B'||*cp=='H')
940 if ('9'>= *cp && *cp>='0')
941 hira_f |= (*cp++ -'0');
948 #if defined(MSDOS) || defined(__OS2__)
963 #ifdef UTF8_OUTPUT_ENABLE
964 case 'w': /* UTF-8 output */
965 if ('1'== cp[0] && '6'==cp[1]) {
966 output_conv = w_oconv16; cp+=2;
968 w_oconv16_begin_f=2; cp++;
971 output_conv = w_oconv;
974 #ifdef UTF8_INPUT_ENABLE
975 case 'W': /* UTF-8 input */
976 if ('1'== cp[0] && '6'==cp[1]) {
977 input_f = UTF16_INPUT;
979 input_f = UTF8_INPUT;
982 /* Input code assumption */
983 case 'J': /* JIS input */
984 case 'E': /* AT&T EUC input */
987 case 'S': /* MS Kanji input */
988 input_f = SJIS_INPUT;
989 if (x0201_f==NO_X0201) x0201_f=TRUE;
991 case 'Z': /* Convert X0208 alphabet to ASCII */
992 /* bit:0 Convert X0208
993 bit:1 Convert Kankaku to one space
994 bit:2 Convert Kankaku to two spaces
995 bit:3 Convert HTML Entity
997 if ('9'>= *cp && *cp>='0')
998 alpha_f |= 1<<(*cp++ -'0');
1002 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1003 x0201_f = FALSE; /* No X0201->X0208 conversion */
1005 ESC-(-I in JIS, EUC, MS Kanji
1006 SI/SO in JIS, EUC, MS Kanji
1007 SSO in EUC, JIS, not in MS Kanji
1008 MS Kanji (0xa0-0xdf)
1010 ESC-(-I in JIS (0x20-0x5f)
1011 SSO in EUC (0xa0-0xdf)
1012 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1015 case 'X': /* Assume X0201 kana */
1016 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1019 case 'F': /* preserve new lines */
1020 fold_preserve_f = TRUE;
1021 case 'f': /* folding -f60 or -f */
1024 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1026 fold_len += *cp++ - '0';
1028 if (!(0<fold_len && fold_len<BUFSIZ))
1029 fold_len = DEFAULT_FOLD;
1033 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1035 fold_margin += *cp++ - '0';
1039 case 'm': /* MIME support */
1040 if (*cp=='B'||*cp=='Q') {
1041 mime_decode_mode = *cp++;
1042 mimebuf_f = FIXED_MIME;
1043 } else if (*cp=='N') {
1044 mime_f = TRUE; cp++;
1045 } else if (*cp=='S') {
1046 mime_f = STRICT_MIME; cp++;
1047 } else if (*cp=='0') {
1048 mime_f = FALSE; cp++;
1051 case 'M': /* MIME output */
1054 mimeout_f = FIXED_MIME; cp++;
1055 } else if (*cp=='Q') {
1057 mimeout_f = FIXED_MIME; cp++;
1062 case 'B': /* Broken JIS support */
1064 bit:1 allow any x on ESC-(-x or ESC-$-x
1065 bit:2 reset to ascii on NL
1067 if ('9'>= *cp && *cp>='0')
1068 broken_f |= 1<<(*cp++ -'0');
1073 case 'O':/* for Output file */
1077 case 'c':/* add cr code */
1080 case 'd':/* delete cr code */
1083 case 'I': /* ISO-2022-JP output */
1086 case 'L': /* line mode */
1087 if (*cp=='u') { /* unix */
1088 crmode_f = NL; cp++;
1089 } else if (*cp=='m') { /* mac */
1090 crmode_f = CR; cp++;
1091 } else if (*cp=='w') { /* windows */
1092 crmode_f = CRLF; cp++;
1093 } else if (*cp=='0') { /* no conversion */
1098 /* multiple options in one string are allowed, for the Perl module */
1099 while(*cp && *cp!='-') cp++;
1103 /* bogus option but ignored */
1109 #ifdef ANSI_C_PROTOTYPE
1110 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1112 void set_iconv(f, iconv_func)
1114 int (*iconv_func)();
1118 static int (*iconv_for_check)() = 0;
1120 #ifdef INPUT_CODE_FIX
1128 #ifdef INPUT_CODE_FIX
1129 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1135 if (estab_f && iconv_for_check != iconv){
1136 #ifdef UTF8_INPUT_ENABLE
1137 if (iconv == w_iconv) debug("UTF-8\n");
1138 if (iconv == w_iconv16) debug("UTF-16\n");
1140 if (iconv == s_iconv) debug("Shift_JIS\n");
1141 if (iconv == e_iconv) debug("EUC-JP\n");
1142 iconv_for_check = iconv;
1147 #define SCORE_KANA (1) /* so-called half-width kana */
1148 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1149 #ifdef SHIFTJIS_CP932
1150 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1151 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* character that does not exist */
1153 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* character that does not exist */
1155 #define SCORE_ERROR (SCORE_NO_EXIST << 1) /* error */
1156 int score_table_A0[] = {
1159 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1160 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1163 int score_table_F0[] = {
1165 0, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1166 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1167 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1170 void set_code_score(ptr, score)
1171 struct input_code *ptr;
1174 ptr->score |= score;
1177 void code_score(ptr)
1178 struct input_code *ptr;
1180 int c2 = ptr->buf[0];
1181 int c1 = ptr->buf[1];
1183 set_code_score(ptr, SCORE_ERROR);
1184 }else if ((c2 & 0xf0) == 0xa0){
1185 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1186 }else if ((c2 & 0xf0) == 0xf0){
1187 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1188 }else if (c2 == SSO){
1189 set_code_score(ptr, SCORE_KANA);
1191 #ifdef UTF8_OUTPUT_ENABLE
1192 else if (!e2w_conv(c2, c1)){
1193 set_code_score(ptr, SCORE_NO_EXIST);
1198 void status_disable(ptr)
1199 struct input_code *ptr;
1204 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1207 void status_push_ch(ptr, c)
1208 struct input_code *ptr;
1211 ptr->buf[ptr->index++] = c;
1214 void status_reset(ptr)
1215 struct input_code *ptr;
1222 void status_check(ptr, c)
1223 struct input_code *ptr;
1226 if (c <= DEL && estab_f){
1231 void s_status(ptr, c)
1232 struct input_code *ptr;
1237 status_check(ptr, c);
1242 }else if (0xa1 <= c && c <= 0xdf){
1243 status_push_ch(ptr, SSO);
1244 status_push_ch(ptr, c);
1247 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1249 status_push_ch(ptr, c);
1250 #ifdef SHIFTJIS_CP932
1252 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1254 status_push_ch(ptr, c);
1255 #endif /* SHIFTJIS_CP932 */
1257 status_disable(ptr);
1261 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1262 status_push_ch(ptr, c);
1263 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1267 status_disable(ptr);
1270 #ifdef SHIFTJIS_CP932
1272 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1273 status_push_ch(ptr, c);
1274 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1275 set_code_score(ptr, SCORE_CP932);
1280 status_disable(ptr);
1282 #endif /* SHIFTJIS_CP932 */
1286 void e_status(ptr, c)
1287 struct input_code *ptr;
1292 status_check(ptr, c);
1297 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1299 status_push_ch(ptr, c);
1301 status_disable(ptr);
1305 if (0xa1 <= c && c <= 0xfe){
1306 status_push_ch(ptr, c);
1310 status_disable(ptr);
1316 #ifdef UTF8_INPUT_ENABLE
1317 void w_status(ptr, c)
1318 struct input_code *ptr;
1323 status_check(ptr, c);
1328 }else if (0xc0 <= c && c <= 0xdf){
1330 status_push_ch(ptr, c);
1331 }else if (0xe0 <= c && c <= 0xef){
1333 status_push_ch(ptr, c);
1335 status_disable(ptr);
1340 if (0x80 <= c && c <= 0xbf){
1341 status_push_ch(ptr, c);
1342 if (ptr->index > ptr->stat){
1343 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1344 &ptr->buf[0], &ptr->buf[1]);
1349 status_disable(ptr);
1360 int action_flag = 1;
1361 struct input_code *result = 0;
1362 struct input_code *p = input_code_list;
1364 (p->status_func)(p, c);
1367 }else if(p->stat == 0){
1379 set_iconv(TRUE, result->iconv_func);
1380 }else if (c <= DEL){
1381 struct input_code *ptr = input_code_list;
1419 while ((c = (*i_getc)(f)) != EOF)
1428 oconv = output_conv;
1431 /* replace continuation module, from output side */
1433 /* output redirection */
1442 if (mimeout_f == TRUE) {
1443 o_base64conv = oconv; oconv = base64_conv;
1445 /* base64_count = 0; */
1449 o_crconv = oconv; oconv = cr_conv;
1452 o_rot_conv = oconv; oconv = rot_conv;
1455 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1458 o_hira_conv = oconv; oconv = hira_conv;
1461 o_fconv = oconv; oconv = fold_conv;
1464 if (alpha_f || x0201_f) {
1465 o_zconv = oconv; oconv = z_conv;
1469 /* input redirection */
1472 i_cgetc = i_getc; i_getc = cap_getc;
1473 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1476 i_ugetc = i_getc; i_getc = url_getc;
1477 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1480 i_ngetc = i_getc; i_getc = numchar_getc;
1481 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1484 if (mime_f && mimebuf_f==FIXED_MIME) {
1485 i_mgetc = i_getc; i_getc = mime_getc;
1486 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1489 i_bgetc = i_getc; i_getc = broken_getc;
1490 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1492 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1493 set_iconv(-TRUE, e_iconv);
1494 } else if (input_f == SJIS_INPUT) {
1495 set_iconv(-TRUE, s_iconv);
1496 #ifdef UTF8_INPUT_ENABLE
1497 } else if (input_f == UTF8_INPUT) {
1498 set_iconv(-TRUE, w_iconv);
1499 } else if (input_f == UTF16_INPUT) {
1500 set_iconv(-TRUE, w_iconv16);
1503 set_iconv(FALSE, e_iconv);
1507 struct input_code *p = input_code_list;
1515 Conversion main loop. Code detection only.
1525 module_connection();
1530 output_mode = ASCII;
1533 #define NEXT continue /* no output, get next */
1534 #define SEND ; /* output c1 and c2, get next */
1535 #define LAST break /* end of loop, go closing */
1537 while ((c1 = (*i_getc)(f)) != EOF) {
1542 /* in case of 8th bit is on */
1544 /* in case of not established yet */
1545 /* It is still ambiguous */
1546 if (h_conv(f, c2, c1)==EOF)
1552 /* in case of already established */
1554 /* ignore bogus code */
1560 /* second byte, 7 bit code */
1561 /* it might be kanji shifted */
1562 if ((c1 == DEL) || (c1 <= SPACE)) {
1563 /* ignore bogus first code */
1571 #ifdef UTF8_INPUT_ENABLE
1580 } else if (c1 > DEL) {
1582 if (!estab_f && !iso8859_f) {
1583 /* not established yet */
1586 } else { /* estab_f==TRUE */
1591 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1592 /* SJIS X0201 Case... */
1593 if(iso2022jp_f && x0201_f==NO_X0201) {
1594 (*oconv)(GETA1, GETA2);
1601 } else if (c1==SSO && iconv != s_iconv) {
1602 /* EUC X0201 Case */
1603 c1 = (*i_getc)(f); /* skip SSO */
1605 if (SSP<=c1 && c1<0xe0) {
1606 if(iso2022jp_f && x0201_f==NO_X0201) {
1607 (*oconv)(GETA1, GETA2);
1614 } else { /* bogus code, skip SSO and one byte */
1618 /* already established */
1623 } else if ((c1 > SPACE) && (c1 != DEL)) {
1624 /* in case of Roman characters */
1626 /* output 1 shifted byte */
1630 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1631 /* output 1 shifted byte */
1632 if(iso2022jp_f && x0201_f==NO_X0201) {
1633 (*oconv)(GETA1, GETA2);
1640 /* look like bogus code */
1643 } else if (input_mode == X0208) {
1644 /* in case of Kanji shifted */
1647 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1648 /* Check MIME code */
1649 if ((c1 = (*i_getc)(f)) == EOF) {
1652 } else if (c1 == '?') {
1653 /* =? is mime conversion start sequence */
1654 if(mime_f == STRICT_MIME) {
1655 /* check in real detail */
1656 if (mime_begin_strict(f) == EOF)
1660 } else if (mime_begin(f) == EOF)
1670 /* normal ASCII code */
1673 } else if (c1 == SI) {
1676 } else if (c1 == SO) {
1679 } else if (c1 == ESC ) {
1680 if ((c1 = (*i_getc)(f)) == EOF) {
1681 /* (*oconv)(0, ESC); don't send bogus code */
1683 } else if (c1 == '$') {
1684 if ((c1 = (*i_getc)(f)) == EOF) {
1686 (*oconv)(0, ESC); don't send bogus code
1687 (*oconv)(0, '$'); */
1689 } else if (c1 == '@'|| c1 == 'B') {
1690 /* This is kanji introduction */
1694 } else if (c1 == '(') {
1695 if ((c1 = (*i_getc)(f)) == EOF) {
1696 /* don't send bogus code
1702 } else if (c1 == '@'|| c1 == 'B') {
1703 /* This is kanji introduction */
1708 /* could be some special code */
1715 } else if (broken_f&0x2) {
1716 /* accept any ESC-(-x as broken code ... */
1726 } else if (c1 == '(') {
1727 if ((c1 = (*i_getc)(f)) == EOF) {
1728 /* don't send bogus code
1730 (*oconv)(0, '('); */
1734 /* This is X0201 kana introduction */
1735 input_mode = X0201; shift_mode = X0201;
1737 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
1738 /* This is X0208 kanji introduction */
1739 input_mode = ASCII; shift_mode = FALSE;
1741 } else if (broken_f&0x2) {
1742 input_mode = ASCII; shift_mode = FALSE;
1747 /* maintain various input_mode here */
1751 } else if ( c1 == 'N' || c1 == 'n' ){
1753 c1 = (*i_getc)(f); /* skip SS2 */
1754 if ( SPACE<=c1 && c1 < 0xe0 ) {
1763 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
1764 input_mode = ASCII; set_iconv(FALSE, 0);
1770 if (input_mode == X0208)
1771 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
1772 else if (input_mode)
1773 (*oconv)(input_mode, c1); /* other special case */
1774 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
1775 int c0 = (*i_getc)(f);
1778 (*iconv)(c2, c1, c0);
1784 /* goto next_word */
1788 (*iconv)(EOF, 0, 0);
1801 /** it must NOT be in the kanji shifted sequence */
1802 /** it must NOT be written in JIS7 */
1803 /** and it must be after 2 byte 8bit code */
1810 while ((c1 = (*i_getc)(f)) != EOF) {
1816 if (push_hold_buf(c1) == EOF || estab_f){
1822 struct input_code *p = input_code_list;
1823 struct input_code *result = p;
1825 if (p->score < result->score){
1830 set_iconv(FALSE, p->iconv_func);
1835 ** 1) EOF is detected, or
1836 ** 2) Code is established, or
1837 ** 3) Buffer is FULL (but last word is pushed)
1839 ** in 1) and 3) cases, we continue to use
1840 ** Kanji codes by oconv and leave estab_f unchanged.
1844 while (wc < hold_count){
1845 c2 = hold_buf[wc++];
1849 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
1850 (*iconv)(X0201, c2, 0);
1853 if (wc < hold_count){
1854 c1 = hold_buf[wc++];
1857 if (c1 == EOF) break;
1860 if ((*iconv)(c2, c1, 0) < 0){
1862 if (wc < hold_count){
1863 c0 = hold_buf[wc++];
1866 if (c0 == EOF) break;
1869 (*iconv)(c2, c1, c0);
1883 if (hold_count >= HOLD_SIZE*2)
1885 hold_buf[hold_count++] = c2;
1886 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
1889 int s2e_conv(c2, c1, p2, p1)
1893 #ifdef SHIFTJIS_CP932
1894 if (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
1895 extern unsigned short shiftjis_cp932[3][189];
1896 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1897 if (c1 == 0) return 1;
1901 #endif /* SHIFTJIS_CP932 */
1902 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
1904 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
1921 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1924 int ret = s2e_conv(c2, c1, &c2, &c1);
1925 if (ret) return ret;
1938 } else if (c2 == SSO){
1941 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
1951 #ifdef UTF8_INPUT_ENABLE
1953 w2e_conv(c2, c1, c0, p2, p1)
1957 extern unsigned short * utf8_to_euc_2bytes[];
1958 extern unsigned short ** utf8_to_euc_3bytes[];
1960 if (0xc0 <= c2 && c2 <= 0xef) {
1961 unsigned short **pp;
1964 if (c0 == 0) return -1;
1965 pp = utf8_to_euc_3bytes[c2 - 0x80];
1966 return w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
1968 return w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
1970 } else if (c2 == X0201) {
1983 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
1991 w16w_conv(val, p2, p1, p0)
1999 }else if (val < 0x800){
2000 *p2 = 0xc0 | (val >> 6);
2001 *p1 = 0x80 | (val & 0x3f);
2004 *p2 = 0xe0 | (val >> 12);
2005 *p1 = 0x80 | ((val >> 6) & 0x3f);
2006 *p0 = 0x80 | (val & 0x3f);
2011 w16e_conv(val, p2, p1)
2015 extern unsigned short * utf8_to_euc_2bytes[];
2016 extern unsigned short ** utf8_to_euc_3bytes[];
2018 unsigned short **pp;
2021 w16w_conv(val, &c2, &c1, &c0);
2024 pp = utf8_to_euc_3bytes[c2 - 0x80];
2025 psize = sizeof_utf8_to_euc_C2;
2027 pp = utf8_to_euc_2bytes;
2028 psize = sizeof_utf8_to_euc_2bytes;
2030 return w_iconv_common(c1, c0, pp, psize, p2, p1);
2036 w_iconv16(c2, c1, c0)
2041 if (c2==0376 && c1==0377){
2042 utf16_mode = UTF16_INPUT;
2044 } else if (c2==0377 && c1==0376){
2045 utf16_mode = UTF16BE_INPUT;
2048 if (utf16_mode == UTF16BE_INPUT) {
2050 tmp=c1; c1=c2; c2=tmp;
2052 if (c2==0 || c2==EOF) {
2056 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2057 if (ret) return ret;
2063 w_iconv_common(c1, c0, pp, psize, p2, p1)
2065 unsigned short **pp;
2073 if (pp == 0) return 1;
2076 if (c1 < 0 || psize <= c1) return 1;
2078 if (p == 0) return 1;
2081 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2083 if (val == 0) return 1;
2086 if (c2 == SO) c2 = X0201;
2095 #ifdef UTF8_OUTPUT_ENABLE
2100 extern unsigned short euc_to_utf8_1byte[];
2101 extern unsigned short * euc_to_utf8_2bytes[];
2105 p = euc_to_utf8_1byte;
2108 c2 = (c2&0x7f) - 0x21;
2109 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2110 p = euc_to_utf8_2bytes[c2];
2115 c1 = (c1 & 0x7f) - 0x21;
2116 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2129 } else if (c2 == 0) {
2130 output_mode = ASCII;
2132 } else if (c2 == ISO8859_1) {
2133 output_mode = ISO8859_1;
2134 (*o_putc)(c1 | 0x080);
2136 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2139 if (0 < val && val < 0x80){
2141 }else if (val < 0x800){
2142 (*o_putc)(0xc0 | (val >> 6));
2143 (*o_putc)(0x80 | (val & 0x3f));
2145 (*o_putc)(0xe0 | (val >> 12));
2146 (*o_putc)(0x80 | ((val >> 6) & 0x3f));
2147 (*o_putc)(0x80 | (val & 0x3f));
2158 if (w_oconv16_begin_f==2) {
2161 w_oconv16_begin_f=1;
2166 } else if (c2 == 0) {
2169 } else if (c2 == ISO8859_1) {
2171 (*o_putc)(c1 | 0x080);
2173 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2174 (*o_putc)((val&0xff00)>>8);
2175 (*o_putc)(val&0xff);
2189 } else if (c2 == 0) {
2190 output_mode = ASCII;
2192 } else if (c2 == X0201) {
2193 output_mode = JAPANESE_EUC;
2194 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2195 } else if (c2 == ISO8859_1) {
2196 output_mode = ISO8859_1;
2197 (*o_putc)(c1 | 0x080);
2199 if ((c1<0x20 || 0x7e<c1) ||
2200 (c2<0x20 || 0x7e<c2)) {
2201 set_iconv(FALSE, 0);
2202 return; /* too late to rescue this char */
2204 output_mode = JAPANESE_EUC;
2205 (*o_putc)(c2 | 0x080);
2206 (*o_putc)(c1 | 0x080);
2211 e2s_conv(c2, c1, p2, p1)
2212 int c2, c1, *p2, *p1;
2214 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2215 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2226 } else if (c2 == 0) {
2227 output_mode = ASCII;
2229 } else if (c2 == X0201) {
2230 output_mode = SHIFT_JIS;
2232 } else if (c2 == ISO8859_1) {
2233 output_mode = ISO8859_1;
2234 (*o_putc)(c1 | 0x080);
2236 if ((c1<0x20 || 0x7e<c1) ||
2237 (c2<0x20 || 0x7e<c2)) {
2238 set_iconv(FALSE, 0);
2239 return; /* too late to rescue this char */
2241 output_mode = SHIFT_JIS;
2242 e2s_conv(c2, c1, &c2, &c1);
2254 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2257 (*o_putc)(ascii_intro);
2258 output_mode = ASCII;
2261 } else if (c2==X0201) {
2262 if (output_mode!=X0201) {
2263 output_mode = X0201;
2269 } else if (c2==ISO8859_1) {
2270 /* iso8859 introduction, or 8th bit on */
2271 /* Can we convert in 7bit form using ESC-'-'-A ?
2273 output_mode = ISO8859_1;
2275 } else if (c2 == 0) {
2276 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2279 (*o_putc)(ascii_intro);
2280 output_mode = ASCII;
2284 if (output_mode != X0208) {
2285 output_mode = X0208;
2288 (*o_putc)(kanji_intro);
2290 if (c1<0x20 || 0x7e<c1)
2292 if (c2<0x20 || 0x7e<c2)
2304 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2306 } else if (base64_count>66 && mimeout_mode) {
2307 (*o_base64conv)(EOF,0);
2309 (*o_putc)('\t'); base64_count += 7;
2311 (*o_base64conv)(c2,c1);
2315 static int broken_buf[3];
2316 static int broken_counter = 0;
2317 static int broken_last = 0;
2324 if (broken_counter>0) {
2325 return broken_buf[--broken_counter];
2328 if (c=='$' && broken_last != ESC
2329 && (input_mode==ASCII || input_mode==X0201)) {
2332 if (c1=='@'|| c1=='B') {
2333 broken_buf[0]=c1; broken_buf[1]=c;
2340 } else if (c=='(' && broken_last != ESC
2341 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2344 if (c1=='J'|| c1=='B') {
2345 broken_buf[0]=c1; broken_buf[1]=c;
2363 if (broken_counter<2)
2364 broken_buf[broken_counter++]=c;
2368 static int prev_cr = 0;
2376 if (! (c2==0&&c1==NL) ) {
2382 } else if (c1=='\r') {
2384 } else if (c1=='\n') {
2385 if (crmode_f==CRLF) {
2386 (*o_crconv)(0,'\r');
2387 } else if (crmode_f==CR) {
2388 (*o_crconv)(0,'\r');
2392 } else if (c1!='\032' || crmode_f!=NL){
2398 Return value of fold_conv()
2400 \n add newline and output char
2401 \r add newline and output nothing
2404 1 (or else) normal output
2406 fold state in prev (previous character)
2408 >0x80 Japanese (X0208/X0201)
2413 This fold algorithm does not preserve leading spaces in a line.
2414 This is the main difference from fmt.
/*
 * Column width used by the folding code for one character:
 * 2 when c2 is non-zero, 1 when c2 is zero.
 * c1 is accepted for call-site symmetry but is not examined.
 * The argument is parenthesized so that an expression argument cannot
 * re-associate with the macro's own conditional operator.
 */
#define char_size(c2,c1) ((c2)?2:1)
2426 if (c1== '\r' && !fold_preserve_f) {
2427 fold_state=0; /* ignore cr */
2428 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2430 fold_state=0; /* ignore cr */
2431 } else if (c1== BS) {
2432 if (f_line>0) f_line--;
2434 } else if (c2==EOF && f_line != 0) { /* close open last line */
2436 } else if ((c1=='\n' && !fold_preserve_f)
2437 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2438 && fold_preserve_f)) {
2440 if (fold_preserve_f) {
2444 } else if ((f_prev == c1 && !fold_preserve_f)
2445 || (f_prev == '\n' && fold_preserve_f)
2446 ) { /* duplicate newline */
2449 fold_state = '\n'; /* output two newline */
2455 if (f_prev&0x80) { /* Japanese? */
2457 fold_state = 0; /* ignore given single newline */
2458 } else if (f_prev==' ') {
2462 if (++f_line<=fold_len)
2466 fold_state = '\r'; /* fold and output nothing */
2470 } else if (c1=='\f') {
2475 fold_state = '\n'; /* output newline and clear */
2476 } else if ( (c2==0 && c1==' ')||
2477 (c2==0 && c1=='\t')||
2478 (c2=='!'&& c1=='!')) {
2479 /* X0208 kankaku or ascii space */
2480 if (f_prev == ' ') {
2481 fold_state = 0; /* remove duplicate spaces */
2484 if (++f_line<=fold_len)
2485 fold_state = ' '; /* output ASCII space only */
2487 f_prev = ' '; f_line = 0;
2488 fold_state = '\r'; /* fold and output nothing */
2492 prev0 = f_prev; /* we still need this one... , but almost done */
2494 if (c2 || c2==X0201)
2495 f_prev |= 0x80; /* this is Japanese */
2496 f_line += char_size(c2,c1);
2497 if (f_line<=fold_len) { /* normal case */
2500 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2501 f_line = char_size(c2,c1);
2502 fold_state = '\n'; /* We can't wait, do fold now */
2503 } else if (c2==X0201) {
2504 /* simple kinsoku rules return 1 means no folding */
2505 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2506 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2507 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2508 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2509 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2510 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2511 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2513 fold_state = '\n';/* add one new f_line before this character */
2516 fold_state = '\n';/* add one new f_line before this character */
2519 /* kinsoku point in ASCII */
2520 if ( c1==')'|| /* { [ ( */
2531 /* just after special */
2532 } else if (!is_alnum(prev0)) {
2533 f_line = char_size(c2,c1);
2535 } else if ((prev0==' ') || /* ignored new f_line */
2536 (prev0=='\n')|| /* ignored new f_line */
2537 (prev0&0x80)) { /* X0208 - ASCII */
2538 f_line = char_size(c2,c1);
2539 fold_state = '\n';/* add one new f_line before this character */
2541 fold_state = 1; /* default no fold in ASCII */
2545 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2546 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2547 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2548 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2549 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2550 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2551 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2552 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2553 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2554 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2555 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2556 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2557 /* default no fold in kinsoku */
2560 f_line = char_size(c2,c1);
2561 /* add one new f_line before this character */
2564 f_line = char_size(c2,c1);
2566 /* add one new f_line before this character */
2571 /* terminator process */
2572 switch(fold_state) {
2591 int z_prev2=0,z_prev1=0;
2598 /* if (c2) c1 &= 0x7f; assertion */
2600 if (x0201_f && z_prev2==X0201) { /* X0201 */
2601 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
2603 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2605 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
2607 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2611 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2620 if (x0201_f && c2==X0201) {
2621 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2622 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
2623 z_prev1 = c1; z_prev2 = c2;
2626 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
2631 /* JISX0208 Alphabet */
2632 if (alpha_f && c2 == 0x23 ) {
2634 } else if (alpha_f && c2 == 0x21 ) {
2635 /* JISX0208 Kigou */
2640 } else if (alpha_f&0x4) {
2645 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
2651 case '>': entity = ">"; break;
2652 case '<': entity = "<"; break;
2653 case '\"': entity = """; break;
2654 case '&': entity = "&"; break;
2657 while (*entity) (*o_zconv)(0, *entity++);
2667 #define rot13(c) ( \
2669 (c <= 'M') ? (c + 13): \
2670 (c <= 'Z') ? (c - 13): \
2672 (c <= 'm') ? (c + 13): \
2673 (c <= 'z') ? (c - 13): \
2677 #define rot47(c) ( \
2679 ( c <= 'O' ) ? (c + 47) : \
2680 ( c <= '~' ) ? (c - 47) : \
2688 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
2694 (*o_rot_conv)(c2,c1);
2701 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
2703 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
2706 (*o_hira_conv)(c2,c1);
2711 iso2022jp_check_conv(c2,c1)
2714 static int range[RANGE_NUM_MAX][2] = {
2737 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
2741 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
2746 for (i = 0; i < RANGE_NUM_MAX; i++) {
2747 start = range[i][0];
2750 if (c >= start && c <= end) {
2755 (*o_iso2022jp_check_conv)(c2,c1);
2759 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
2761 unsigned char *mime_pattern[] = {
2762 (unsigned char *)"\075?EUC-JP?B?",
2763 (unsigned char *)"\075?SHIFT_JIS?B?",
2764 (unsigned char *)"\075?ISO-8859-1?Q?",
2765 (unsigned char *)"\075?ISO-2022-JP?B?",
2766 (unsigned char *)"\075?ISO-2022-JP?Q?",
2767 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2768 (unsigned char *)"\075?UTF-8?B?",
2773 int mime_encode[] = {
2774 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, X0208, X0201,
2775 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2781 int mime_encode_method[] = {
2782 'B', 'B','Q', 'B', 'Q',
2783 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/* Size of the recovery buffer used while matching a MIME preamble
   (see r[MAXRECOVER] in mime_begin_strict). */
#define MAXRECOVER 20

/* I don't trust portability of toupper */
/*
 * ASCII-only character helpers.
 * Every use of the argument is parenthesized so that expression
 * arguments (e.g. x | 0x20, or a conditional expression) are classified
 * as a whole.  NOTE: the argument is still evaluated more than once, so
 * do not pass expressions with side effects.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
2800 if (i_getc!=mime_getc) {
2801 i_mgetc = i_getc; i_getc = mime_getc;
2802 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2803 if(mime_f==STRICT_MIME) {
2804 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
2805 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
2811 unswitch_mime_getc()
2813 if(mime_f==STRICT_MIME) {
2814 i_mgetc = i_mgetc_buf;
2815 i_mungetc = i_mungetc_buf;
2818 i_ungetc = i_mungetc;
2822 mime_begin_strict(f)
2827 unsigned char *p,*q;
2828 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
2830 mime_decode_mode = FALSE;
2831 /* =? has been checked */
2833 p = mime_pattern[j];
2836 for(i=2;p[i]>' ';i++) { /* start at =? */
2837 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
2838 /* pattern fails, try next one */
2840 while ((p = mime_pattern[++j])) {
2841 for(k=2;k<i;k++) /* assume length(p) > i */
2842 if (p[k]!=q[k]) break;
2843 if (k==i && nkf_toupper(c1)==p[k]) break;
2845 if (p) continue; /* found next one, continue */
2846 /* all fails, output from recovery buffer */
2854 mime_decode_mode = p[i-2];
2855 if (mime_decode_mode=='B') {
2856 mimebuf_f = unbuf_f;
2858 /* do MIME integrity check */
2859 return mime_integrity(f,mime_pattern[j]);
2871 /* we don't keep eof of Fifo, because it contains ?= as
2872 a terminator. It was checked in mime_integrity. */
2873 return ((mimebuf_f)?
2874 (*i_mgetc_buf)(f):Fifo(mime_input++));
2878 mime_ungetc_buf(c,f)
2883 (*i_mungetc_buf)(c,f);
2885 Fifo(--mime_input)=c;
2896 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
2897 /* re-read and convert again from mime_buffer. */
2899 /* =? has been checked */
2901 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
2902 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
2903 /* We accept any character type even if it is broken by new lines */
2904 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
2905 if (c1=='\n'||c1==' '||c1=='\r'||
2906 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
2908 /* Failed. But this could be another MIME preamble */
2916 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2917 if (!(++i<MAXRECOVER) || c1==EOF) break;
2918 if (c1=='b'||c1=='B') {
2919 mime_decode_mode = 'B';
2920 } else if (c1=='q'||c1=='Q') {
2921 mime_decode_mode = 'Q';
2925 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
2926 if (!(++i<MAXRECOVER) || c1==EOF) break;
2928 mime_decode_mode = FALSE;
2934 if (!mime_decode_mode) {
2935 /* false MIME preamble, restart from mime_buffer */
2936 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
2937 /* Since we are in MIME mode until buffer becomes empty, */
2938 /* we never go into mime_begin again for a while. */
2941 /* discard mime preamble, and goto MIME mode */
2943 /* do no MIME integrity check */
2944 return c1; /* used only for checking EOF */
2969 if (nkf_isdigit(x)) return x - '0';
2970 return nkf_toupper(x) - 'A' + 10;
2973 #ifdef ANSI_C_PROTOTYPE
2974 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
2977 hex_getc(ch, f, g, u)
2990 if (!nkf_isxdigit(c2)){
2995 if (!nkf_isxdigit(c3)){
3000 return (hex2bin(c2) << 4) | hex2bin(c3);
3007 return hex_getc(':', f, i_cgetc, i_cungetc);
3015 return (*i_cungetc)(c, f);
3022 return hex_getc('%', f, i_ugetc, i_uungetc);
3030 return (*i_uungetc)(c, f);
3037 int (*g)() = i_ngetc;
3038 int (*u)() = i_nungetc;
3049 if (buf[i] == 'x' || buf[i] == 'X'){
3050 for (j = 0; j < 5; j++){
3052 if (!nkf_isxdigit(buf[i])){
3059 c |= hex2bin(buf[i]);
3062 for (j = 0; j < 6; j++){
3066 if (!nkf_isdigit(buf[i])){
3073 c += hex2bin(buf[i]);
3084 w16w_conv(c, &c2, &c1, &c0);
3085 if (iconv == w_iconv){
3092 if (w2e_conv(c2, c1, c0, &c2, &c1) == 0){
3095 if (iconv == s_iconv){
3096 e2s_conv(c2, c1, &c2, &c1);
3111 numchar_ungetc(c, f)
3115 return (*i_nungetc)(c, f);
3124 int c1, c2, c3, c4, cc;
3125 int t1, t2, t3, t4, mode, exit_mode;
3127 if (mime_top != mime_last) { /* Something is in FIFO */
3128 return Fifo(mime_top++);
3130 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3131 mime_decode_mode=FALSE;
3132 unswitch_mime_getc();
3133 return (*i_getc)(f);
3136 if (mimebuf_f == FIXED_MIME)
3137 exit_mode = mime_decode_mode;
3140 if (mime_decode_mode == 'Q') {
3141 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3143 if (c1=='_') return ' ';
3144 if (c1!='=' && c1!='?') {
3148 mime_decode_mode = exit_mode; /* prepare for quit */
3149 if (c1<=' ') return c1;
3150 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3151 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3152 /* end Q encoding */
3153 input_mode = exit_mode;
3154 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3155 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3158 if (c1=='='&&c2<' ') { /* this is soft wrap */
3159 while((c1 = (*i_mgetc)(f)) <=' ') {
3160 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3162 mime_decode_mode = 'Q'; /* still in MIME */
3163 goto restart_mime_q;
3166 mime_decode_mode = 'Q'; /* still in MIME */
3170 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3171 if (c2<=' ') return c2;
3172 mime_decode_mode = 'Q'; /* still in MIME */
/*
 * Value of one hexadecimal digit character (0-15); any other character
 * yields 0.  Every use of the argument is parenthesized so that
 * expression arguments are tested as a whole.  The argument is evaluated
 * more than once.
 */
#define hex(c)   (('0'<=(c)&&(c)<='9')?((c)-'0'):\
     ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
3175 return ((hex(c2)<<4) + hex(c3));
3178 if (mime_decode_mode != 'B') {
3179 mime_decode_mode = FALSE;
3180 return (*i_mgetc)(f);
3184 /* Base64 encoding */
3186 MIME allows line breaks in the middle of
3187 Base64, but we are very pessimistic in decoding
3188 in unbuf mode because MIME encoded text may be broken by
3189 less or an editor's control sequence (such as ESC-[-K) in unbuffered
3190 mode. Ignore incomplete MIME.
3192 mode = mime_decode_mode;
3193 mime_decode_mode = exit_mode; /* prepare for quit */
3195 while ((c1 = (*i_mgetc)(f))<=' ') {
3200 if ((c2 = (*i_mgetc)(f))<=' ') {
3203 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3204 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3207 if ((c1 == '?') && (c2 == '=')) {
3209 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3210 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3214 if ((c3 = (*i_mgetc)(f))<=' ') {
3217 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3218 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3222 if ((c4 = (*i_mgetc)(f))<=' ') {
3225 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3226 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3230 mime_decode_mode = mode; /* still in MIME sigh... */
3232 /* BASE 64 decoding */
3234 t1 = 0x3f & base64decode(c1);
3235 t2 = 0x3f & base64decode(c2);
3236 t3 = 0x3f & base64decode(c3);
3237 t4 = 0x3f & base64decode(c4);
3238 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3240 Fifo(mime_last++) = cc;
3241 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3243 Fifo(mime_last++) = cc;
3244 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3246 Fifo(mime_last++) = cc;
3251 return Fifo(mime_top++);
3259 Fifo(--mime_top) = c;
3270 /* In buffered mode, read until =? or NL or buffer full
3272 mime_input = mime_top;
3273 mime_last = mime_top;
3274 while(*p) Fifo(mime_input++) = *p++;
3277 while((c=(*i_getc)(f))!=EOF) {
3278 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3279 break; /* buffer full */
3281 if (c=='=' && d=='?') {
3282 /* checked. skip header, start decode */
3283 Fifo(mime_input++) = c;
3284 /* mime_last_input = mime_input; */
3289 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3291 /* Should we check length mod 4? */
3292 Fifo(mime_input++) = c;
3295 /* In case of Incomplete MIME, no MIME decode */
3296 Fifo(mime_input++) = c;
3297 mime_last = mime_input; /* point undecoded buffer */
3298 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3299 switch_mime_getc(); /* anyway we need buffered getc */
3310 i = c - 'A'; /* A..Z 0-25 */
3312 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3314 } else if (c > '/') {
3315 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3316 } else if (c == '+') {
3317 i = '>' /* 62 */ ; /* + 62 */
3319 i = '?' /* 63 */ ; /* / 63 */
3324 static char basis_64[] =
3325 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3335 p = mime_pattern[0];
3336 for(i=0;mime_encode[i];i++) {
3337 if (mode == mime_encode[i]) {
3338 p = mime_pattern[i];
3342 mimeout_mode = mime_encode_method[i];
3344 /* (*o_mputc)(' '); */
/*
 * Map a 4-bit value (0-15) to its upper-case hexadecimal digit character.
 * The argument is parenthesized so that expressions such as (v & 0xf)
 * are compared and added as a whole.  The argument is evaluated more
 * than once.
 */
#define itoh4(c)   ((c)>=10?(c)+'A'-10:(c)+'0')
3367 if (mimeout_f==FIXED_MIME) {
3368 if (base64_count>71) {
3376 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3377 && mimeout_f!=FIXED_MIME) {
3378 if (mimeout_mode=='Q') {
3385 if (mimeout_mode!='B' || c!=SPACE) {
3394 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3395 open_mime(output_mode);
3397 } else { /* c==EOF */
3398 switch(mimeout_mode) {
3403 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3409 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3415 if (mimeout_f!=FIXED_MIME) {
3417 } else if (mimeout_mode != 'Q')
3422 switch(mimeout_mode) {
3426 (*o_mputc)(itoh4(((c>>4)&0xf)));
3427 (*o_mputc)(itoh4((c&0xf)));
3434 (*o_mputc)(basis_64[c>>2]);
3439 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3445 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3446 (*o_mputc)(basis_64[c & 0x3F]);
3466 mime_f = STRICT_MIME;
3470 #if defined(MSDOS) || defined(__OS2__)
3475 iso2022jp_f = FALSE;
3477 kanji_intro = DEFAULT_J;
3478 ascii_intro = DEFAULT_R;
3480 output_conv = DEFAULT_CONV;
3481 oconv = DEFAULT_CONV;
3484 i_mungetc = std_ungetc;
3485 i_mgetc_buf = std_getc;
3486 i_mungetc_buf = std_ungetc;
3489 i_ungetc=std_ungetc;
3492 i_bungetc= std_ungetc;
3496 o_crconv = no_connection;
3497 o_rot_conv = no_connection;
3498 o_iso2022jp_check_conv = no_connection;
3499 o_hira_conv = no_connection;
3500 o_fconv = no_connection;
3501 o_zconv = no_connection;
3504 i_ungetc = std_ungetc;
3506 i_mungetc = std_ungetc;
3508 output_mode = ASCII;
3511 mime_decode_mode = FALSE;
3520 struct input_code *p = input_code_list;
3525 #ifdef UTF8_OUTPUT_ENABLE
3526 if (w_oconv16_begin_f) {
3527 w_oconv16_begin_f = 2;
3532 fold_preserve_f = FALSE;
3535 fold_margin = FOLD_MARGIN;
3538 z_prev2=0,z_prev1=0;
3544 no_connection(c2,c1)
3547 no_connection2(c2,c1,0);
3551 no_connection2(c2,c1,c0)
3554 fprintf(stderr,"nkf internal module connection failure.\n");
3562 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3563 fprintf(stderr,"Flags:\n");
3564 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
3565 #ifdef DEFAULT_CODE_SJIS
3566 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3568 #ifdef DEFAULT_CODE_JIS
3569 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3571 #ifdef DEFAULT_CODE_EUC
3572 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3574 #ifdef DEFAULT_CODE_UTF8
3575 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3577 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3578 fprintf(stderr,"t no conversion\n");
3579 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3580 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3581 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
3582 fprintf(stderr,"v Show this usage. V: show version\n");
3583 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3584 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3585 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3586 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
3587 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
3588 fprintf(stderr," 3: Convert HTML Entity\n");
3589 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
3590 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
3592 fprintf(stderr,"T Text mode output\n");
3594 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
3595 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
3596 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
3597 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
3598 fprintf(stderr,"long name options\n");
3599 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
3600 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
3601 fprintf(stderr," --help,--version\n");
3608 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
3609 #if defined(MSDOS) && !defined(_Windows)
3612 #if !defined(__WIN32__) && defined(_Windows)
3615 #if defined(__WIN32__) && defined(_Windows)
3621 ,Version,Patchlevel);
3622 fprintf(stderr,"\n%s\n",CopyRight);
3627 ** Patch authors
3628 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
3629 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
3630 ** ohta@src.ricoh.co.jp (Junn Ohta)
3631 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
3632 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
3633 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
3634 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
3635 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
3636 ** GHG00637@nifty-serve.or.jp (COW)