1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** UTF-8 support
31 ** This version can be used as a drop-in replacement for the previous nkf.
32 ** When invoked as, e.g., "nkf -e", input that auto-detection identifies as UTF-8
33 ** is converted directly to euc-jp.
35 ** It is still quite likely to contain bugs
36 ** (especially in auto-detection, mixed encodings and error handling).
38 ** If you find any problem, please send a report to
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2003 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
116 #if defined(MSDOS) || defined(__OS2__)
123 #define setbinmode(fp) fsetbin(fp)
124 #else /* Microsoft C, Turbo C */
125 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
127 #else /* UNIX,OS/2 */
128 #define setbinmode(fp)
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
134 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
147 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
153 #if defined(_MSC_VER) /* VC++ */
154 #include <sys/utime.h>
155 #elif defined(__TURBOC__) /* BCC */
157 #elif defined(LSI_C) /* LSI C */
165 /* state of output_mode and input_mode
182 /* Input Assumption */
186 #define LATIN1_INPUT 6
188 #define STRICT_MIME 8
193 #define JAPANESE_EUC 10
197 #define UTF8_INPUT 13
198 #define UTF16_INPUT 14
199 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): nonzero when c is an ASCII letter ('a'-'z', 'A'-'Z') or an
 * ASCII digit ('0'-'9'), zero otherwise.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example a ?: expression) is tested as a single value instead of being
 * re-associated with the surrounding comparison operators.
 * The argument is still evaluated several times, so do not pass an
 * expression with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
220 #define HOLD_SIZE 1024
221 #define IOBUF_SIZE 16384
223 #define DEFAULT_J 'B'
224 #define DEFAULT_R 'B'
226 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
227 #define SJ6394 0x0161 /* 63 - 94 ku offset */
229 #define RANGE_NUM_MAX 18
234 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
235 #define sizeof_euc_utf8 94
236 #define sizeof_euc_to_utf8_1byte 94
237 #define sizeof_euc_to_utf8_2bytes 94
238 #define sizeof_utf8_to_euc_C2 64
239 #define sizeof_utf8_to_euc_E5B8 64
240 #define sizeof_utf8_to_euc_2bytes 112
241 #define sizeof_utf8_to_euc_3bytes 112
244 /* MIME preprocessor */
247 #ifdef EASYWIN /*Easy Win */
248 extern POINT _BufferSize;
251 /* function prototype */
253 #ifdef ANSI_C_PROTOTYPE
255 #define STATIC static
267 void (*status_func)PROTO((struct input_code *, int));
268 int (*iconv_func)PROTO((int c2, int c1, int c0));
272 STATIC int noconvert PROTO((FILE *f));
273 STATIC int kanji_convert PROTO((FILE *f));
274 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
275 STATIC int push_hold_buf PROTO((int c2));
276 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
277 STATIC int s_iconv PROTO((int c2,int c1,int c0));
278 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
279 STATIC int e_iconv PROTO((int c2,int c1,int c0));
280 #ifdef UTF8_INPUT_ENABLE
281 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
282 STATIC int w_iconv PROTO((int c2,int c1,int c0));
283 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
284 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
286 #ifdef UTF8_OUTPUT_ENABLE
287 STATIC int e2w_conv PROTO((int c2,int c1));
288 STATIC void w_oconv PROTO((int c2,int c1));
289 STATIC void w_oconv16 PROTO((int c2,int c1));
291 STATIC void e_oconv PROTO((int c2,int c1));
292 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
293 STATIC void s_oconv PROTO((int c2,int c1));
294 STATIC void j_oconv PROTO((int c2,int c1));
295 STATIC void fold_conv PROTO((int c2,int c1));
296 STATIC void cr_conv PROTO((int c2,int c1));
297 STATIC void z_conv PROTO((int c2,int c1));
298 STATIC void rot_conv PROTO((int c2,int c1));
299 STATIC void hira_conv PROTO((int c2,int c1));
300 STATIC void base64_conv PROTO((int c2,int c1));
301 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
302 STATIC void no_connection PROTO((int c2,int c1));
303 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
305 STATIC void code_score PROTO((struct input_code *ptr));
306 STATIC void code_status PROTO((int c));
308 STATIC void std_putc PROTO((int c));
309 STATIC int std_getc PROTO((FILE *f));
310 STATIC int std_ungetc PROTO((int c,FILE *f));
312 STATIC int broken_getc PROTO((FILE *f));
313 STATIC int broken_ungetc PROTO((int c,FILE *f));
315 STATIC int mime_begin PROTO((FILE *f));
316 STATIC int mime_getc PROTO((FILE *f));
317 STATIC int mime_ungetc PROTO((int c,FILE *f));
319 STATIC int mime_begin_strict PROTO((FILE *f));
320 STATIC int mime_getc_buf PROTO((FILE *f));
321 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
322 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
324 STATIC int base64decode PROTO((int c));
325 STATIC void mime_putc PROTO((int c));
326 STATIC void open_mime PROTO((int c));
327 STATIC void close_mime PROTO(());
328 STATIC void usage PROTO(());
329 STATIC void version PROTO(());
330 STATIC void options PROTO((unsigned char *c));
332 STATIC void reinit PROTO(());
337 static unsigned char stdibuf[IOBUF_SIZE];
338 static unsigned char stdobuf[IOBUF_SIZE];
339 static unsigned char hold_buf[HOLD_SIZE*2];
340 static int hold_count;
342 /* MIME preprocessor fifo */
344 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
345 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
346 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
347 static unsigned char mime_buf[MIME_BUF_SIZE];
348 static unsigned int mime_top = 0;
349 static unsigned int mime_last = 0; /* decoded */
350 static unsigned int mime_input = 0; /* undecoded */
353 static int unbuf_f = FALSE;
354 static int estab_f = FALSE;
355 static int nop_f = FALSE;
356 static int binmode_f = TRUE; /* binary mode */
357 static int rot_f = FALSE; /* ROT13/47 mode */
358 static int hira_f = FALSE; /* hiragana/katakana conversion */
359 static int input_f = FALSE; /* non fixed input code */
360 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
361 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
362 static int mimebuf_f = FALSE; /* MIME buffered input */
363 static int broken_f = FALSE; /* convert ESC-less broken JIS */
364 static int iso8859_f = FALSE; /* ISO8859 through */
365 static int mimeout_f = FALSE; /* base64 mode */
366 #if defined(MSDOS) || defined(__OS2__)
367 static int x0201_f = TRUE; /* Assume JISX0201 kana */
369 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
371 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
372 #ifdef UTF8_OUTPUT_ENABLE
373 static int w_oconv16_begin_f= 0; /* utf-16 header */
374 static int w_oconv16_LE = 0; /* utf-16 little endian */
379 static int cap_f = FALSE;
380 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
381 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
382 STATIC int cap_getc PROTO((FILE *f));
383 STATIC int cap_ungetc PROTO((int c,FILE *f));
385 static int url_f = FALSE;
386 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
387 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
388 STATIC int url_getc PROTO((FILE *f));
389 STATIC int url_ungetc PROTO((int c,FILE *f));
391 static int numchar_f = FALSE;
392 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
393 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
394 STATIC int numchar_getc PROTO((FILE *f));
395 STATIC int numchar_ungetc PROTO((int c,FILE *f));
399 static int noout_f = FALSE;
400 STATIC void no_putc PROTO((int c));
401 static int debug_f = FALSE;
402 STATIC void debug PROTO((char *str));
405 #ifdef SHIFTJIS_CP932
406 STATIC int cp932_f = FALSE;
407 #define CP932_TABLE_BEGIN (0xfa)
408 #define CP932_TABLE_END (0xfc)
410 #endif /* SHIFTJIS_CP932 */
412 STATIC void e_status PROTO((struct input_code *, int));
413 STATIC void s_status PROTO((struct input_code *, int));
415 #ifdef UTF8_INPUT_ENABLE
416 STATIC void w_status PROTO((struct input_code *, int));
417 STATIC void w16_status PROTO((struct input_code *, int));
418 static int utf16_mode = UTF16_INPUT;
421 struct input_code input_code_list[] = {
422 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
423 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
424 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
425 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
429 static int mimeout_mode = 0;
430 static int base64_count = 0;
432 /* X0208 -> ASCII converter */
435 static int f_line = 0; /* chars in line */
436 static int f_prev = 0;
437 static int fold_preserve_f = FALSE; /* preserve new lines */
438 static int fold_f = FALSE;
439 static int fold_len = 0;
442 static unsigned char kanji_intro = DEFAULT_J,
443 ascii_intro = DEFAULT_R;
447 #define FOLD_MARGIN 10
448 #define DEFAULT_FOLD 60
450 static int fold_margin = FOLD_MARGIN;
454 #ifdef DEFAULT_CODE_JIS
455 # define DEFAULT_CONV j_oconv
457 #ifdef DEFAULT_CODE_SJIS
458 # define DEFAULT_CONV s_oconv
460 #ifdef DEFAULT_CODE_EUC
461 # define DEFAULT_CONV e_oconv
463 #ifdef DEFAULT_CODE_UTF8
464 # define DEFAULT_CONV w_oconv
467 /* process default */
468 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
470 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
471 /* s_iconv or oconv */
472 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
474 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
475 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
476 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
477 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
478 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
479 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
480 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
482 /* static redirections */
484 static void (*o_putc)PROTO((int c)) = std_putc;
486 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
487 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
489 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
490 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
492 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
494 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
495 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
497 /* for strict mime */
498 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
499 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
502 static int output_mode = ASCII, /* output kanji mode */
503 input_mode = ASCII, /* input kanji mode */
504 shift_mode = FALSE; /* TRUE shift out, or X0201 */
505 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
507 /* X0201 / X0208 conversion tables */
509 /* X0201 kana conversion table */
512 unsigned char cv[]= {
513 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
514 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
515 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
516 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
517 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
518 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
519 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
520 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
521 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
522 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
523 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
524 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
525 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
526 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
527 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
528 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
532 /* X0201 kana conversion table for dakuten */
535 unsigned char dv[]= {
536 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
537 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
538 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
539 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
540 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
541 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
542 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
543 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
544 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
545 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
546 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
547 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
548 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
549 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
550 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
554 /* X0201 kana conversion table for han-dakuten */
557 unsigned char ev[]= {
558 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
559 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
560 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
561 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
562 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
563 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
564 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
567 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
568 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
569 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
570 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
571 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
572 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
573 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
577 /* X0208 kigou conversion table */
578 /* 0x8140 - 0x819e */
580 unsigned char fv[] = {
582 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
583 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
584 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
585 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
586 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
587 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
588 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
590 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
599 static int file_out = FALSE;
601 static int overwrite = FALSE;
604 static int crmode_f = 0; /* CR, NL, CRLF */
605 #ifdef EASYWIN /*Easy Win */
606 static int end_check;
618 #ifdef EASYWIN /*Easy Win */
619 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
622 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
623 cp = (unsigned char *)*argv;
626 if(x0201_f == WISH_TRUE)
627 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
629 if (binmode_f == TRUE)
631 if (freopen("","wb",stdout) == NULL)
638 setbuf(stdout, (char *) NULL);
640 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
643 if (binmode_f == TRUE)
645 if (freopen("","rb",stdin) == NULL) return (-1);
649 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
653 kanji_convert(stdin);
659 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
668 /* reopen file for stdout */
669 if (file_out == TRUE) {
672 outfname = malloc(strlen(origfname)
673 + strlen(".nkftmpXXXXXX")
679 strcpy(outfname, origfname);
683 for (i = strlen(outfname); i; --i){
684 if (outfname[i - 1] == '/'
685 || outfname[i - 1] == '\\'){
691 strcat(outfname, "ntXXXXXX");
693 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
696 strcat(outfname, ".nkftmpXXXXXX");
697 fd = mkstemp(outfname);
700 || (fd_backup = dup(fileno(stdout))) < 0
701 || dup2(fd, fileno(stdout)) < 0
712 outfname = "nkf.out";
715 if(freopen(outfname, "w", stdout) == NULL) {
719 if (binmode_f == TRUE) {
721 if (freopen("","wb",stdout) == NULL)
728 if (binmode_f == TRUE)
730 if (freopen("","rb",fin) == NULL)
735 setvbuffer(fin, stdibuf, IOBUF_SIZE);
752 if (dup2(fd_backup, fileno(stdout)) < 0){
755 if (stat(origfname, &sb)) {
756 fprintf(stderr, "Can't stat %s\n", origfname);
758 /* restore the permissions */
759 if (chmod(outfname, sb.st_mode)) {
760 fprintf(stderr, "Can't set permission %s\n", outfname);
763 tb[0] = tb[1] = sb.st_mtime;
764 /* restore the timestamp */
765 if (utime(outfname, tb)) {
766 fprintf(stderr, "Can't set timestamp %s\n", outfname);
769 if (unlink(origfname)){
773 tb.actime = sb.st_atime;
774 tb.modtime = sb.st_mtime;
775 /* restore the timestamp */
776 if (utime(outfname, &tb)) {
777 fprintf(stderr, "Can't set timestamp %s\n", outfname);
780 if (rename(outfname, origfname)) {
782 fprintf(stderr, "Can't rename %s to %s\n",
783 outfname, origfname);
791 #ifdef EASYWIN /*Easy Win */
792 if (file_out == FALSE)
793 scanf("%d",&end_check);
796 #else /* for Other OS */
797 if (file_out == TRUE)
827 {"katakana-hiragana","h3"},
828 #ifdef UTF8_OUTPUT_ENABLE
832 #ifdef UTF8_INPUT_ENABLE
834 {"utf16-input", "W16"},
842 {"numchar-input", ""},
848 #ifdef SHIFTJIS_CP932
853 static int option_mode;
868 case '-': /* literal options */
869 if (!*cp) { /* ignore the rest of arguments */
873 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
875 p = (unsigned char *)long_option[i].name;
876 for (j=0;*p && *p++ == cp[j];j++);
877 if (! *p && !cp[j]) break;
880 cp = (unsigned char *)long_option[i].alias;
883 if (strcmp(long_option[i].name, "overwrite") == 0){
890 if (strcmp(long_option[i].name, "cap-input") == 0){
894 if (strcmp(long_option[i].name, "url-input") == 0){
898 if (strcmp(long_option[i].name, "numchar-input") == 0){
904 if (strcmp(long_option[i].name, "no-output") == 0){
908 if (strcmp(long_option[i].name, "debug") == 0){
913 #ifdef SHIFTJIS_CP932
914 if (strcmp(long_option[i].name, "cp932") == 0){
921 case 'b': /* buffered mode */
924 case 'u': /* unbuffered mode */
927 case 't': /* transparent mode */
930 case 'j': /* JIS output */
932 output_conv = j_oconv;
934 case 'e': /* AT&T EUC output */
935 output_conv = e_oconv;
937 case 's': /* SJIS output */
938 output_conv = s_oconv;
940 case 'l': /* ISO8859 Latin-1 support, no conversion */
941 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
942 input_f = LATIN1_INPUT;
944 case 'i': /* Kanji IN ESC-$-@/B */
945 if (*cp=='@'||*cp=='B')
948 case 'o': /* ASCII IN ESC-(-J/B */
949 if (*cp=='J'||*cp=='B'||*cp=='H')
957 if ('9'>= *cp && *cp>='0')
958 hira_f |= (*cp++ -'0');
965 #if defined(MSDOS) || defined(__OS2__)
980 #ifdef UTF8_OUTPUT_ENABLE
981 case 'w': /* UTF-8 output */
982 if ('1'== cp[0] && '6'==cp[1]) {
983 output_conv = w_oconv16; cp+=2;
985 w_oconv16_begin_f=2; cp++;
988 w_oconv16_begin_f=1; cp++;
990 } else if (cp[0] == 'B') {
991 w_oconv16_begin_f=2; cp++;
993 w_oconv16_begin_f=1; cp++;
997 output_conv = w_oconv;
1000 #ifdef UTF8_INPUT_ENABLE
1001 case 'W': /* UTF-8 input */
1002 if ('1'== cp[0] && '6'==cp[1]) {
1003 input_f = UTF16_INPUT;
1005 input_f = UTF8_INPUT;
1008 /* Input code assumption */
1009 case 'J': /* JIS input */
1010 case 'E': /* AT&T EUC input */
1011 input_f = JIS_INPUT;
1013 case 'S': /* MS Kanji input */
1014 input_f = SJIS_INPUT;
1015 if (x0201_f==NO_X0201) x0201_f=TRUE;
1017 case 'Z': /* Convert X0208 alphabet to ASCII */
1018 /* bit:0 Convert X0208
1019 bit:1 Convert Kankaku to one space
1020 bit:2 Convert Kankaku to two spaces
1021 bit:3 Convert HTML Entity
1023 if ('9'>= *cp && *cp>='0')
1024 alpha_f |= 1<<(*cp++ -'0');
1028 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1029 x0201_f = FALSE; /* No X0201->X0208 conversion */
1031 ESC-(-I in JIS, EUC, MS Kanji
1032 SI/SO in JIS, EUC, MS Kanji
1033 SSO in EUC, JIS, not in MS Kanji
1034 MS Kanji (0xa0-0xdf)
1036 ESC-(-I in JIS (0x20-0x5f)
1037 SSO in EUC (0xa0-0xdf)
1038 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1041 case 'X': /* Assume X0201 kana */
1042 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1045 case 'F': /* preserve new lines */
1046 fold_preserve_f = TRUE;
1047 case 'f': /* folding -f60 or -f */
1050 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1052 fold_len += *cp++ - '0';
1054 if (!(0<fold_len && fold_len<BUFSIZ))
1055 fold_len = DEFAULT_FOLD;
1059 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1061 fold_margin += *cp++ - '0';
1065 case 'm': /* MIME support */
1066 if (*cp=='B'||*cp=='Q') {
1067 mime_decode_mode = *cp++;
1068 mimebuf_f = FIXED_MIME;
1069 } else if (*cp=='N') {
1070 mime_f = TRUE; cp++;
1071 } else if (*cp=='S') {
1072 mime_f = STRICT_MIME; cp++;
1073 } else if (*cp=='0') {
1074 mime_f = FALSE; cp++;
1077 case 'M': /* MIME output */
1080 mimeout_f = FIXED_MIME; cp++;
1081 } else if (*cp=='Q') {
1083 mimeout_f = FIXED_MIME; cp++;
1088 case 'B': /* Broken JIS support */
1090 bit:1 allow any x on ESC-(-x or ESC-$-x
1091 bit:2 reset to ascii on NL
1093 if ('9'>= *cp && *cp>='0')
1094 broken_f |= 1<<(*cp++ -'0');
1099 case 'O':/* for Output file */
1103 case 'c':/* add cr code */
1106 case 'd':/* delete cr code */
1109 case 'I': /* ISO-2022-JP output */
1112 case 'L': /* line mode */
1113 if (*cp=='u') { /* unix */
1114 crmode_f = NL; cp++;
1115 } else if (*cp=='m') { /* mac */
1116 crmode_f = CR; cp++;
1117 } else if (*cp=='w') { /* windows */
1118 crmode_f = CRLF; cp++;
1119 } else if (*cp=='0') { /* no conversion */
1124 /* multiple options in a string are allowed for the Perl module */
1125 while(*cp && *cp!='-') cp++;
1129 /* bogus option but ignored */
1135 #ifdef ANSI_C_PROTOTYPE
1136 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1138 void set_iconv(f, iconv_func)
1140 int (*iconv_func)();
1144 static int (*iconv_for_check)() = 0;
1146 #ifdef INPUT_CODE_FIX
1154 #ifdef INPUT_CODE_FIX
1155 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1161 if (estab_f && iconv_for_check != iconv){
1162 #ifdef UTF8_INPUT_ENABLE
1163 if (iconv == w_iconv) debug("UTF-8\n");
1164 if (iconv == w_iconv16) debug("UTF-16\n");
1166 if (iconv == s_iconv) debug("Shift_JIS\n");
1167 if (iconv == e_iconv) debug("EUC-JP\n");
1168 iconv_for_check = iconv;
1173 #define SCORE_KANA (1) /* so-called half-width kana */
1174 #define SCORE_DEPEND (SCORE_KANA << 1) /* machine-dependent characters */
1175 #ifdef SHIFTJIS_CP932
1176 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character re-read (remapped) via CP932 */
1177 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1179 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1181 #define SCORE_ERROR (SCORE_NO_EXIST << 1) /* error */
1182 int score_table_A0[] = {
1185 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1186 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1189 int score_table_F0[] = {
1191 0, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1192 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1193 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1196 void set_code_score(ptr, score)
1197 struct input_code *ptr;
1200 ptr->score |= score;
1203 void code_score(ptr)
1204 struct input_code *ptr;
1206 int c2 = ptr->buf[0];
1207 int c1 = ptr->buf[1];
1209 set_code_score(ptr, SCORE_ERROR);
1210 }else if ((c2 & 0xf0) == 0xa0){
1211 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1212 }else if ((c2 & 0xf0) == 0xf0){
1213 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1214 }else if (c2 == SSO){
1215 set_code_score(ptr, SCORE_KANA);
1217 #ifdef UTF8_OUTPUT_ENABLE
1218 else if (!e2w_conv(c2, c1)){
1219 set_code_score(ptr, SCORE_NO_EXIST);
1224 void status_disable(ptr)
1225 struct input_code *ptr;
1230 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1233 void status_push_ch(ptr, c)
1234 struct input_code *ptr;
1237 ptr->buf[ptr->index++] = c;
1240 void status_reset(ptr)
1241 struct input_code *ptr;
1248 void status_reinit(ptr)
1249 struct input_code *ptr;
1252 ptr->_file_stat = 0;
1255 void status_check(ptr, c)
1256 struct input_code *ptr;
1259 if (c <= DEL && estab_f){
1264 void s_status(ptr, c)
1265 struct input_code *ptr;
1270 status_check(ptr, c);
1275 }else if (0xa1 <= c && c <= 0xdf){
1276 status_push_ch(ptr, SSO);
1277 status_push_ch(ptr, c);
1280 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1282 status_push_ch(ptr, c);
1283 #ifdef SHIFTJIS_CP932
1285 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1287 status_push_ch(ptr, c);
1288 #endif /* SHIFTJIS_CP932 */
1290 status_disable(ptr);
1294 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1295 status_push_ch(ptr, c);
1296 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1300 status_disable(ptr);
1303 #ifdef SHIFTJIS_CP932
1305 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1306 status_push_ch(ptr, c);
1307 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1308 set_code_score(ptr, SCORE_CP932);
1313 status_disable(ptr);
1315 #endif /* SHIFTJIS_CP932 */
1319 void e_status(ptr, c)
1320 struct input_code *ptr;
1325 status_check(ptr, c);
1330 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1332 status_push_ch(ptr, c);
1334 status_disable(ptr);
1338 if (0xa1 <= c && c <= 0xfe){
1339 status_push_ch(ptr, c);
1343 status_disable(ptr);
1349 #ifdef UTF8_INPUT_ENABLE
1350 void w16_status(ptr, c)
1351 struct input_code *ptr;
1358 if (ptr->_file_stat == 0){
1359 if (c == 0xfe || c == 0xff){
1361 status_push_ch(ptr, c);
1362 ptr->_file_stat = 1;
1364 status_disable(ptr);
1365 ptr->_file_stat = -1;
1367 }else if (ptr->_file_stat > 0){
1369 status_push_ch(ptr, c);
1370 }else if (ptr->_file_stat < 0){
1371 status_disable(ptr);
1377 status_disable(ptr);
1378 ptr->_file_stat = -1;
1380 status_push_ch(ptr, c);
1387 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1388 status_push_ch(ptr, c);
1391 status_disable(ptr);
1392 ptr->_file_stat = -1;
1398 void w_status(ptr, c)
1399 struct input_code *ptr;
1404 status_check(ptr, c);
1409 }else if (0xc0 <= c && c <= 0xdf){
1411 status_push_ch(ptr, c);
1412 }else if (0xe0 <= c && c <= 0xef){
1414 status_push_ch(ptr, c);
1416 status_disable(ptr);
1421 if (0x80 <= c && c <= 0xbf){
1422 status_push_ch(ptr, c);
1423 if (ptr->index > ptr->stat){
1424 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1425 &ptr->buf[0], &ptr->buf[1]);
1430 status_disable(ptr);
1441 int action_flag = 1;
1442 struct input_code *result = 0;
1443 struct input_code *p = input_code_list;
1445 (p->status_func)(p, c);
1448 }else if(p->stat == 0){
1459 if (result && !estab_f){
1460 set_iconv(TRUE, result->iconv_func);
1461 }else if (c <= DEL){
1462 struct input_code *ptr = input_code_list;
1472 #define STD_GC_BUFSIZE (256)
1473 int std_gc_buf[STD_GC_BUFSIZE];
1483 return std_gc_buf[--std_gc_ndx];
1495 if (std_gc_ndx == STD_GC_BUFSIZE){
1498 std_gc_buf[std_gc_ndx++] = c;
1518 while ((c = (*i_getc)(f)) != EOF)
1527 oconv = output_conv;
1530 /* replace continuation module, from output side */
1532 /* output redirection */
1541 if (mimeout_f == TRUE) {
1542 o_base64conv = oconv; oconv = base64_conv;
1544 /* base64_count = 0; */
1548 o_crconv = oconv; oconv = cr_conv;
1551 o_rot_conv = oconv; oconv = rot_conv;
1554 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1557 o_hira_conv = oconv; oconv = hira_conv;
1560 o_fconv = oconv; oconv = fold_conv;
1563 if (alpha_f || x0201_f) {
1564 o_zconv = oconv; oconv = z_conv;
1568 /* input redirection */
1571 i_cgetc = i_getc; i_getc = cap_getc;
1572 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1575 i_ugetc = i_getc; i_getc = url_getc;
1576 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1579 i_ngetc = i_getc; i_getc = numchar_getc;
1580 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1583 if (mime_f && mimebuf_f==FIXED_MIME) {
1584 i_mgetc = i_getc; i_getc = mime_getc;
1585 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1588 i_bgetc = i_getc; i_getc = broken_getc;
1589 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1591 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1592 set_iconv(-TRUE, e_iconv);
1593 } else if (input_f == SJIS_INPUT) {
1594 set_iconv(-TRUE, s_iconv);
1595 #ifdef UTF8_INPUT_ENABLE
1596 } else if (input_f == UTF8_INPUT) {
1597 set_iconv(-TRUE, w_iconv);
1598 } else if (input_f == UTF16_INPUT) {
1599 set_iconv(-TRUE, w_iconv16);
1602 set_iconv(FALSE, e_iconv);
1606 struct input_code *p = input_code_list;
1614 Conversion main loop. Code detection only.
1624 module_connection();
1629 output_mode = ASCII;
1632 #define NEXT continue /* no output, get next */
1633 #define SEND ; /* output c1 and c2, get next */
1634 #define LAST break /* end of loop, go closing */
1636 while ((c1 = (*i_getc)(f)) != EOF) {
1641 /* in case of 8th bit is on */
1643 /* in case of not established yet */
1644 /* It is still ambiguous */
1645 if (h_conv(f, c2, c1)==EOF)
1651 /* in case of already established */
1653 /* ignore bogus code */
1659 /* second byte, 7 bit code */
1660 /* it might be kanji shifted */
1661 if ((c1 == DEL) || (c1 <= SPACE)) {
1662 /* ignore bogus first code */
1670 #ifdef UTF8_INPUT_ENABLE
1679 } else if (c1 > DEL) {
1681 if (!estab_f && !iso8859_f) {
1682 /* not established yet */
1685 } else { /* estab_f==TRUE */
1690 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1691 /* SJIS X0201 Case... */
1692 if(iso2022jp_f && x0201_f==NO_X0201) {
1693 (*oconv)(GETA1, GETA2);
1700 } else if (c1==SSO && iconv != s_iconv) {
1701 /* EUC X0201 Case */
1702 c1 = (*i_getc)(f); /* skip SSO */
1704 if (SSP<=c1 && c1<0xe0) {
1705 if(iso2022jp_f && x0201_f==NO_X0201) {
1706 (*oconv)(GETA1, GETA2);
1713 } else { /* bogus code, skip SSO and one byte */
1717 /* already established */
1722 } else if ((c1 > SPACE) && (c1 != DEL)) {
1723 /* in case of Roman characters */
1725 /* output 1 shifted byte */
1729 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1730 /* output 1 shifted byte */
1731 if(iso2022jp_f && x0201_f==NO_X0201) {
1732 (*oconv)(GETA1, GETA2);
1739 /* look like bogus code */
1742 } else if (input_mode == X0208) {
1743 /* in case of Kanji shifted */
1746 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1747 /* Check MIME code */
1748 if ((c1 = (*i_getc)(f)) == EOF) {
1751 } else if (c1 == '?') {
1752 /* =? is mime conversion start sequence */
1753 if(mime_f == STRICT_MIME) {
1754 /* check in real detail */
1755 if (mime_begin_strict(f) == EOF)
1759 } else if (mime_begin(f) == EOF)
1769 /* normal ASCII code */
1772 } else if (c1 == SI) {
1775 } else if (c1 == SO) {
1778 } else if (c1 == ESC ) {
1779 if ((c1 = (*i_getc)(f)) == EOF) {
1780 /* (*oconv)(0, ESC); don't send bogus code */
1782 } else if (c1 == '$') {
1783 if ((c1 = (*i_getc)(f)) == EOF) {
1785 (*oconv)(0, ESC); don't send bogus code
1786 (*oconv)(0, '$'); */
1788 } else if (c1 == '@'|| c1 == 'B') {
1789 /* This is kanji introduction */
1793 } else if (c1 == '(') {
1794 if ((c1 = (*i_getc)(f)) == EOF) {
1795 /* don't send bogus code
1801 } else if (c1 == '@'|| c1 == 'B') {
1802 /* This is kanji introduction */
1807 /* could be some special code */
1814 } else if (broken_f&0x2) {
1815 /* accept any ESC-(-x as broken code ... */
1825 } else if (c1 == '(') {
1826 if ((c1 = (*i_getc)(f)) == EOF) {
1827 /* don't send bogus code
1829 (*oconv)(0, '('); */
1833 /* This is X0201 kana introduction */
1834 input_mode = X0201; shift_mode = X0201;
1836 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
1837 /* This is ASCII (or JIS Roman) introduction, not kanji */
1838 input_mode = ASCII; shift_mode = FALSE;
1840 } else if (broken_f&0x2) {
1841 input_mode = ASCII; shift_mode = FALSE;
1846 /* maintain various input_mode here */
1850 } else if ( c1 == 'N' || c1 == 'n' ){
1852 c1 = (*i_getc)(f); /* skip SS2 */
1853 if ( SPACE<=c1 && c1 < 0xe0 ) {
1862 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
1863 input_mode = ASCII; set_iconv(FALSE, 0);
1869 if (input_mode == X0208)
1870 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
1871 else if (input_mode)
1872 (*oconv)(input_mode, c1); /* other special case */
1873 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
1874 int c0 = (*i_getc)(f);
1877 (*iconv)(c2, c1, c0);
1883 /* goto next_word */
1887 (*iconv)(EOF, 0, 0);
1900 /** it must NOT be in the kanji shifted sequence */
1901 /** it must NOT be written in JIS7 */
1902 /** and it must be after 2 byte 8bit code */
1909 while ((c1 = (*i_getc)(f)) != EOF) {
1915 if (push_hold_buf(c1) == EOF || estab_f){
1921 struct input_code *p = input_code_list;
1922 struct input_code *result = p;
1924 if (p->score < result->score){
1929 set_iconv(FALSE, p->iconv_func);
1934 ** 1) EOF is detected, or
1935 ** 2) Code is established, or
1936 ** 3) Buffer is FULL (but last word is pushed)
1938 ** in 1) and 3) cases, we continue to use
1939 ** Kanji codes by oconv and leave estab_f unchanged.
1943 while (wc < hold_count){
1944 c2 = hold_buf[wc++];
1948 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
1949 (*iconv)(X0201, c2, 0);
1952 if (wc < hold_count){
1953 c1 = hold_buf[wc++];
1956 if (c1 == EOF) break;
1959 if ((*iconv)(c2, c1, 0) < 0){
1961 if (wc < hold_count){
1962 c0 = hold_buf[wc++];
1965 if (c0 == EOF) break;
1968 (*iconv)(c2, c1, c0);
1982 if (hold_count >= HOLD_SIZE*2)
1984 hold_buf[hold_count++] = c2;
1985 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
1988 int s2e_conv(c2, c1, p2, p1)
1992 #ifdef SHIFTJIS_CP932
1993 if (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
1994 extern unsigned short shiftjis_cp932[3][189];
1995 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1996 if (c1 == 0) return 1;
2000 #endif /* SHIFTJIS_CP932 */
2001 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2003 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2020 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2023 int ret = s2e_conv(c2, c1, &c2, &c1);
2024 if (ret) return ret;
2037 } else if (c2 == SSO){
2040 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2050 #ifdef UTF8_INPUT_ENABLE
2052 w2e_conv(c2, c1, c0, p2, p1)
2056 extern unsigned short * utf8_to_euc_2bytes[];
2057 extern unsigned short ** utf8_to_euc_3bytes[];
2059 if (0xc0 <= c2 && c2 <= 0xef) {
2060 unsigned short **pp;
2063 if (c0 == 0) return -1;
2064 pp = utf8_to_euc_3bytes[c2 - 0x80];
2065 return w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2067 return w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2069 } else if (c2 == X0201) {
2082 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2090 w16w_conv(val, p2, p1, p0)
2098 }else if (val < 0x800){
2099 *p2 = 0xc0 | (val >> 6);
2100 *p1 = 0x80 | (val & 0x3f);
2103 *p2 = 0xe0 | (val >> 12);
2104 *p1 = 0x80 | ((val >> 6) & 0x3f);
2105 *p0 = 0x80 | (val & 0x3f);
2110 w16e_conv(val, p2, p1)
2114 extern unsigned short * utf8_to_euc_2bytes[];
2115 extern unsigned short ** utf8_to_euc_3bytes[];
2117 unsigned short **pp;
2120 w16w_conv(val, &c2, &c1, &c0);
2123 pp = utf8_to_euc_3bytes[c2 - 0x80];
2124 psize = sizeof_utf8_to_euc_C2;
2126 pp = utf8_to_euc_2bytes;
2127 psize = sizeof_utf8_to_euc_2bytes;
2129 return w_iconv_common(c1, c0, pp, psize, p2, p1);
2135 w_iconv16(c2, c1, c0)
2140 if (c2==0376 && c1==0377){
2141 utf16_mode = UTF16_INPUT;
2143 } else if (c2==0377 && c1==0376){
2144 utf16_mode = UTF16BE_INPUT;
2147 if (utf16_mode == UTF16BE_INPUT) {
2149 tmp=c1; c1=c2; c2=tmp;
2151 if (c2==0 || c2==EOF) {
2155 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2156 if (ret) return ret;
2162 w_iconv_common(c1, c0, pp, psize, p2, p1)
2164 unsigned short **pp;
2172 if (pp == 0) return 1;
2175 if (c1 < 0 || psize <= c1) return 1;
2177 if (p == 0) return 1;
2180 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2182 if (val == 0) return 1;
2185 if (c2 == SO) c2 = X0201;
2194 #ifdef UTF8_OUTPUT_ENABLE
2199 extern unsigned short euc_to_utf8_1byte[];
2200 extern unsigned short * euc_to_utf8_2bytes[];
2204 p = euc_to_utf8_1byte;
2207 c2 = (c2&0x7f) - 0x21;
2208 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2209 p = euc_to_utf8_2bytes[c2];
2214 c1 = (c1 & 0x7f) - 0x21;
2215 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2228 } else if (c2 == 0) {
2229 output_mode = ASCII;
2231 } else if (c2 == ISO8859_1) {
2232 output_mode = ISO8859_1;
2233 (*o_putc)(c1 | 0x080);
2235 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2238 if (0 < val && val < 0x80){
2240 }else if (val < 0x800){
2241 (*o_putc)(0xc0 | (val >> 6));
2242 (*o_putc)(0x80 | (val & 0x3f));
2244 (*o_putc)(0xe0 | (val >> 12));
2245 (*o_putc)(0x80 | ((val >> 6) & 0x3f));
2246 (*o_putc)(0x80 | (val & 0x3f));
2261 if (w_oconv16_begin_f==2) {
2263 (*o_putc)((unsigned char)'\377');
2267 (*o_putc)((unsigned char)'\377');
2269 w_oconv16_begin_f=1;
2272 if (c2 == ISO8859_1) {
2276 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2277 c2 = (val >> 8) & 0xff;
2299 } else if (c2 == 0) {
2300 output_mode = ASCII;
2302 } else if (c2 == X0201) {
2303 output_mode = JAPANESE_EUC;
2304 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2305 } else if (c2 == ISO8859_1) {
2306 output_mode = ISO8859_1;
2307 (*o_putc)(c1 | 0x080);
2309 if ((c1<0x20 || 0x7e<c1) ||
2310 (c2<0x20 || 0x7e<c2)) {
2311 set_iconv(FALSE, 0);
2312 return; /* too late to rescue this char */
2314 output_mode = JAPANESE_EUC;
2315 (*o_putc)(c2 | 0x080);
2316 (*o_putc)(c1 | 0x080);
2321 e2s_conv(c2, c1, p2, p1)
2322 int c2, c1, *p2, *p1;
2324 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2325 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2336 } else if (c2 == 0) {
2337 output_mode = ASCII;
2339 } else if (c2 == X0201) {
2340 output_mode = SHIFT_JIS;
2342 } else if (c2 == ISO8859_1) {
2343 output_mode = ISO8859_1;
2344 (*o_putc)(c1 | 0x080);
2346 if ((c1<0x20 || 0x7e<c1) ||
2347 (c2<0x20 || 0x7e<c2)) {
2348 set_iconv(FALSE, 0);
2349 return; /* too late to rescue this char */
2351 output_mode = SHIFT_JIS;
2352 e2s_conv(c2, c1, &c2, &c1);
2364 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2367 (*o_putc)(ascii_intro);
2368 output_mode = ASCII;
2371 } else if (c2==X0201) {
2372 if (output_mode!=X0201) {
2373 output_mode = X0201;
2379 } else if (c2==ISO8859_1) {
2380 /* iso8859 introduction, or 8th bit on */
2381 /* Can we convert in 7bit form using ESC-'-'-A ?
2383 output_mode = ISO8859_1;
2385 } else if (c2 == 0) {
2386 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2389 (*o_putc)(ascii_intro);
2390 output_mode = ASCII;
2394 if (output_mode != X0208) {
2395 output_mode = X0208;
2398 (*o_putc)(kanji_intro);
2400 if (c1<0x20 || 0x7e<c1)
2402 if (c2<0x20 || 0x7e<c2)
2414 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2416 } else if (base64_count>66 && mimeout_mode) {
2417 (*o_base64conv)(EOF,0);
2419 (*o_putc)('\t'); base64_count += 7;
2421 (*o_base64conv)(c2,c1);
/* Pushback state: values stored in broken_buf are handed back
   last-in-first-out (broken_counter is decremented on each read) before
   anything else is returned.
   NOTE(review): presumably owned by broken_getc()/broken_ungetc() - confirm. */
2425 static int broken_buf[3];
2426 static int broken_counter = 0; /* number of values currently held in broken_buf */
2427 static int broken_last = 0; /* NOTE(review): presumably the previously read character; it is compared against ESC - confirm */
2434 if (broken_counter>0) {
2435 return broken_buf[--broken_counter];
2438 if (c=='$' && broken_last != ESC
2439 && (input_mode==ASCII || input_mode==X0201)) {
2442 if (c1=='@'|| c1=='B') {
2443 broken_buf[0]=c1; broken_buf[1]=c;
2450 } else if (c=='(' && broken_last != ESC
2451 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2454 if (c1=='J'|| c1=='B') {
2455 broken_buf[0]=c1; broken_buf[1]=c;
2473 if (broken_counter<2)
2474 broken_buf[broken_counter++]=c;
2478 static int prev_cr = 0;
2486 if (! (c2==0&&c1==NL) ) {
2492 } else if (c1=='\r') {
2494 } else if (c1=='\n') {
2495 if (crmode_f==CRLF) {
2496 (*o_crconv)(0,'\r');
2497 } else if (crmode_f==CR) {
2498 (*o_crconv)(0,'\r');
2502 } else if (c1!='\032' || crmode_f!=NL){
2508 Return value of fold_conv()
2510 \n add newline and output char
2511 \r add newline and output nothing
2514 1 (or else) normal output
2516 fold state in prev (previous character)
2518 >0x80 Japanese (X0208/X0201)
2523 This fold algorithm does not preserve leading spaces in a line.
2524 This is the main difference from fmt.
/* Width added to the fold line counter by one character: 2 when c2 is
   non-zero, 1 when c2 is zero.  c1 is accepted for call-site symmetry but is
   not used.  The argument is parenthesized so that any expression (including
   an assignment) can be passed safely. */
#define char_size(c2,c1) ((c2)?2:1)
2536 if (c1== '\r' && !fold_preserve_f) {
2537 fold_state=0; /* ignore cr */
2538 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2540 fold_state=0; /* ignore cr */
2541 } else if (c1== BS) {
2542 if (f_line>0) f_line--;
2544 } else if (c2==EOF && f_line != 0) { /* close open last line */
2546 } else if ((c1=='\n' && !fold_preserve_f)
2547 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2548 && fold_preserve_f)) {
2550 if (fold_preserve_f) {
2554 } else if ((f_prev == c1 && !fold_preserve_f)
2555 || (f_prev == '\n' && fold_preserve_f)
2556 ) { /* duplicate newline */
2559 fold_state = '\n'; /* output two newline */
2565 if (f_prev&0x80) { /* Japanese? */
2567 fold_state = 0; /* ignore given single newline */
2568 } else if (f_prev==' ') {
2572 if (++f_line<=fold_len)
2576 fold_state = '\r'; /* fold and output nothing */
2580 } else if (c1=='\f') {
2585 fold_state = '\n'; /* output newline and clear */
2586 } else if ( (c2==0 && c1==' ')||
2587 (c2==0 && c1=='\t')||
2588 (c2=='!'&& c1=='!')) {
2589 /* X0208 kankaku or ascii space */
2590 if (f_prev == ' ') {
2591 fold_state = 0; /* remove duplicate spaces */
2594 if (++f_line<=fold_len)
2595 fold_state = ' '; /* output ASCII space only */
2597 f_prev = ' '; f_line = 0;
2598 fold_state = '\r'; /* fold and output nothing */
2602 prev0 = f_prev; /* we still need this one... , but almost done */
2604 if (c2 || c2==X0201)
2605 f_prev |= 0x80; /* this is Japanese */
2606 f_line += char_size(c2,c1);
2607 if (f_line<=fold_len) { /* normal case */
2610 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2611 f_line = char_size(c2,c1);
2612 fold_state = '\n'; /* We can't wait, do fold now */
2613 } else if (c2==X0201) {
2614 /* simple kinsoku rules return 1 means no folding */
2615 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2616 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2617 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2618 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2619 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2620 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2621 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2623 fold_state = '\n';/* add one new f_line before this character */
2626 fold_state = '\n';/* add one new f_line before this character */
2629 /* kinsoku point in ASCII */
2630 if ( c1==')'|| /* { [ ( */
2641 /* just after special */
2642 } else if (!is_alnum(prev0)) {
2643 f_line = char_size(c2,c1);
2645 } else if ((prev0==' ') || /* ignored new f_line */
2646 (prev0=='\n')|| /* ignored new f_line */
2647 (prev0&0x80)) { /* X0208 - ASCII */
2648 f_line = char_size(c2,c1);
2649 fold_state = '\n';/* add one new f_line before this character */
2651 fold_state = 1; /* default no fold in ASCII */
2655 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2656 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2657 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2658 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2659 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2660 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2661 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2662 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2663 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2664 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2665 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2666 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2667 /* default no fold in kinsoku */
2670 f_line = char_size(c2,c1);
2671 /* add one new f_line before this character */
2674 f_line = char_size(c2,c1);
2676 /* add one new f_line before this character */
2681 /* terminator process */
2682 switch(fold_state) {
2701 int z_prev2=0,z_prev1=0;
2708 /* if (c2) c1 &= 0x7f; assertion */
2710 if (x0201_f && z_prev2==X0201) { /* X0201 */
2711 if (c1==(0xde&0x7f)) { /*
dakuten (voiced sound mark) */
2713 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
2715 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
handakuten (semi-voiced sound mark) */
2717 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
2721 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
2730 if (x0201_f && c2==X0201) {
2731 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
2732 /* wait for dakuten or handakuten */
2733 z_prev1 = c1; z_prev2 = c2;
2736 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
2741 /* JISX0208 Alphabet */
2742 if (alpha_f && c2 == 0x23 ) {
2744 } else if (alpha_f && c2 == 0x21 ) {
2745 /* JISX0208 Kigou */
2750 } else if (alpha_f&0x4) {
2755 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
2761 case '>': entity = ">"; break;
2762 case '<': entity = "<"; break;
2763 case '\"': entity = """; break;
2764 case '&': entity = "&"; break;
2767 while (*entity) (*o_zconv)(0, *entity++);
2777 #define rot13(c) ( \
2779 (c <= 'M') ? (c + 13): \
2780 (c <= 'Z') ? (c - 13): \
2782 (c <= 'm') ? (c + 13): \
2783 (c <= 'z') ? (c - 13): \
2787 #define rot47(c) ( \
2789 ( c <= 'O' ) ? (c + 47) : \
2790 ( c <= '~' ) ? (c - 47) : \
2798 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
2804 (*o_rot_conv)(c2,c1);
2811 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
2813 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
2816 (*o_hira_conv)(c2,c1);
2821 iso2022jp_check_conv(c2,c1)
2824 static int range[RANGE_NUM_MAX][2] = {
2847 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
2851 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
2856 for (i = 0; i < RANGE_NUM_MAX; i++) {
2857 start = range[i][0];
2860 if (c >= start && c <= end) {
2865 (*o_iso2022jp_check_conv)(c2,c1);
2869 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
2871 unsigned char *mime_pattern[] = {
2872 (unsigned char *)"\075?EUC-JP?B?",
2873 (unsigned char *)"\075?SHIFT_JIS?B?",
2874 (unsigned char *)"\075?ISO-8859-1?Q?",
2875 (unsigned char *)"\075?ISO-8859-1?B?",
2876 (unsigned char *)"\075?ISO-2022-JP?B?",
2877 (unsigned char *)"\075?ISO-2022-JP?Q?",
2878 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2879 (unsigned char *)"\075?UTF-8?B?",
2884 int mime_encode[] = {
2885 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
2886 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2892 int mime_encode_method[] = {
2893 'B', 'B','Q', 'B', 'B', 'Q',
2894 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
2901 #define MAXRECOVER 20
/* I don't trust portability of toupper.
   These helpers compare against character constants only, so they do not
   depend on the locale (they assume 'a'..'z', 'A'..'Z' and '0'..'9' are each
   contiguous, as in ASCII).
   Every use of the argument is parenthesized so that expression arguments
   such as `c & 0xff` expand correctly.  The argument is still evaluated more
   than once, so do not pass expressions with side effects. */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
2911 if (i_getc!=mime_getc) {
2912 i_mgetc = i_getc; i_getc = mime_getc;
2913 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2914 if(mime_f==STRICT_MIME) {
2915 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
2916 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
2922 unswitch_mime_getc()
2924 if(mime_f==STRICT_MIME) {
2925 i_mgetc = i_mgetc_buf;
2926 i_mungetc = i_mungetc_buf;
2929 i_ungetc = i_mungetc;
2933 mime_begin_strict(f)
2938 unsigned char *p,*q;
2939 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
2941 mime_decode_mode = FALSE;
2942 /* =? has been checked */
2944 p = mime_pattern[j];
2947 for(i=2;p[i]>' ';i++) { /* start at =? */
2948 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
2949 /* pattern fails, try next one */
2951 while ((p = mime_pattern[++j])) {
2952 for(k=2;k<i;k++) /* assume length(p) > i */
2953 if (p[k]!=q[k]) break;
2954 if (k==i && nkf_toupper(c1)==p[k]) break;
2956 if (p) continue; /* found next one, continue */
2957 /* all fails, output from recovery buffer */
2965 mime_decode_mode = p[i-2];
2966 if (mime_decode_mode=='B') {
2967 mimebuf_f = unbuf_f;
2969 /* do MIME integrity check */
2970 return mime_integrity(f,mime_pattern[j]);
2982 /* we don't keep eof of Fifo, because it contains ?= as
2983 a terminator. It was checked in mime_integrity. */
2984 return ((mimebuf_f)?
2985 (*i_mgetc_buf)(f):Fifo(mime_input++));
2989 mime_ungetc_buf(c,f)
2994 (*i_mungetc_buf)(c,f);
2996 Fifo(--mime_input)=c;
3007 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3008 /* re-read and convert again from mime_buffer. */
3010 /* =? has been checked */
3012 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3013 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3014 /* We accept any character type even if it is broken by new lines */
3015 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3016 if (c1=='\n'||c1==' '||c1=='\r'||
3017 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3019 /* Failed. But this could be another MIME preamble */
3027 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3028 if (!(++i<MAXRECOVER) || c1==EOF) break;
3029 if (c1=='b'||c1=='B') {
3030 mime_decode_mode = 'B';
3031 } else if (c1=='q'||c1=='Q') {
3032 mime_decode_mode = 'Q';
3036 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3037 if (!(++i<MAXRECOVER) || c1==EOF) break;
3039 mime_decode_mode = FALSE;
3045 if (!mime_decode_mode) {
3046 /* false MIME preamble, restart from mime_buffer */
3047 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3048 /* Since we are in MIME mode until buffer becomes empty, */
3049 /* we never go into mime_begin again for a while. */
3052 /* discard mime preamble, and goto MIME mode */
3054 /* do no MIME integrity check */
3055 return c1; /* used only for checking EOF */
3080 if (nkf_isdigit(x)) return x - '0';
3081 return nkf_toupper(x) - 'A' + 10;
3084 #ifdef ANSI_C_PROTOTYPE
3085 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3088 hex_getc(ch, f, g, u)
3101 if (!nkf_isxdigit(c2)){
3106 if (!nkf_isxdigit(c3)){
3111 return (hex2bin(c2) << 4) | hex2bin(c3);
3118 return hex_getc(':', f, i_cgetc, i_cungetc);
3126 return (*i_cungetc)(c, f);
3133 return hex_getc('%', f, i_ugetc, i_uungetc);
3141 return (*i_uungetc)(c, f);
3148 int (*g)() = i_ngetc;
3149 int (*u)() = i_nungetc;
3160 if (buf[i] == 'x' || buf[i] == 'X'){
3161 for (j = 0; j < 5; j++){
3163 if (!nkf_isxdigit(buf[i])){
3170 c |= hex2bin(buf[i]);
3173 for (j = 0; j < 6; j++){
3177 if (!nkf_isdigit(buf[i])){
3184 c += hex2bin(buf[i]);
3195 w16w_conv(c, &c2, &c1, &c0);
3196 if (iconv == w_iconv){
3203 if (w2e_conv(c2, c1, c0, &c2, &c1) == 0){
3206 if (iconv == s_iconv){
3207 e2s_conv(c2, c1, &c2, &c1);
3222 numchar_ungetc(c, f)
3226 return (*i_nungetc)(c, f);
3235 int c1, c2, c3, c4, cc;
3236 int t1, t2, t3, t4, mode, exit_mode;
3238 if (mime_top != mime_last) { /* Something is in FIFO */
3239 return Fifo(mime_top++);
3241 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3242 mime_decode_mode=FALSE;
3243 unswitch_mime_getc();
3244 return (*i_getc)(f);
3247 if (mimebuf_f == FIXED_MIME)
3248 exit_mode = mime_decode_mode;
3251 if (mime_decode_mode == 'Q') {
3252 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3254 if (c1=='_') return ' ';
3255 if (c1!='=' && c1!='?') {
3259 mime_decode_mode = exit_mode; /* prepare for quit */
3260 if (c1<=' ') return c1;
3261 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3262 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3263 /* end Q encoding */
3264 input_mode = exit_mode;
3265 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3266 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3269 if (c1=='='&&c2<' ') { /* this is soft wrap */
3270 while((c1 = (*i_mgetc)(f)) <=' ') {
3271 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3273 mime_decode_mode = 'Q'; /* still in MIME */
3274 goto restart_mime_q;
3277 mime_decode_mode = 'Q'; /* still in MIME */
3281 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3282 if (c2<=' ') return c2;
3283 mime_decode_mode = 'Q'; /* still in MIME */
3284 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3285 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3286 return ((hex(c2)<<4) + hex(c3));
3289 if (mime_decode_mode != 'B') {
3290 mime_decode_mode = FALSE;
3291 return (*i_mgetc)(f);
3295 /* Base64 encoding */
3297 MIME allows line break in the middle of
3298 Base64, but we are very pessimistic in decoding
3299 in unbuf mode because MIME encoded code may be broken by
3300 less or editor's control sequence (such as ESC-[-K) in unbuffered
3301 mode. ignore incomplete MIME.
3303 mode = mime_decode_mode;
3304 mime_decode_mode = exit_mode; /* prepare for quit */
3306 while ((c1 = (*i_mgetc)(f))<=' ') {
3311 if ((c2 = (*i_mgetc)(f))<=' ') {
3314 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3315 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3318 if ((c1 == '?') && (c2 == '=')) {
3320 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3321 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3325 if ((c3 = (*i_mgetc)(f))<=' ') {
3328 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3329 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3333 if ((c4 = (*i_mgetc)(f))<=' ') {
3336 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3337 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3341 mime_decode_mode = mode; /* still in MIME sigh... */
3343 /* BASE 64 decoding */
3345 t1 = 0x3f & base64decode(c1);
3346 t2 = 0x3f & base64decode(c2);
3347 t3 = 0x3f & base64decode(c3);
3348 t4 = 0x3f & base64decode(c4);
3349 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3351 Fifo(mime_last++) = cc;
3352 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3354 Fifo(mime_last++) = cc;
3355 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3357 Fifo(mime_last++) = cc;
3362 return Fifo(mime_top++);
3370 Fifo(--mime_top) = c;
3381 /* In buffered mode, read until =? or NL or buffer full
3383 mime_input = mime_top;
3384 mime_last = mime_top;
3385 while(*p) Fifo(mime_input++) = *p++;
3388 while((c=(*i_getc)(f))!=EOF) {
3389 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3390 break; /* buffer full */
3392 if (c=='=' && d=='?') {
3393 /* checked. skip header, start decode */
3394 Fifo(mime_input++) = c;
3395 /* mime_last_input = mime_input; */
3400 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3402 /* Should we check length mod 4? */
3403 Fifo(mime_input++) = c;
3406 /* In case of Incomplete MIME, no MIME decode */
3407 Fifo(mime_input++) = c;
3408 mime_last = mime_input; /* point undecoded buffer */
3409 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3410 switch_mime_getc(); /* anyway we need buffered getc */
3421 i = c - 'A'; /* A..Z 0-25 */
3423 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3425 } else if (c > '/') {
3426 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3427 } else if (c == '+') {
3428 i = '>' /* 62 */ ; /* + 62 */
3430 i = '?' /* 63 */ ; /* / 63 */
/* Base64 alphabet: indexing with a 6-bit value (0..63) yields the character
   that encodes it ('A'..'Z', 'a'..'z', '0'..'9', '+', '/').  Only read by
   index in the code visible here; never written. */
3435 static char basis_64[] =
3436 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3446 p = mime_pattern[0];
3447 for(i=0;mime_encode[i];i++) {
3448 if (mode == mime_encode[i]) {
3449 p = mime_pattern[i];
3453 mimeout_mode = mime_encode_method[i];
3455 /* (*o_mputc)(' '); */
/* Map a nibble value 0..15 to its upper-case hexadecimal digit character
   ('0'..'9', 'A'..'F').  The argument is parenthesized so expression
   arguments such as `c & 0xf` expand correctly; it is evaluated more than
   once, so avoid side effects. */
#define itoh4(c) ((c)>=10?(c)+'A'-10:(c)+'0')
3478 if (mimeout_f==FIXED_MIME) {
3479 if (base64_count>71) {
3487 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3488 && mimeout_f!=FIXED_MIME) {
3489 if (mimeout_mode=='Q') {
3496 if (mimeout_mode!='B' || c!=SPACE) {
3505 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3506 open_mime(output_mode);
3508 } else { /* c==EOF */
3509 switch(mimeout_mode) {
3514 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3520 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3526 if (mimeout_f!=FIXED_MIME) {
3528 } else if (mimeout_mode != 'Q')
3533 switch(mimeout_mode) {
3537 (*o_mputc)(itoh4(((c>>4)&0xf)));
3538 (*o_mputc)(itoh4((c&0xf)));
3545 (*o_mputc)(basis_64[c>>2]);
3550 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3556 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3557 (*o_mputc)(basis_64[c & 0x3F]);
3577 mime_f = STRICT_MIME;
3581 #if defined(MSDOS) || defined(__OS2__)
3586 iso2022jp_f = FALSE;
3588 kanji_intro = DEFAULT_J;
3589 ascii_intro = DEFAULT_R;
3591 output_conv = DEFAULT_CONV;
3592 oconv = DEFAULT_CONV;
3595 i_mungetc = std_ungetc;
3596 i_mgetc_buf = std_getc;
3597 i_mungetc_buf = std_ungetc;
3600 i_ungetc=std_ungetc;
3603 i_bungetc= std_ungetc;
3607 o_crconv = no_connection;
3608 o_rot_conv = no_connection;
3609 o_iso2022jp_check_conv = no_connection;
3610 o_hira_conv = no_connection;
3611 o_fconv = no_connection;
3612 o_zconv = no_connection;
3615 i_ungetc = std_ungetc;
3617 i_mungetc = std_ungetc;
3619 output_mode = ASCII;
3622 mime_decode_mode = FALSE;
3631 struct input_code *p = input_code_list;
3636 #ifdef UTF8_OUTPUT_ENABLE
3637 if (w_oconv16_begin_f) {
3638 w_oconv16_begin_f = 2;
3643 fold_preserve_f = FALSE;
3646 fold_margin = FOLD_MARGIN;
3649 z_prev2=0,z_prev1=0;
3655 no_connection(c2,c1)
3658 no_connection2(c2,c1,0);
3662 no_connection2(c2,c1,c0)
3665 fprintf(stderr,"nkf internal module connection failure.\n");
3673 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
3674 fprintf(stderr,"Flags:\n");
3675 fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
3676 #ifdef DEFAULT_CODE_SJIS
3677 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
3679 #ifdef DEFAULT_CODE_JIS
3680 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
3682 #ifdef DEFAULT_CODE_EUC
3683 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
3685 #ifdef DEFAULT_CODE_UTF8
3686 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
3688 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
3689 fprintf(stderr,"t no conversion\n");
3690 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
3691 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
3692 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
3693 fprintf(stderr,"v Show this usage. V: show version\n");
3694 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
3695 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
3696 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
3697 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
3698 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
3699 fprintf(stderr," 3: Convert HTML Entity\n");
3700 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
3701 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
3703 fprintf(stderr,"T Text mode output\n");
3705 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
3706 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
3707 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
3708 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
3709 fprintf(stderr,"long name options\n");
3710 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
3711 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
3712 fprintf(stderr," --help,--version\n");
3719 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
3720 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
3723 #if defined(MSDOS) && defined(__WIN16__)
3726 #if defined(MSDOS) && defined(__WIN32__)
3732 ,Version,Patchlevel);
3733 fprintf(stderr,"\n%s\n",CopyRight);
3738 ** Patch authors
3739 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
3740 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
3741 ** ohta@src.ricoh.co.jp (Junn Ohta)
3742 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
3743 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
3744 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
3745 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
3746 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
3747 ** GHG00637@nifty-serve.or.jp (COW)