1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be used as a drop-in replacement for the previous nkf.
32 ** When it is started as, e.g., nkf -e and automatic detection judges the
33 ** input to be UTF-8, the input is converted to euc-jp as is.
35 ** It is still quite likely to contain bugs
36 ** (especially in automatic detection, mixed encodings and error handling).
38 ** If you find any problem, please report it to
39 ** E-Mail: furukawa@tcp-ip.or.jp
41 ***********************************************************************/
44 static char *CopyRight =
45 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
46 static char *Version =
48 static char *Patchlevel =
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T JIS (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
113 #if defined(MSDOS) || defined(__OS2__)
120 #define setbinmode(fp) fsetbin(fp)
121 #else /* Microsoft C, Turbo C */
122 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
124 #else /* UNIX,OS/2 */
125 #define setbinmode(fp)
128 #ifdef _IOFBF /* SysV and MSDOS, Windows */
129 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
131 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
134 /*Borland C++ 4.5 EasyWin*/
135 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
144 /* added by satoru@isoternet.org */
147 #include <sys/stat.h>
148 #ifndef MSDOS /* UNIX, OS/2 */
152 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
153 #include <sys/utime.h>
154 #elif defined(__TURBOC__) /* BCC */
156 #elif defined(LSI_C) /* LSI C */
168 /* state of output_mode and input_mode
185 /* Input Assumption */
189 #define LATIN1_INPUT 6
191 #define STRICT_MIME 8
196 #define JAPANESE_EUC 10
200 #define UTF8_INPUT 13
201 #define UTF16LE_INPUT 14
202 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): non-zero when c is an ASCII letter ('a'-'z', 'A'-'Z') or an
 * ASCII digit ('0'-'9'), zero otherwise.
 *
 * Every use of the parameter is parenthesized so that an argument containing
 * a low-precedence operator (for example a ?: expression) is compared as a
 * whole.  The argument is still evaluated more than once, so do not pass an
 * expression with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
223 #define HOLD_SIZE 1024
224 #define IOBUF_SIZE 16384
226 #define DEFAULT_J 'B'
227 #define DEFAULT_R 'B'
229 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
230 #define SJ6394 0x0161 /* 63 - 94 ku offset */
232 #define RANGE_NUM_MAX 18
237 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
238 #define sizeof_euc_utf8 94
239 #define sizeof_euc_to_utf8_1byte 94
240 #define sizeof_euc_to_utf8_2bytes 94
241 #define sizeof_utf8_to_euc_C2 64
242 #define sizeof_utf8_to_euc_E5B8 64
243 #define sizeof_utf8_to_euc_2bytes 112
244 #define sizeof_utf8_to_euc_3bytes 112
247 /* MIME preprocessor */
250 #ifdef EASYWIN /*Easy Win */
251 extern POINT _BufferSize;
254 /* function prototype */
256 #ifdef ANSI_C_PROTOTYPE
258 #define STATIC static
270 void (*status_func)PROTO((struct input_code *, int));
271 int (*iconv_func)PROTO((int c2, int c1, int c0));
275 STATIC char *input_codename = "";
277 STATIC int noconvert PROTO((FILE *f));
278 STATIC int kanji_convert PROTO((FILE *f));
279 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
280 STATIC int push_hold_buf PROTO((int c2));
281 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
282 STATIC int s_iconv PROTO((int c2,int c1,int c0));
283 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
284 STATIC int e_iconv PROTO((int c2,int c1,int c0));
285 #ifdef UTF8_INPUT_ENABLE
286 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
287 STATIC int w_iconv PROTO((int c2,int c1,int c0));
288 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
289 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
290 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
292 #ifdef UTF8_OUTPUT_ENABLE
293 STATIC int e2w_conv PROTO((int c2,int c1));
294 STATIC void w_oconv PROTO((int c2,int c1));
295 STATIC void w_oconv16 PROTO((int c2,int c1));
297 STATIC void e_oconv PROTO((int c2,int c1));
298 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
299 STATIC void s_oconv PROTO((int c2,int c1));
300 STATIC void j_oconv PROTO((int c2,int c1));
301 STATIC void fold_conv PROTO((int c2,int c1));
302 STATIC void cr_conv PROTO((int c2,int c1));
303 STATIC void z_conv PROTO((int c2,int c1));
304 STATIC void rot_conv PROTO((int c2,int c1));
305 STATIC void hira_conv PROTO((int c2,int c1));
306 STATIC void base64_conv PROTO((int c2,int c1));
307 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
308 STATIC void no_connection PROTO((int c2,int c1));
309 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
311 STATIC void code_score PROTO((struct input_code *ptr));
312 STATIC void code_status PROTO((int c));
314 STATIC void std_putc PROTO((int c));
315 STATIC int std_getc PROTO((FILE *f));
316 STATIC int std_ungetc PROTO((int c,FILE *f));
318 STATIC int broken_getc PROTO((FILE *f));
319 STATIC int broken_ungetc PROTO((int c,FILE *f));
321 STATIC int mime_begin PROTO((FILE *f));
322 STATIC int mime_getc PROTO((FILE *f));
323 STATIC int mime_ungetc PROTO((int c,FILE *f));
325 STATIC int mime_begin_strict PROTO((FILE *f));
326 STATIC int mime_getc_buf PROTO((FILE *f));
327 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
328 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
330 STATIC int base64decode PROTO((int c));
331 STATIC void mime_putc PROTO((int c));
332 STATIC void open_mime PROTO((int c));
333 STATIC void close_mime PROTO(());
334 STATIC void usage PROTO(());
335 STATIC void version PROTO(());
336 STATIC void options PROTO((unsigned char *c));
338 STATIC void reinit PROTO(());
343 static unsigned char stdibuf[IOBUF_SIZE];
344 static unsigned char stdobuf[IOBUF_SIZE];
345 static unsigned char hold_buf[HOLD_SIZE*2];
346 static int hold_count;
348 /* MIME preprocessor fifo */
350 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
351 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
352 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
353 static unsigned char mime_buf[MIME_BUF_SIZE];
354 static unsigned int mime_top = 0;
355 static unsigned int mime_last = 0; /* decoded */
356 static unsigned int mime_input = 0; /* undecoded */
/* Run-time mode flags (file scope), shown here with their initial values. */
359 static int unbuf_f = FALSE;
360 static int estab_f = FALSE;
361 static int nop_f = FALSE;
362 static int binmode_f = TRUE; /* binary mode */
363 static int rot_f = FALSE; /* rot13/47 mode */
364 static int hira_f = FALSE; /* hiragana/katakana conversion */
365 static int input_f = FALSE; /* non fixed input code */
366 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
367 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
368 static int mimebuf_f = FALSE; /* MIME buffered input */
369 static int broken_f = FALSE; /* convert ESC-less broken JIS */
370 static int iso8859_f = FALSE; /* ISO8859 through */
371 static int mimeout_f = FALSE; /* base64 mode */
372 #if defined(MSDOS) || defined(__OS2__)
373 static int x0201_f = TRUE; /* Assume JISX0201 kana */
375 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
377 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
378 #ifdef UTF8_OUTPUT_ENABLE
379 static int unicode_bom_f= 0; /* Output Unicode BOM */
380 static int w_oconv16_LE = 0; /* utf-16 little endian */
381 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
385 #ifdef NUMCHAR_OPTION
387 #define CLASS_MASK 0x0f000000
388 #define CLASS_UTF16 0x01000000
392 static int cap_f = FALSE;
393 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
394 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
395 STATIC int cap_getc PROTO((FILE *f));
396 STATIC int cap_ungetc PROTO((int c,FILE *f));
398 static int url_f = FALSE;
399 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
400 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
401 STATIC int url_getc PROTO((FILE *f));
402 STATIC int url_ungetc PROTO((int c,FILE *f));
404 static int numchar_f = FALSE;
405 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
406 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
407 STATIC int numchar_getc PROTO((FILE *f));
408 STATIC int numchar_ungetc PROTO((int c,FILE *f));
412 static int noout_f = FALSE;
413 STATIC void no_putc PROTO((int c));
414 static int debug_f = FALSE;
415 STATIC void debug PROTO((char *str));
418 static int guess_f = FALSE;
419 STATIC void print_guessed_code PROTO((char *filename));
420 STATIC void set_input_codename PROTO((char *codename));
421 static int is_inputcode_mixed = FALSE;
422 static int is_inputcode_set = FALSE;
425 static int exec_f = 0;
428 #ifdef SHIFTJIS_CP932
429 STATIC int cp932_f = TRUE;
430 #define CP932_TABLE_BEGIN (0xfa)
431 #define CP932_TABLE_END (0xfc)
433 STATIC int cp932inv_f = FALSE;
434 #define CP932INV_TABLE_BEGIN (0xed)
435 #define CP932INV_TABLE_END (0xee)
437 #endif /* SHIFTJIS_CP932 */
439 STATIC unsigned char prefix_table[256];
441 STATIC void e_status PROTO((struct input_code *, int));
442 STATIC void s_status PROTO((struct input_code *, int));
444 #ifdef UTF8_INPUT_ENABLE
445 STATIC void w_status PROTO((struct input_code *, int));
446 STATIC void w16_status PROTO((struct input_code *, int));
447 static int utf16_mode = UTF16LE_INPUT;
450 struct input_code input_code_list[] = {
451 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
452 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
453 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
454 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
458 static int mimeout_mode = 0;
459 static int base64_count = 0;
461 /* X0208 -> ASCII converter */
464 static int f_line = 0; /* chars in line */
465 static int f_prev = 0;
466 static int fold_preserve_f = FALSE; /* preserve new lines */
467 static int fold_f = FALSE;
468 static int fold_len = 0;
471 static unsigned char kanji_intro = DEFAULT_J,
472 ascii_intro = DEFAULT_R;
476 #define FOLD_MARGIN 10
477 #define DEFAULT_FOLD 60
479 static int fold_margin = FOLD_MARGIN;
483 #ifdef DEFAULT_CODE_JIS
484 # define DEFAULT_CONV j_oconv
486 #ifdef DEFAULT_CODE_SJIS
487 # define DEFAULT_CONV s_oconv
489 #ifdef DEFAULT_CODE_EUC
490 # define DEFAULT_CONV e_oconv
492 #ifdef DEFAULT_CODE_UTF8
493 # define DEFAULT_CONV w_oconv
496 /* process default */
497 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
499 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
500 /* s_iconv or oconv */
501 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
503 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
504 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
505 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
506 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
507 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
508 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
509 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
511 /* static redirections */
513 static void (*o_putc)PROTO((int c)) = std_putc;
515 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
516 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
518 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc (upstream getc saved when broken_getc is installed) */
519 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
521 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
523 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
524 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
526 /* for strict mime */
527 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
528 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
531 static int output_mode = ASCII, /* output kanji mode */
532 input_mode = ASCII, /* input kanji mode */
533 shift_mode = FALSE; /* TRUE shift out, or X0201 */
534 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
536 /* X0201 / X0208 conversion tables */
538 /* X0201 kana conversion table */
541 unsigned char cv[]= {
542 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
543 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
544 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
545 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
546 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
547 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
548 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
549 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
550 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
551 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
552 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
553 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
554 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
555 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
556 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
557 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
561 /* X0201 kana conversion table for dakuten */
564 unsigned char dv[]= {
565 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
567 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
568 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
569 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
570 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
571 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
572 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
573 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
574 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
575 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
576 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
577 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
579 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
583 /* X0201 kana conversion table for han-dakuten */
586 unsigned char ev[]= {
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
598 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 /* X0208 kigou conversion table */
607 /* 0x8140 - 0x819e */
609 unsigned char fv[] = {
611 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
612 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
613 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
615 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
616 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
617 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
619 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
628 static int file_out = FALSE;
630 static int overwrite = FALSE;
633 static int crmode_f = 0; /* CR, NL, CRLF */
634 #ifdef EASYWIN /*Easy Win */
635 static int end_check;
650 #ifdef EASYWIN /*Easy Win */
651 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
654 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
655 cp = (unsigned char *)*argv;
660 if (pipe(fds) < 0 || (pid = fork()) < 0){
671 execvp(argv[1], &argv[1]);
685 if(x0201_f == WISH_TRUE)
686 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
688 if (binmode_f == TRUE)
690 if (freopen("","wb",stdout) == NULL)
697 setbuf(stdout, (char *) NULL);
699 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
702 if (binmode_f == TRUE)
704 if (freopen("","rb",stdin) == NULL) return (-1);
708 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
712 kanji_convert(stdin);
713 if (guess_f) print_guessed_code(NULL);
718 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
727 /* reopen file for stdout */
728 if (file_out == TRUE) {
731 outfname = malloc(strlen(origfname)
732 + strlen(".nkftmpXXXXXX")
738 strcpy(outfname, origfname);
742 for (i = strlen(outfname); i; --i){
743 if (outfname[i - 1] == '/'
744 || outfname[i - 1] == '\\'){
750 strcat(outfname, "ntXXXXXX");
752 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
755 strcat(outfname, ".nkftmpXXXXXX");
756 fd = mkstemp(outfname);
759 || (fd_backup = dup(fileno(stdout))) < 0
760 || dup2(fd, fileno(stdout)) < 0
771 outfname = "nkf.out";
774 if(freopen(outfname, "w", stdout) == NULL) {
778 if (binmode_f == TRUE) {
780 if (freopen("","wb",stdout) == NULL)
787 if (binmode_f == TRUE)
789 if (freopen("","rb",fin) == NULL)
794 setvbuffer(fin, stdibuf, IOBUF_SIZE);
798 char *filename = NULL;
800 if (nfiles > 1) filename = origfname;
801 if (guess_f) print_guessed_code(filename);
807 #if defined(MSDOS) && !defined(__MINGW32__)
815 if (dup2(fd_backup, fileno(stdout)) < 0){
818 if (stat(origfname, &sb)) {
819 fprintf(stderr, "Can't stat %s\n", origfname);
821 /* restore permissions */
822 if (chmod(outfname, sb.st_mode)) {
823 fprintf(stderr, "Can't set permission %s\n", outfname);
826 /* restore timestamps */
827 #if defined(MSDOS) && !defined(__MINGW32__)
828 tb[0] = tb[1] = sb.st_mtime;
829 if (utime(outfname, tb)) {
830 fprintf(stderr, "Can't set timestamp %s\n", outfname);
833 tb.actime = sb.st_atime;
834 tb.modtime = sb.st_mtime;
835 if (utime(outfname, &tb)) {
836 fprintf(stderr, "Can't set timestamp %s\n", outfname);
840 if (unlink(origfname)){
844 if (rename(outfname, origfname)) {
846 fprintf(stderr, "Can't rename %s to %s\n",
847 outfname, origfname);
855 #ifdef EASYWIN /*Easy Win */
856 if (file_out == FALSE)
857 scanf("%d",&end_check);
860 #else /* for Other OS */
861 if (file_out == TRUE)
891 {"katakana-hiragana","h3"},
893 #ifdef UTF8_OUTPUT_ENABLE
898 #ifdef UTF8_INPUT_ENABLE
900 {"utf16-input", "W16"},
909 #ifdef NUMCHAR_OPTION
910 {"numchar-input", ""},
916 #ifdef SHIFTJIS_CP932
927 static int option_mode;
942 case '-': /* literal options */
943 if (!*cp) { /* ignore the rest of arguments */
947 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
949 p = (unsigned char *)long_option[i].name;
950 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
958 cp = (unsigned char *)long_option[i].alias;
961 if (strcmp(long_option[i].name, "overwrite") == 0){
968 if (strcmp(long_option[i].name, "cap-input") == 0){
972 if (strcmp(long_option[i].name, "url-input") == 0){
977 #ifdef NUMCHAR_OPTION
978 if (strcmp(long_option[i].name, "numchar-input") == 0){
984 if (strcmp(long_option[i].name, "no-output") == 0){
988 if (strcmp(long_option[i].name, "debug") == 0){
993 #ifdef SHIFTJIS_CP932
994 if (strcmp(long_option[i].name, "no-cp932") == 0){
998 if (strcmp(long_option[i].name, "cp932inv") == 0){
1004 if (strcmp(long_option[i].name, "exec-in") == 0){
1008 if (strcmp(long_option[i].name, "exec-out") == 0){
1013 #ifdef UTF8_OUTPUT_ENABLE
1014 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1015 ms_ucs_map_f = TRUE;
1019 if (strcmp(long_option[i].name, "prefix=") == 0){
1020 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1021 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1022 prefix_table[p[i]] = p[1];
1029 case 'b': /* buffered mode */
1032 case 'u': /* non bufferd mode */
1035 case 't': /* transparent mode */
1038 case 'j': /* JIS output */
1040 output_conv = j_oconv;
1042 case 'e': /* AT&T EUC output */
1043 output_conv = e_oconv;
1045 case 's': /* SJIS output */
1046 output_conv = s_oconv;
1048 case 'l': /* ISO8859 Latin-1 support, no conversion */
1049 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1050 input_f = LATIN1_INPUT;
1052 case 'i': /* Kanji IN ESC-$-@/B */
1053 if (*cp=='@'||*cp=='B')
1054 kanji_intro = *cp++;
1056 case 'o': /* ASCII IN ESC-(-J/B */
1057 if (*cp=='J'||*cp=='B'||*cp=='H')
1058 ascii_intro = *cp++;
1065 if ('9'>= *cp && *cp>='0')
1066 hira_f |= (*cp++ -'0');
1073 #if defined(MSDOS) || defined(__OS2__)
1088 #ifdef UTF8_OUTPUT_ENABLE
1089 case 'w': /* UTF-8 output */
1090 if ('1'== cp[0] && '6'==cp[1]) {
1091 output_conv = w_oconv16; cp+=2;
1093 unicode_bom_f=2; cp++;
1096 unicode_bom_f=1; cp++;
1098 } else if (cp[0] == 'B') {
1099 unicode_bom_f=2; cp++;
1101 unicode_bom_f=1; cp++;
1104 } else if (cp[0] == '8') {
1105 output_conv = w_oconv; cp++;
1108 unicode_bom_f=1; cp++;
1111 output_conv = w_oconv;
1114 #ifdef UTF8_INPUT_ENABLE
1115 case 'W': /* UTF-8 input */
1116 if ('1'== cp[0] && '6'==cp[1]) {
1117 input_f = UTF16LE_INPUT;
1120 } else if (cp[0] == 'B') {
1122 input_f = UTF16BE_INPUT;
1124 } else if (cp[0] == '8') {
1126 input_f = UTF8_INPUT;
1128 input_f = UTF8_INPUT;
1131 /* Input code assumption */
1132 case 'J': /* JIS input */
1133 case 'E': /* AT&T EUC input */
1134 input_f = JIS_INPUT;
1136 case 'S': /* MS Kanji input */
1137 input_f = SJIS_INPUT;
1138 if (x0201_f==NO_X0201) x0201_f=TRUE;
1140 case 'Z': /* Convert X0208 alphabet to asii */
1141 /* bit:0 Convert X0208
1142 bit:1 Convert Kankaku to one space
1143 bit:2 Convert Kankaku to two spaces
1144 bit:3 Convert HTML Entity
1146 if ('9'>= *cp && *cp>='0')
1147 alpha_f |= 1<<(*cp++ -'0');
1151 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1152 x0201_f = FALSE; /* No X0201->X0208 conversion */
1154 ESC-(-I in JIS, EUC, MS Kanji
1155 SI/SO in JIS, EUC, MS Kanji
1156 SSO in EUC, JIS, not in MS Kanji
1157 MS Kanji (0xa0-0xdf)
1159 ESC-(-I in JIS (0x20-0x5f)
1160 SSO in EUC (0xa0-0xdf)
1161 0xa0-0xd in MS Kanji (0xa0-0xdf)
1164 case 'X': /* Assume X0201 kana */
1165 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1168 case 'F': /* prserve new lines */
1169 fold_preserve_f = TRUE;
1170 case 'f': /* folding -f60 or -f */
1173 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1175 fold_len += *cp++ - '0';
1177 if (!(0<fold_len && fold_len<BUFSIZ))
1178 fold_len = DEFAULT_FOLD;
1182 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1184 fold_margin += *cp++ - '0';
1188 case 'm': /* MIME support */
1189 if (*cp=='B'||*cp=='Q') {
1190 mime_decode_mode = *cp++;
1191 mimebuf_f = FIXED_MIME;
1192 } else if (*cp=='N') {
1193 mime_f = TRUE; cp++;
1194 } else if (*cp=='S') {
1195 mime_f = STRICT_MIME; cp++;
1196 } else if (*cp=='0') {
1197 mime_f = FALSE; cp++;
1200 case 'M': /* MIME output */
1203 mimeout_f = FIXED_MIME; cp++;
1204 } else if (*cp=='Q') {
1206 mimeout_f = FIXED_MIME; cp++;
1211 case 'B': /* Broken JIS support */
1213 bit:1 allow any x on ESC-(-x or ESC-$-x
1214 bit:2 reset to ascii on NL
1216 if ('9'>= *cp && *cp>='0')
1217 broken_f |= 1<<(*cp++ -'0');
1222 case 'O':/* for Output file */
1226 case 'c':/* add cr code */
1229 case 'd':/* delete cr code */
1232 case 'I': /* ISO-2022-JP output */
1235 case 'L': /* line mode */
1236 if (*cp=='u') { /* unix */
1237 crmode_f = NL; cp++;
1238 } else if (*cp=='m') { /* mac */
1239 crmode_f = CR; cp++;
1240 } else if (*cp=='w') { /* windows */
1241 crmode_f = CRLF; cp++;
1242 } else if (*cp=='0') { /* no conversion */
1252 /* multiple options in a string are allowed for the Perl module */
1253 while(*cp && *cp!='-') cp++;
1257 /* bogus option but ignored */
1263 #ifdef ANSI_C_PROTOTYPE
1264 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1266 struct input_code * find_inputcode_byfunc(iconv_func)
1267 int (*iconv_func)();
1271 struct input_code *p = input_code_list;
1273 if (iconv_func == p->iconv_func){
1282 #ifdef ANSI_C_PROTOTYPE
1283 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1285 void set_iconv(f, iconv_func)
1287 int (*iconv_func)();
1291 static int (*iconv_for_check)() = 0;
1293 #ifdef INPUT_CODE_FIX
1301 #ifdef INPUT_CODE_FIX
1302 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1308 if (estab_f && iconv_for_check != iconv){
1309 struct input_code *p = find_inputcode_byfunc(iconv);
1311 set_input_codename(p->name);
1312 debug(input_codename);
1314 iconv_for_check = iconv;
/* Score bits OR-ed into input_code.score by set_code_score(); each flag is
   defined as the preceding flag shifted left by one bit. */
1319 #define SCORE_L2 (1) /* JIS level-2 kanji */
1320 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1321 #define SCORE_DEPEND (SCORE_KANA << 1) /* machine-dependent characters */
1322 #ifdef SHIFTJIS_CP932
1323 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1324 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1326 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1328 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* encoding specified by MIME */
1329 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1331 #define SCORE_INIT (SCORE_iMIME)
1333 int score_table_A0[] = {
1336 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1337 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1340 int score_table_F0[] = {
1341 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1342 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1343 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1344 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1347 void set_code_score(ptr, score)
1348 struct input_code *ptr;
1352 ptr->score |= score;
1356 void clr_code_score(ptr, score)
1357 struct input_code *ptr;
1361 ptr->score &= ~score;
1365 void code_score(ptr)
1366 struct input_code *ptr;
1368 int c2 = ptr->buf[0];
1369 int c1 = ptr->buf[1];
1371 set_code_score(ptr, SCORE_ERROR);
1372 }else if (c2 == SSO){
1373 set_code_score(ptr, SCORE_KANA);
1374 #ifdef UTF8_OUTPUT_ENABLE
1375 }else if (!e2w_conv(c2, c1)){
1376 set_code_score(ptr, SCORE_NO_EXIST);
1378 }else if ((c2 & 0x70) == 0x20){
1379 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1380 }else if ((c2 & 0x70) == 0x70){
1381 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1382 }else if ((c2 & 0x70) >= 0x50){
1383 set_code_score(ptr, SCORE_L2);
1387 void status_disable(ptr)
1388 struct input_code *ptr;
1393 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1396 void status_push_ch(ptr, c)
1397 struct input_code *ptr;
1400 ptr->buf[ptr->index++] = c;
1403 void status_clear(ptr)
1404 struct input_code *ptr;
1410 void status_reset(ptr)
1411 struct input_code *ptr;
1414 ptr->score = SCORE_INIT;
1417 void status_reinit(ptr)
1418 struct input_code *ptr;
1421 ptr->_file_stat = 0;
1424 void status_check(ptr, c)
1425 struct input_code *ptr;
1428 if (c <= DEL && estab_f){
1433 void s_status(ptr, c)
1434 struct input_code *ptr;
1439 status_check(ptr, c);
1444 #ifdef NUMCHAR_OPTION
1445 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1448 }else if (0xa1 <= c && c <= 0xdf){
1449 status_push_ch(ptr, SSO);
1450 status_push_ch(ptr, c);
1453 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1455 status_push_ch(ptr, c);
1456 #ifdef SHIFTJIS_CP932
1458 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1460 status_push_ch(ptr, c);
1461 #endif /* SHIFTJIS_CP932 */
1463 status_disable(ptr);
1467 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1468 status_push_ch(ptr, c);
1469 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1473 status_disable(ptr);
1476 #ifdef SHIFTJIS_CP932
1478 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1479 status_push_ch(ptr, c);
1480 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1481 set_code_score(ptr, SCORE_CP932);
1486 status_disable(ptr);
1488 #endif /* SHIFTJIS_CP932 */
1492 void e_status(ptr, c)
1493 struct input_code *ptr;
1498 status_check(ptr, c);
1503 #ifdef NUMCHAR_OPTION
1504 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1507 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1509 status_push_ch(ptr, c);
1511 status_disable(ptr);
1515 if (0xa1 <= c && c <= 0xfe){
1516 status_push_ch(ptr, c);
1520 status_disable(ptr);
1526 #ifdef UTF8_INPUT_ENABLE
1527 void w16_status(ptr, c)
1528 struct input_code *ptr;
1535 if (ptr->_file_stat == 0){
1536 if (c == 0xfe || c == 0xff){
1538 status_push_ch(ptr, c);
1539 ptr->_file_stat = 1;
1541 status_disable(ptr);
1542 ptr->_file_stat = -1;
1544 }else if (ptr->_file_stat > 0){
1546 status_push_ch(ptr, c);
1547 }else if (ptr->_file_stat < 0){
1548 status_disable(ptr);
1554 status_disable(ptr);
1555 ptr->_file_stat = -1;
1557 status_push_ch(ptr, c);
1564 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1565 status_push_ch(ptr, c);
1568 status_disable(ptr);
1569 ptr->_file_stat = -1;
1575 void w_status(ptr, c)
1576 struct input_code *ptr;
1581 status_check(ptr, c);
1586 #ifdef NUMCHAR_OPTION
1587 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1590 }else if (0xc0 <= c && c <= 0xdf){
1592 status_push_ch(ptr, c);
1593 }else if (0xe0 <= c && c <= 0xef){
1595 status_push_ch(ptr, c);
1597 status_disable(ptr);
1602 if (0x80 <= c && c <= 0xbf){
1603 status_push_ch(ptr, c);
1604 if (ptr->index > ptr->stat){
1605 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1606 && ptr->buf[2] == 0xbf);
1607 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1608 &ptr->buf[0], &ptr->buf[1]);
1615 status_disable(ptr);
1626 int action_flag = 1;
1627 struct input_code *result = 0;
1628 struct input_code *p = input_code_list;
1630 (p->status_func)(p, c);
1633 }else if(p->stat == 0){
1644 if (result && !estab_f){
1645 set_iconv(TRUE, result->iconv_func);
1646 }else if (c <= DEL){
1647 struct input_code *ptr = input_code_list;
1657 #define STD_GC_BUFSIZE (256)
1658 int std_gc_buf[STD_GC_BUFSIZE];
1668 return std_gc_buf[--std_gc_ndx];
1680 if (std_gc_ndx == STD_GC_BUFSIZE){
1683 std_gc_buf[std_gc_ndx++] = c;
1703 while ((c = (*i_getc)(f)) != EOF)
1712 oconv = output_conv;
1715 /* replace continuation module, from output side */
1717 /* output redirection */
1719 if (noout_f || guess_f){
1726 if (mimeout_f == TRUE) {
1727 o_base64conv = oconv; oconv = base64_conv;
1729 /* base64_count = 0; */
1733 o_crconv = oconv; oconv = cr_conv;
1736 o_rot_conv = oconv; oconv = rot_conv;
1739 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1742 o_hira_conv = oconv; oconv = hira_conv;
1745 o_fconv = oconv; oconv = fold_conv;
1748 if (alpha_f || x0201_f) {
1749 o_zconv = oconv; oconv = z_conv;
1753 i_ungetc = std_ungetc;
1754 /* input redirection */
1757 i_cgetc = i_getc; i_getc = cap_getc;
1758 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1761 i_ugetc = i_getc; i_getc = url_getc;
1762 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1765 #ifdef NUMCHAR_OPTION
1767 i_ngetc = i_getc; i_getc = numchar_getc;
1768 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1771 if (mime_f && mimebuf_f==FIXED_MIME) {
1772 i_mgetc = i_getc; i_getc = mime_getc;
1773 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1776 i_bgetc = i_getc; i_getc = broken_getc;
1777 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1779 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1780 set_iconv(-TRUE, e_iconv);
1781 } else if (input_f == SJIS_INPUT) {
1782 set_iconv(-TRUE, s_iconv);
1783 #ifdef UTF8_INPUT_ENABLE
1784 } else if (input_f == UTF8_INPUT) {
1785 set_iconv(-TRUE, w_iconv);
1786 } else if (input_f == UTF16LE_INPUT) {
1787 set_iconv(-TRUE, w_iconv16);
1790 set_iconv(FALSE, e_iconv);
1794 struct input_code *p = input_code_list;
1802 Conversion main loop. Code detection only.
1812 module_connection();
1817 output_mode = ASCII;
1820 #define NEXT continue /* no output, get next */
1821 #define SEND ; /* output c1 and c2, get next */
1822 #define LAST break /* end of loop, go closing */
1824 while ((c1 = (*i_getc)(f)) != EOF) {
1829 /* in case of 8th bit is on */
1831 /* in case of not established yet */
1832 /* It is still ambiguous */
1833 if (h_conv(f, c2, c1)==EOF)
1839 /* in case of already established */
1841 /* ignore bogus code */
1847 /* second byte, 7 bit code */
1848 /* it might be kanji shifted */
1849 if ((c1 == DEL) || (c1 <= SPACE)) {
1850 /* ignore bogus first code */
1858 #ifdef UTF8_INPUT_ENABLE
1867 #ifdef NUMCHAR_OPTION
1868 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1871 } else if (c1 > DEL) {
1873 if (!estab_f && !iso8859_f) {
1874 /* not established yet */
1877 } else { /* estab_f==TRUE */
1882 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1883 /* SJIS X0201 Case... */
1884 if(iso2022jp_f && x0201_f==NO_X0201) {
1885 (*oconv)(GETA1, GETA2);
1892 } else if (c1==SSO && iconv != s_iconv) {
1893 /* EUC X0201 Case */
1894 c1 = (*i_getc)(f); /* skip SSO */
1896 if (SSP<=c1 && c1<0xe0) {
1897 if(iso2022jp_f && x0201_f==NO_X0201) {
1898 (*oconv)(GETA1, GETA2);
1905 } else { /* bogus code, skip SSO and one byte */
1909 /* already established */
1914 } else if ((c1 > SPACE) && (c1 != DEL)) {
1915 /* in case of Roman characters */
1917 /* output 1 shifted byte */
1921 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1922 /* output 1 shifted byte */
1923 if(iso2022jp_f && x0201_f==NO_X0201) {
1924 (*oconv)(GETA1, GETA2);
1931 /* look like bogus code */
1934 } else if (input_mode == X0208) {
1935 /* in case of Kanji shifted */
1938 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1939 /* Check MIME code */
1940 if ((c1 = (*i_getc)(f)) == EOF) {
1943 } else if (c1 == '?') {
1944 /* =? is mime conversion start sequence */
1945 if(mime_f == STRICT_MIME) {
1946 /* check in real detail */
1947 if (mime_begin_strict(f) == EOF)
1951 } else if (mime_begin(f) == EOF)
1961 /* normal ASCII code */
1964 } else if (c1 == SI) {
1967 } else if (c1 == SO) {
1970 } else if (c1 == ESC ) {
1971 if ((c1 = (*i_getc)(f)) == EOF) {
1972 /* (*oconv)(0, ESC); don't send bogus code */
1974 } else if (c1 == '$') {
1975 if ((c1 = (*i_getc)(f)) == EOF) {
1977 (*oconv)(0, ESC); don't send bogus code
1978 (*oconv)(0, '$'); */
1980 } else if (c1 == '@'|| c1 == 'B') {
1981 /* This is kanji introduction */
1984 set_input_codename("ISO-2022-JP");
1985 debug(input_codename);
1987 } else if (c1 == '(') {
1988 if ((c1 = (*i_getc)(f)) == EOF) {
1989 /* don't send bogus code
1995 } else if (c1 == '@'|| c1 == 'B') {
1996 /* This is kanji introduction */
2001 /* could be some special code */
2008 } else if (broken_f&0x2) {
2009 /* accept any ESC-(-x as broken code ... */
2019 } else if (c1 == '(') {
2020 if ((c1 = (*i_getc)(f)) == EOF) {
2021 /* don't send bogus code
2023 (*oconv)(0, '('); */
2027 /* This is X0201 kana introduction */
2028 input_mode = X0201; shift_mode = X0201;
2030 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2031 /* This ends kanji mode: ESC ( B/J/H switches input back to ASCII */
2032 input_mode = ASCII; shift_mode = FALSE;
2034 } else if (broken_f&0x2) {
2035 input_mode = ASCII; shift_mode = FALSE;
2040 /* maintain various input_mode here */
2044 } else if ( c1 == 'N' || c1 == 'n' ){
2046 c3 = (*i_getc)(f); /* skip SS2 */
2047 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2062 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2063 input_mode = ASCII; set_iconv(FALSE, 0);
2069 if (input_mode == X0208)
2070 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2071 else if (input_mode)
2072 (*oconv)(input_mode, c1); /* other special case */
2073 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2074 int c0 = (*i_getc)(f);
2077 (*iconv)(c2, c1, c0);
2083 /* goto next_word */
2087 (*iconv)(EOF, 0, 0);
2100 /** it must NOT be in the kanji shifted sequence */
2101 /** it must NOT be written in JIS7 */
2102 /** and it must be after 2 byte 8bit code */
2109 while ((c1 = (*i_getc)(f)) != EOF) {
2115 if (push_hold_buf(c1) == EOF || estab_f){
2121 struct input_code *p = input_code_list;
2122 struct input_code *result = p;
2127 if (p->score < result->score){
2132 set_iconv(FALSE, result->iconv_func);
2137 ** 1) EOF is detected, or
2138 ** 2) Code is established, or
2139 ** 3) Buffer is FULL (but last word is pushed)
2141 ** in 1) and 3) cases, we continue to use
2142 ** Kanji codes by oconv and leave estab_f unchanged.
2147 while (wc < hold_count){
2148 c2 = hold_buf[wc++];
2150 #ifdef NUMCHAR_OPTION
2151 || (c2 & CLASS_MASK) == CLASS_UTF16
2156 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2157 (*iconv)(X0201, c2, 0);
2160 if (wc < hold_count){
2161 c1 = hold_buf[wc++];
2170 if ((*iconv)(c2, c1, 0) < 0){
2172 if (wc < hold_count){
2173 c0 = hold_buf[wc++];
2182 (*iconv)(c2, c1, c0);
2195 if (hold_count >= HOLD_SIZE*2)
2197 hold_buf[hold_count++] = c2;
2198 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2201 int s2e_conv(c2, c1, p2, p1)
2205 #ifdef SHIFTJIS_CP932
2206 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2207 extern unsigned short shiftjis_cp932[3][189];
2208 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2209 if (c1 == 0) return 1;
2213 #endif /* SHIFTJIS_CP932 */
2214 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2216 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2233 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2236 int ret = s2e_conv(c2, c1, &c2, &c1);
2237 if (ret) return ret;
2250 } else if (c2 == SSO){
2253 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2263 #ifdef UTF8_INPUT_ENABLE
2265 w2e_conv(c2, c1, c0, p2, p1)
2269 extern unsigned short * utf8_to_euc_2bytes[];
2270 extern unsigned short ** utf8_to_euc_3bytes[];
2273 if (0xc0 <= c2 && c2 <= 0xef) {
2274 unsigned short **pp;
2277 if (c0 == 0) return -1;
2278 pp = utf8_to_euc_3bytes[c2 - 0x80];
2279 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2281 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2283 #ifdef NUMCHAR_OPTION
2286 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2291 } else if (c2 == X0201) {
2304 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2312 w16w_conv(val, p2, p1, p0)
2320 }else if (val < 0x800){
2321 *p2 = 0xc0 | (val >> 6);
2322 *p1 = 0x80 | (val & 0x3f);
2325 *p2 = 0xe0 | (val >> 12);
2326 *p1 = 0x80 | ((val >> 6) & 0x3f);
2327 *p0 = 0x80 | (val & 0x3f);
2332 ww16_conv(c2, c1, c0)
2337 val = (c2 & 0x0f) << 12;
2338 val |= (c1 & 0x3f) << 6;
2340 }else if (c2 >= 0xc0){
2341 val = (c2 & 0x1f) << 6;
2342 val |= (c1 & 0x3f) << 6;
2350 w16e_conv(val, p2, p1)
2354 extern unsigned short * utf8_to_euc_2bytes[];
2355 extern unsigned short ** utf8_to_euc_3bytes[];
2357 unsigned short **pp;
2361 w16w_conv(val, &c2, &c1, &c0);
2364 pp = utf8_to_euc_3bytes[c2 - 0x80];
2365 psize = sizeof_utf8_to_euc_C2;
2366 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2368 pp = utf8_to_euc_2bytes;
2369 psize = sizeof_utf8_to_euc_2bytes;
2370 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2372 #ifdef NUMCHAR_OPTION
2375 *p1 = CLASS_UTF16 | val;
2384 w_iconv16(c2, c1, c0)
2389 if (c2==0376 && c1==0377){
2390 utf16_mode = UTF16LE_INPUT;
2392 } else if (c2==0377 && c1==0376){
2393 utf16_mode = UTF16BE_INPUT;
2396 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2398 tmp=c1; c1=c2; c2=tmp;
2400 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2404 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2405 if (ret) return ret;
2411 w_iconv_common(c1, c0, pp, psize, p2, p1)
2413 unsigned short **pp;
2421 if (pp == 0) return 1;
2424 if (c1 < 0 || psize <= c1) return 1;
2426 if (p == 0) return 1;
2429 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2431 if (val == 0) return 1;
2434 if (c2 == SO) c2 = X0201;
2443 #ifdef UTF8_OUTPUT_ENABLE
2448 extern unsigned short euc_to_utf8_1byte[];
2449 extern unsigned short * euc_to_utf8_2bytes[];
2450 extern unsigned short * euc_to_utf8_2bytes_ms[];
2454 p = euc_to_utf8_1byte;
2457 c2 = (c2&0x7f) - 0x21;
2458 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2459 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2464 c1 = (c1 & 0x7f) - 0x21;
2465 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2476 #ifdef NUMCHAR_OPTION
2477 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2478 w16w_conv(c1, &c2, &c1, &c0);
2482 if (c0) (*o_putc)(c0);
2491 if (unicode_bom_f==2) {
2499 output_mode = ASCII;
2501 } else if (c2 == ISO8859_1) {
2502 output_mode = ISO8859_1;
2503 (*o_putc)(c1 | 0x080);
2506 w16w_conv((unsigned short)e2w_conv(c2, c1), &c2, &c1, &c0);
2510 if (c0) (*o_putc)(c0);
2525 if (unicode_bom_f==2) {
2527 (*o_putc)((unsigned char)'\377');
2531 (*o_putc)((unsigned char)'\377');
2536 if (c2 == ISO8859_1) {
2539 #ifdef NUMCHAR_OPTION
2540 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2541 c2 = (c1 >> 8) & 0xff;
2545 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2546 c2 = (val >> 8) & 0xff;
2565 #ifdef NUMCHAR_OPTION
2566 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2567 w16e_conv(c1, &c2, &c1);
2573 } else if (c2 == 0) {
2574 output_mode = ASCII;
2576 } else if (c2 == X0201) {
2577 output_mode = JAPANESE_EUC;
2578 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2579 } else if (c2 == ISO8859_1) {
2580 output_mode = ISO8859_1;
2581 (*o_putc)(c1 | 0x080);
2583 if ((c1<0x21 || 0x7e<c1) ||
2584 (c2<0x21 || 0x7e<c2)) {
2585 set_iconv(FALSE, 0);
2586 return; /* too late to rescue this char */
2588 output_mode = JAPANESE_EUC;
2589 (*o_putc)(c2 | 0x080);
2590 (*o_putc)(c1 | 0x080);
2595 e2s_conv(c2, c1, p2, p1)
2596 int c2, c1, *p2, *p1;
2598 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2599 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2607 #ifdef NUMCHAR_OPTION
2608 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2609 w16e_conv(c1, &c2, &c1);
2615 } else if (c2 == 0) {
2616 output_mode = ASCII;
2618 } else if (c2 == X0201) {
2619 output_mode = SHIFT_JIS;
2621 } else if (c2 == ISO8859_1) {
2622 output_mode = ISO8859_1;
2623 (*o_putc)(c1 | 0x080);
2625 if ((c1<0x20 || 0x7e<c1) ||
2626 (c2<0x20 || 0x7e<c2)) {
2627 set_iconv(FALSE, 0);
2628 return; /* too late to rescue this char */
2630 output_mode = SHIFT_JIS;
2631 e2s_conv(c2, c1, &c2, &c1);
2633 #ifdef SHIFTJIS_CP932
2635 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2636 extern unsigned short cp932inv[2][189];
2637 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2643 #endif /* SHIFTJIS_CP932 */
2646 if (prefix_table[(unsigned char)c1]){
2647 (*o_putc)(prefix_table[(unsigned char)c1]);
2658 #ifdef NUMCHAR_OPTION
2659 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2660 w16e_conv(c1, &c2, &c1);
2664 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2667 (*o_putc)(ascii_intro);
2668 output_mode = ASCII;
2671 } else if (c2==X0201) {
2672 if (output_mode!=X0201) {
2673 output_mode = X0201;
2679 } else if (c2==ISO8859_1) {
2680 /* iso8859 introduction, or 8th bit on */
2681 /* Can we convert in 7bit form using ESC-'-'-A ?
2683 output_mode = ISO8859_1;
2685 } else if (c2 == 0) {
2686 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2689 (*o_putc)(ascii_intro);
2690 output_mode = ASCII;
2694 if (output_mode != X0208) {
2695 output_mode = X0208;
2698 (*o_putc)(kanji_intro);
2700 if (c1<0x20 || 0x7e<c1)
2702 if (c2<0x20 || 0x7e<c2)
2714 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2716 } else if (base64_count>66 && mimeout_mode) {
2717 (*o_base64conv)(EOF,0);
2719 (*o_putc)('\t'); base64_count += 7;
2721 (*o_base64conv)(c2,c1);
2725 static int broken_buf[3];
2726 static int broken_counter = 0;
2727 static int broken_last = 0;
2734 if (broken_counter>0) {
2735 return broken_buf[--broken_counter];
2738 if (c=='$' && broken_last != ESC
2739 && (input_mode==ASCII || input_mode==X0201)) {
2742 if (c1=='@'|| c1=='B') {
2743 broken_buf[0]=c1; broken_buf[1]=c;
2750 } else if (c=='(' && broken_last != ESC
2751 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2754 if (c1=='J'|| c1=='B') {
2755 broken_buf[0]=c1; broken_buf[1]=c;
2773 if (broken_counter<2)
2774 broken_buf[broken_counter++]=c;
2778 static int prev_cr = 0;
2786 if (! (c2==0&&c1==NL) ) {
2792 } else if (c1=='\r') {
2794 } else if (c1=='\n') {
2795 if (crmode_f==CRLF) {
2796 (*o_crconv)(0,'\r');
2797 } else if (crmode_f==CR) {
2798 (*o_crconv)(0,'\r');
2802 } else if (c1!='\032' || crmode_f!=NL){
2808 Return value of fold_conv()
2810 \n add newline and output char
2811 \r add newline and output nothing
2814 1 (or else) normal output
2816 fold state in prev (previous character)
2818 >0x80 Japanese (X0208/X0201)
2823 This fold algorithm does not preserve leading spaces in a line.
2824 This is the main difference from fmt.
2827 #define char_size(c2,c1) (c2?2:1)
2836 if (c1== '\r' && !fold_preserve_f) {
2837 fold_state=0; /* ignore cr */
2838 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2840 fold_state=0; /* ignore cr */
2841 } else if (c1== BS) {
2842 if (f_line>0) f_line--;
2844 } else if (c2==EOF && f_line != 0) { /* close open last line */
2846 } else if ((c1=='\n' && !fold_preserve_f)
2847 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2848 && fold_preserve_f)) {
2850 if (fold_preserve_f) {
2854 } else if ((f_prev == c1 && !fold_preserve_f)
2855 || (f_prev == '\n' && fold_preserve_f)
2856 ) { /* duplicate newline */
2859 fold_state = '\n'; /* output two newline */
2865 if (f_prev&0x80) { /* Japanese? */
2867 fold_state = 0; /* ignore given single newline */
2868 } else if (f_prev==' ') {
2872 if (++f_line<=fold_len)
2876 fold_state = '\r'; /* fold and output nothing */
2880 } else if (c1=='\f') {
2885 fold_state = '\n'; /* output newline and clear */
2886 } else if ( (c2==0 && c1==' ')||
2887 (c2==0 && c1=='\t')||
2888 (c2=='!'&& c1=='!')) {
2889 /* X0208 kankaku or ascii space */
2890 if (f_prev == ' ') {
2891 fold_state = 0; /* remove duplicate spaces */
2894 if (++f_line<=fold_len)
2895 fold_state = ' '; /* output ASCII space only */
2897 f_prev = ' '; f_line = 0;
2898 fold_state = '\r'; /* fold and output nothing */
2902 prev0 = f_prev; /* we still need this one... , but almost done */
2904 if (c2 || c2==X0201)
2905 f_prev |= 0x80; /* this is Japanese */
2906 f_line += char_size(c2,c1);
2907 if (f_line<=fold_len) { /* normal case */
2910 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2911 f_line = char_size(c2,c1);
2912 fold_state = '\n'; /* We can't wait, do fold now */
2913 } else if (c2==X0201) {
2914 /* simple kinsoku rules return 1 means no folding */
2915 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2916 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2917 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2918 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2919 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2920 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2921 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2923 fold_state = '\n';/* add one new f_line before this character */
2926 fold_state = '\n';/* add one new f_line before this character */
2929 /* kinsoku point in ASCII */
2930 if ( c1==')'|| /* { [ ( */
2941 /* just after special */
2942 } else if (!is_alnum(prev0)) {
2943 f_line = char_size(c2,c1);
2945 } else if ((prev0==' ') || /* ignored new f_line */
2946 (prev0=='\n')|| /* ignored new f_line */
2947 (prev0&0x80)) { /* X0208 - ASCII */
2948 f_line = char_size(c2,c1);
2949 fold_state = '\n';/* add one new f_line before this character */
2951 fold_state = 1; /* default no fold in ASCII */
2955 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2956 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2957 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2958 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2959 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2960 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2961 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2962 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2963 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2964 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2965 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2966 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2967 /* default no fold in kinsoku */
2970 f_line = char_size(c2,c1);
2971 /* add one new f_line before this character */
2974 f_line = char_size(c2,c1);
2976 /* add one new f_line before this character */
2981 /* terminator process */
2982 switch(fold_state) {
3001 int z_prev2=0,z_prev1=0;
3008 /* if (c2) c1 &= 0x7f; assertion */
3010 if (x0201_f && z_prev2==X0201) { /* X0201 */
3011 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3013 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3015 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3017 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3021 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3030 if (x0201_f && c2==X0201) {
3031 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3032 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3033 z_prev1 = c1; z_prev2 = c2;
3036 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3041 /* JISX0208 Alphabet */
3042 if (alpha_f && c2 == 0x23 ) {
3044 } else if (alpha_f && c2 == 0x21 ) {
3045 /* JISX0208 Kigou */
3050 } else if (alpha_f&0x4) {
3055 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3061 case '>': entity = ">"; break;
3062 case '<': entity = "<"; break;
3063 case '\"': entity = """; break;
3064 case '&': entity = "&"; break;
3067 while (*entity) (*o_zconv)(0, *entity++);
3077 #define rot13(c) ( \
3079 (c <= 'M') ? (c + 13): \
3080 (c <= 'Z') ? (c - 13): \
3082 (c <= 'm') ? (c + 13): \
3083 (c <= 'z') ? (c - 13): \
3087 #define rot47(c) ( \
3089 ( c <= 'O' ) ? (c + 47) : \
3090 ( c <= '~' ) ? (c - 47) : \
3098 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3104 (*o_rot_conv)(c2,c1);
3111 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3113 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3116 (*o_hira_conv)(c2,c1);
3121 iso2022jp_check_conv(c2,c1)
3124 static int range[RANGE_NUM_MAX][2] = {
3147 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3151 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3156 for (i = 0; i < RANGE_NUM_MAX; i++) {
3157 start = range[i][0];
3160 if (c >= start && c <= end) {
3165 (*o_iso2022jp_check_conv)(c2,c1);
3169 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3171 unsigned char *mime_pattern[] = {
3172 (unsigned char *)"\075?EUC-JP?B?",
3173 (unsigned char *)"\075?SHIFT_JIS?B?",
3174 (unsigned char *)"\075?ISO-8859-1?Q?",
3175 (unsigned char *)"\075?ISO-8859-1?B?",
3176 (unsigned char *)"\075?ISO-2022-JP?B?",
3177 (unsigned char *)"\075?ISO-2022-JP?Q?",
3178 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3179 (unsigned char *)"\075?UTF-8?B?",
3180 (unsigned char *)"\075?UTF-8?Q?",
3182 (unsigned char *)"\075?US-ASCII?Q?",
3187 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3188 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3189 e_iconv, s_iconv, 0, 0, 0, 0,
3190 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3196 int mime_encode[] = {
3197 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3198 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3205 int mime_encode_method[] = {
3206 'B', 'B','Q', 'B', 'B', 'Q',
3207 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3215 #define MAXRECOVER 20
3217 /* I don't trust portability of toupper */
3218 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
3219 #define nkf_isdigit(c) ('0'<=c && c<='9')
3220 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
3225 if (i_getc!=mime_getc) {
3226 i_mgetc = i_getc; i_getc = mime_getc;
3227 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3228 if(mime_f==STRICT_MIME) {
3229 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3230 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3236 unswitch_mime_getc()
3238 if(mime_f==STRICT_MIME) {
3239 i_mgetc = i_mgetc_buf;
3240 i_mungetc = i_mungetc_buf;
3243 i_ungetc = i_mungetc;
3247 mime_begin_strict(f)
3252 unsigned char *p,*q;
3253 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3255 mime_decode_mode = FALSE;
3256 /* =? has been checked */
3258 p = mime_pattern[j];
3261 for(i=2;p[i]>' ';i++) { /* start at =? */
3262 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3263 /* pattern fails, try next one */
3265 while ((p = mime_pattern[++j])) {
3266 for(k=2;k<i;k++) /* assume length(p) > i */
3267 if (p[k]!=q[k]) break;
3268 if (k==i && nkf_toupper(c1)==p[k]) break;
3270 if (p) continue; /* found next one, continue */
3271 /* all fails, output from recovery buffer */
3279 mime_decode_mode = p[i-2];
3281 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3283 if (mime_decode_mode=='B') {
3284 mimebuf_f = unbuf_f;
3286 /* do MIME integrity check */
3287 return mime_integrity(f,mime_pattern[j]);
3299 /* we don't keep eof of Fifo, because it contains ?= as
3300 a terminator. It was checked in mime_integrity. */
3301 return ((mimebuf_f)?
3302 (*i_mgetc_buf)(f):Fifo(mime_input++));
3306 mime_ungetc_buf(c,f)
3311 (*i_mungetc_buf)(c,f);
3313 Fifo(--mime_input)=c;
3324 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3325 /* re-read and convert again from mime_buffer. */
3327 /* =? has been checked */
3329 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3330 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3331 /* We accept any character type even if it is broken up by new lines */
3332 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3333 if (c1=='\n'||c1==' '||c1=='\r'||
3334 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3336 /* Failed. But this could be another MIME preamble */
3344 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3345 if (!(++i<MAXRECOVER) || c1==EOF) break;
3346 if (c1=='b'||c1=='B') {
3347 mime_decode_mode = 'B';
3348 } else if (c1=='q'||c1=='Q') {
3349 mime_decode_mode = 'Q';
3353 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3354 if (!(++i<MAXRECOVER) || c1==EOF) break;
3356 mime_decode_mode = FALSE;
3362 if (!mime_decode_mode) {
3363 /* false MIME preamble, restart from mime_buffer */
3364 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3365 /* Since we are in MIME mode until buffer becomes empty, */
3366 /* we never go into mime_begin again for a while. */
3369 /* discard mime preamble, and goto MIME mode */
3371 /* do no MIME integrity check */
3372 return c1; /* used only for checking EOF */
3387 fprintf(stderr, "%s\n", str);
3393 set_input_codename (codename)
3398 strcmp(codename, "") != 0 &&
3399 strcmp(codename, input_codename) != 0)
3401 is_inputcode_mixed = TRUE;
3403 input_codename = codename;
3404 is_inputcode_set = TRUE;
3408 print_guessed_code (filename)
3411 char *codename = "BINARY";
3412 if (!is_inputcode_mixed) {
3413 if (strcmp(input_codename, "") == 0) {
3416 codename = input_codename;
3419 if (filename != NULL) printf("%s:", filename);
3420 printf("%s\n", codename);
3427 if (nkf_isdigit(x)) return x - '0';
3428 return nkf_toupper(x) - 'A' + 10;
3433 #ifdef ANSI_C_PROTOTYPE
3434 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3437 hex_getc(ch, f, g, u)
3450 if (!nkf_isxdigit(c2)){
3455 if (!nkf_isxdigit(c3)){
3460 return (hex2bin(c2) << 4) | hex2bin(c3);
3467 return hex_getc(':', f, i_cgetc, i_cungetc);
3475 return (*i_cungetc)(c, f);
3482 return hex_getc('%', f, i_ugetc, i_uungetc);
3490 return (*i_uungetc)(c, f);
3494 #ifdef NUMCHAR_OPTION
3499 int (*g)() = i_ngetc;
3500 int (*u)() = i_nungetc;
3511 if (buf[i] == 'x' || buf[i] == 'X'){
3512 for (j = 0; j < 5; j++){
3514 if (!nkf_isxdigit(buf[i])){
3521 c |= hex2bin(buf[i]);
3524 for (j = 0; j < 6; j++){
3528 if (!nkf_isdigit(buf[i])){
3535 c += hex2bin(buf[i]);
3541 return CLASS_UTF16 | c;
3551 numchar_ungetc(c, f)
3555 return (*i_nungetc)(c, f);
3564 int c1, c2, c3, c4, cc;
3565 int t1, t2, t3, t4, mode, exit_mode;
3567 if (mime_top != mime_last) { /* Something is in FIFO */
3568 return Fifo(mime_top++);
3570 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3571 mime_decode_mode=FALSE;
3572 unswitch_mime_getc();
3573 return (*i_getc)(f);
3576 if (mimebuf_f == FIXED_MIME)
3577 exit_mode = mime_decode_mode;
3580 if (mime_decode_mode == 'Q') {
3581 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3583 if (c1=='_') return ' ';
3584 if (c1!='=' && c1!='?') {
3588 mime_decode_mode = exit_mode; /* prepare for quit */
3589 if (c1<=' ') return c1;
3590 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3591 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3592 /* end Q encoding */
3593 input_mode = exit_mode;
3594 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3595 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3598 if (c1=='='&&c2<' ') { /* this is soft wrap */
3599 while((c1 = (*i_mgetc)(f)) <=' ') {
3600 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3602 mime_decode_mode = 'Q'; /* still in MIME */
3603 goto restart_mime_q;
3606 mime_decode_mode = 'Q'; /* still in MIME */
3610 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3611 if (c2<=' ') return c2;
3612 mime_decode_mode = 'Q'; /* still in MIME */
3613 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3614 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3615 return ((hex(c2)<<4) + hex(c3));
3618 if (mime_decode_mode != 'B') {
3619 mime_decode_mode = FALSE;
3620 return (*i_mgetc)(f);
3624 /* Base64 encoding */
3626 MIME allows line breaks in the middle of
3627 Base64, but we are very pessimistic in decoding
3628 in unbuf mode because MIME encoded code may be broken by
3629 less or an editor's control sequence (such as ESC-[-K) in unbuffered
3630 mode. Ignore incomplete MIME.
3632 mode = mime_decode_mode;
3633 mime_decode_mode = exit_mode; /* prepare for quit */
3635 while ((c1 = (*i_mgetc)(f))<=' ') {
3640 if ((c2 = (*i_mgetc)(f))<=' ') {
3643 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3644 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3647 if ((c1 == '?') && (c2 == '=')) {
3649 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3650 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3654 if ((c3 = (*i_mgetc)(f))<=' ') {
3657 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3658 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3662 if ((c4 = (*i_mgetc)(f))<=' ') {
3665 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3666 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3670 mime_decode_mode = mode; /* still in MIME sigh... */
3672 /* BASE 64 decoding */
3674 t1 = 0x3f & base64decode(c1);
3675 t2 = 0x3f & base64decode(c2);
3676 t3 = 0x3f & base64decode(c3);
3677 t4 = 0x3f & base64decode(c4);
3678 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3680 Fifo(mime_last++) = cc;
3681 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3683 Fifo(mime_last++) = cc;
3684 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3686 Fifo(mime_last++) = cc;
3691 return Fifo(mime_top++);
3699 Fifo(--mime_top) = c;
3710 /* In buffered mode, read until =? or NL or buffer full
3712 mime_input = mime_top;
3713 mime_last = mime_top;
3714 while(*p) Fifo(mime_input++) = *p++;
3717 while((c=(*i_getc)(f))!=EOF) {
3718 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3719 break; /* buffer full */
3721 if (c=='=' && d=='?') {
3722 /* checked. skip header, start decode */
3723 Fifo(mime_input++) = c;
3724 /* mime_last_input = mime_input; */
3729 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3731 /* Should we check length mod 4? */
3732 Fifo(mime_input++) = c;
3735 /* In case of Incomplete MIME, no MIME decode */
3736 Fifo(mime_input++) = c;
3737 mime_last = mime_input; /* point undecoded buffer */
3738 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3739 switch_mime_getc(); /* anyway we need buffered getc */
3750 i = c - 'A'; /* A..Z 0-25 */
3752 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3754 } else if (c > '/') {
3755 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3756 } else if (c == '+') {
3757 i = '>' /* 62 */ ; /* + 62 */
3759 i = '?' /* 63 */ ; /* / 63 */
3764 static char basis_64[] =
3765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3775 p = mime_pattern[0];
3776 for(i=0;mime_encode[i];i++) {
3777 if (mode == mime_encode[i]) {
3778 p = mime_pattern[i];
3782 mimeout_mode = mime_encode_method[i];
3784 /* (*o_mputc)(' '); */
3801 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
3807 if (mimeout_f==FIXED_MIME) {
3808 if (base64_count>71) {
3816 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3817 && mimeout_f!=FIXED_MIME) {
3818 if (mimeout_mode=='Q') {
3825 if (mimeout_mode!='B' || c!=SPACE) {
3834 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3835 open_mime(output_mode);
3837 } else { /* c==EOF */
3838 switch(mimeout_mode) {
3843 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3849 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3855 if (mimeout_f!=FIXED_MIME) {
3857 } else if (mimeout_mode != 'Q')
3862 switch(mimeout_mode) {
3866 (*o_mputc)(itoh4(((c>>4)&0xf)));
3867 (*o_mputc)(itoh4((c&0xf)));
3874 (*o_mputc)(basis_64[c>>2]);
3879 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3885 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3886 (*o_mputc)(basis_64[c & 0x3F]);
3906 mime_f = STRICT_MIME;
3910 #if defined(MSDOS) || defined(__OS2__)
3915 iso2022jp_f = FALSE;
3917 kanji_intro = DEFAULT_J;
3918 ascii_intro = DEFAULT_R;
3920 output_conv = DEFAULT_CONV;
3921 oconv = DEFAULT_CONV;
3924 i_mungetc = std_ungetc;
3925 i_mgetc_buf = std_getc;
3926 i_mungetc_buf = std_ungetc;
3929 i_ungetc=std_ungetc;
3932 i_bungetc= std_ungetc;
3936 o_crconv = no_connection;
3937 o_rot_conv = no_connection;
3938 o_iso2022jp_check_conv = no_connection;
3939 o_hira_conv = no_connection;
3940 o_fconv = no_connection;
3941 o_zconv = no_connection;
3944 i_ungetc = std_ungetc;
3946 i_mungetc = std_ungetc;
3948 output_mode = ASCII;
3951 mime_decode_mode = FALSE;
3960 struct input_code *p = input_code_list;
3965 #ifdef UTF8_OUTPUT_ENABLE
3966 if (unicode_bom_f) {
3972 fold_preserve_f = FALSE;
3975 fold_margin = FOLD_MARGIN;
3978 z_prev2=0,z_prev1=0;
3982 for (i = 0; i < 256; i++){
3983 prefix_table[i] = 0;
3986 input_codename = "";
3987 is_inputcode_mixed = FALSE;
3988 is_inputcode_set = FALSE;
3993 no_connection(c2,c1)
3996 no_connection2(c2,c1,0);
4000 no_connection2(c2,c1,c0)
4003 fprintf(stderr,"nkf internal module connection failure.\n");
4011 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4012 fprintf(stderr,"Flags:\n");
4013 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4014 #ifdef DEFAULT_CODE_SJIS
4015 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4017 #ifdef DEFAULT_CODE_JIS
4018 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4020 #ifdef DEFAULT_CODE_EUC
4021 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4023 #ifdef DEFAULT_CODE_UTF8
4024 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4026 #ifdef UTF8_OUTPUT_ENABLE
4027 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4029 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4030 #ifdef UTF8_INPUT_ENABLE
4031 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4033 fprintf(stderr,"t no conversion\n");
4034 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4035 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4036 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
4037 fprintf(stderr,"v Show this usage. V: show version\n");
4038 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4039 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4040 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4041 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4042 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4043 fprintf(stderr," 3: Convert HTML Entity\n");
4044 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4045 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4047 fprintf(stderr,"T Text mode output\n");
4049 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4050 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4051 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
4052 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4053 fprintf(stderr,"long name options\n");
4054 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4055 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4056 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4058 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%'\n");
4060 #ifdef NUMCHAR_OPTION
4061 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4063 #ifdef SHIFTJIS_CP932
4064 fprintf(stderr," --no-cp932 Don't convert Shift_JIS FAxx-FCxx to equivalnet CP932\n");
4066 #ifdef UTF8_OUTPUT_ENABLE
4067 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4070 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4072 fprintf(stderr," -g, --guess Guess the input code\n");
4073 fprintf(stderr," --help,--version\n");
4080 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4081 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4084 #if defined(MSDOS) && defined(__WIN16__)
4087 #if defined(MSDOS) && defined(__WIN32__)
4093 ,Version,Patchlevel);
4094 fprintf(stderr,"\n%s\n",CopyRight);
4099 ** Patch authors
4100 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4101 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4102 ** ohta@src.ricoh.co.jp (Junn Ohta)
4103 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4104 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4105 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4106 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4107 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4108 ** GHG00637@nifty-serve.or.jp (COW)