1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be used as a drop-in replacement for the earlier nkf.
32 ** When started as e.g. "nkf -e", input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as is.
35 ** It is still quite likely to contain bugs
36 ** (especially in auto-detection, mixed encodings, and error handling).
38 ** If you find any problem, please report it to
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2004-11-06"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
49 static char *Version =
51 static char *Patchlevel =
58 ** USAGE: nkf [flags] [file]
61 ** b Output is buffered (DEFAULT)
62 ** u Output is unbuffered
66 ** j Output code is JIS 7 bit (DEFAULT SELECT)
67 ** s Output code is MS Kanji (DEFAULT SELECT)
68 ** e Output code is AT&T JIS (EUC) (DEFAULT SELECT)
69 ** w Output code is UTF-8 (DEFAULT SELECT)
70 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
72 ** m MIME conversion for ISO-2022-JP
73 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
74 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
75 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
76 ** M MIME output conversion
78 ** r {de/en}crypt ROT13/47
82 ** T Text mode output (for MS-DOS)
84 ** x Do not convert X0201 kana into X0208
85 ** Z Convert X0208 alphabet to ASCII
90 ** B try to fix broken JIS, missing Escape
91 ** B[1-9] broken level
93 ** O Output to 'nkf.out' file or last file name
94 ** d Delete \r in line feed
95 ** c Add \r in line feed
96 ** -- other long option
97 ** -- ignore following option (don't use with -O )
101 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
103 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
116 #if defined(MSDOS) || defined(__OS2__)
123 #define setbinmode(fp) fsetbin(fp)
124 #else /* Microsoft C, Turbo C */
125 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
127 #else /* UNIX,OS/2 */
128 #define setbinmode(fp)
131 #ifdef _IOFBF /* SysV and MSDOS, Windows */
132 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
134 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
137 /*Borland C++ 4.5 EasyWin*/
138 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
147 /* added by satoru@isoternet.org */
150 #include <sys/stat.h>
151 #ifndef MSDOS /* UNIX, OS/2 */
155 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
156 #include <sys/utime.h>
157 #elif defined(__TURBOC__) /* BCC */
159 #elif defined(LSI_C) /* LSI C */
171 /* state of output_mode and input_mode
188 /* Input Assumption */
192 #define LATIN1_INPUT 6
194 #define STRICT_MIME 8
199 #define JAPANESE_EUC 10
203 #define UTF8_INPUT 13
204 #define UTF16LE_INPUT 14
205 #define UTF16BE_INPUT 15
223 #define is_alnum(c) \
224 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
226 #define HOLD_SIZE 1024
227 #define IOBUF_SIZE 16384
229 #define DEFAULT_J 'B'
230 #define DEFAULT_R 'B'
232 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
233 #define SJ6394 0x0161 /* 63 - 94 ku offset */
235 #define RANGE_NUM_MAX 18
240 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
241 #define sizeof_euc_utf8 94
242 #define sizeof_euc_to_utf8_1byte 94
243 #define sizeof_euc_to_utf8_2bytes 94
244 #define sizeof_utf8_to_euc_C2 64
245 #define sizeof_utf8_to_euc_E5B8 64
246 #define sizeof_utf8_to_euc_2bytes 112
247 #define sizeof_utf8_to_euc_3bytes 112
250 /* MIME preprocessor */
253 #ifdef EASYWIN /*Easy Win */
254 extern POINT _BufferSize;
257 /* function prototype */
259 #ifdef ANSI_C_PROTOTYPE
261 #define STATIC static
273 void (*status_func)PROTO((struct input_code *, int));
274 int (*iconv_func)PROTO((int c2, int c1, int c0));
278 STATIC char *input_codename = "";
280 STATIC int noconvert PROTO((FILE *f));
281 STATIC int kanji_convert PROTO((FILE *f));
282 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
283 STATIC int push_hold_buf PROTO((int c2));
284 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
285 STATIC int s_iconv PROTO((int c2,int c1,int c0));
286 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
287 STATIC int e_iconv PROTO((int c2,int c1,int c0));
288 #ifdef UTF8_INPUT_ENABLE
289 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
290 STATIC int w_iconv PROTO((int c2,int c1,int c0));
291 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
292 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
293 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
295 #ifdef UTF8_OUTPUT_ENABLE
296 STATIC int e2w_conv PROTO((int c2,int c1));
297 STATIC void w_oconv PROTO((int c2,int c1));
298 STATIC void w_oconv16 PROTO((int c2,int c1));
300 STATIC void e_oconv PROTO((int c2,int c1));
301 STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
302 STATIC void s_oconv PROTO((int c2,int c1));
303 STATIC void j_oconv PROTO((int c2,int c1));
304 STATIC void fold_conv PROTO((int c2,int c1));
305 STATIC void cr_conv PROTO((int c2,int c1));
306 STATIC void z_conv PROTO((int c2,int c1));
307 STATIC void rot_conv PROTO((int c2,int c1));
308 STATIC void hira_conv PROTO((int c2,int c1));
309 STATIC void base64_conv PROTO((int c2,int c1));
310 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
311 STATIC void no_connection PROTO((int c2,int c1));
312 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
314 STATIC void code_score PROTO((struct input_code *ptr));
315 STATIC void code_status PROTO((int c));
317 STATIC void std_putc PROTO((int c));
318 STATIC int std_getc PROTO((FILE *f));
319 STATIC int std_ungetc PROTO((int c,FILE *f));
321 STATIC int broken_getc PROTO((FILE *f));
322 STATIC int broken_ungetc PROTO((int c,FILE *f));
324 STATIC int mime_begin PROTO((FILE *f));
325 STATIC int mime_getc PROTO((FILE *f));
326 STATIC int mime_ungetc PROTO((int c,FILE *f));
328 STATIC int mime_begin_strict PROTO((FILE *f));
329 STATIC int mime_getc_buf PROTO((FILE *f));
330 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
331 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
333 STATIC int base64decode PROTO((int c));
334 STATIC void mime_putc PROTO((int c));
335 STATIC void open_mime PROTO((int c));
336 STATIC void close_mime PROTO(());
337 STATIC void usage PROTO(());
338 STATIC void version PROTO(());
339 STATIC void options PROTO((unsigned char *c));
341 STATIC void reinit PROTO(());
346 static unsigned char stdibuf[IOBUF_SIZE];
347 static unsigned char stdobuf[IOBUF_SIZE];
348 static unsigned char hold_buf[HOLD_SIZE*2];
349 static int hold_count;
351 /* MIME preprocessor fifo */
353 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
354 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
355 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
356 static unsigned char mime_buf[MIME_BUF_SIZE];
357 static unsigned int mime_top = 0;
358 static unsigned int mime_last = 0; /* decoded */
359 static unsigned int mime_input = 0; /* undecoded */
362 static int unbuf_f = FALSE;
363 static int estab_f = FALSE;
364 static int nop_f = FALSE;
365 static int binmode_f = TRUE; /* binary mode */
366 static int rot_f = FALSE; /* rot14/43 mode */
367 static int hira_f = FALSE; /* hira/kata henkan */
368 static int input_f = FALSE; /* non fixed input code */
369 static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
370 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
371 static int mimebuf_f = FALSE; /* MIME buffered input */
372 static int broken_f = FALSE; /* convert ESC-less broken JIS */
373 static int iso8859_f = FALSE; /* ISO8859 through */
374 static int mimeout_f = FALSE; /* base64 mode */
375 #if defined(MSDOS) || defined(__OS2__)
376 static int x0201_f = TRUE; /* Assume JISX0201 kana */
378 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
380 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
381 #ifdef UTF8_OUTPUT_ENABLE
382 static int unicode_bom_f= 0; /* Output Unicode BOM */
383 static int w_oconv16_LE = 0; /* utf-16 little endian */
384 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
388 #ifdef NUMCHAR_OPTION
390 #define CLASS_MASK 0x0f000000
391 #define CLASS_UTF16 0x01000000
395 static int cap_f = FALSE;
396 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
397 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
398 STATIC int cap_getc PROTO((FILE *f));
399 STATIC int cap_ungetc PROTO((int c,FILE *f));
401 static int url_f = FALSE;
402 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
403 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
404 STATIC int url_getc PROTO((FILE *f));
405 STATIC int url_ungetc PROTO((int c,FILE *f));
407 static int numchar_f = FALSE;
408 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
409 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
410 STATIC int numchar_getc PROTO((FILE *f));
411 STATIC int numchar_ungetc PROTO((int c,FILE *f));
415 static int noout_f = FALSE;
416 STATIC void no_putc PROTO((int c));
417 static int debug_f = FALSE;
418 STATIC void debug PROTO((char *str));
421 static int guess_f = FALSE;
422 STATIC void print_guessed_code PROTO((char *filename));
423 STATIC void set_input_codename PROTO((char *codename));
424 static int is_inputcode_mixed = FALSE;
425 static int is_inputcode_set = FALSE;
428 static int exec_f = 0;
431 #ifdef SHIFTJIS_CP932
432 STATIC int cp932_f = TRUE;
433 #define CP932_TABLE_BEGIN (0xfa)
434 #define CP932_TABLE_END (0xfc)
436 STATIC int cp932inv_f = FALSE;
437 #define CP932INV_TABLE_BEGIN (0xed)
438 #define CP932INV_TABLE_END (0xee)
440 #endif /* SHIFTJIS_CP932 */
442 STATIC unsigned char prefix_table[256];
444 STATIC void e_status PROTO((struct input_code *, int));
445 STATIC void s_status PROTO((struct input_code *, int));
447 #ifdef UTF8_INPUT_ENABLE
448 STATIC void w_status PROTO((struct input_code *, int));
449 STATIC void w16_status PROTO((struct input_code *, int));
450 static int utf16_mode = UTF16LE_INPUT;
453 struct input_code input_code_list[] = {
454 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
455 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
456 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
457 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
461 static int mimeout_mode = 0;
462 static int base64_count = 0;
464 /* X0208 -> ASCII converter */
467 static int f_line = 0; /* chars in line */
468 static int f_prev = 0;
469 static int fold_preserve_f = FALSE; /* preserve new lines */
470 static int fold_f = FALSE;
471 static int fold_len = 0;
474 static unsigned char kanji_intro = DEFAULT_J;
475 static unsigned char ascii_intro = DEFAULT_R;
479 #define FOLD_MARGIN 10
480 #define DEFAULT_FOLD 60
482 static int fold_margin = FOLD_MARGIN;
486 #ifdef DEFAULT_CODE_JIS
487 # define DEFAULT_CONV j_oconv
489 #ifdef DEFAULT_CODE_SJIS
490 # define DEFAULT_CONV s_oconv
492 #ifdef DEFAULT_CODE_EUC
493 # define DEFAULT_CONV e_oconv
495 #ifdef DEFAULT_CODE_UTF8
496 # define DEFAULT_CONV w_oconv
499 /* process default */
500 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
502 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
503 /* s_iconv or oconv */
504 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
506 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
507 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
508 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
509 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
510 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
511 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
512 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
514 /* static redirections */
516 static void (*o_putc)PROTO((int c)) = std_putc;
518 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
519 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
521 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
522 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
524 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
526 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
527 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
529 /* for strict mime */
530 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
531 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
534 static int output_mode = ASCII, /* output kanji mode */
535 input_mode = ASCII, /* input kanji mode */
536 shift_mode = FALSE; /* TRUE shift out, or X0201 */
537 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
539 /* X0201 / X0208 conversion tables */
541 /* X0201 kana conversion table */
544 unsigned char cv[]= {
545 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
546 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
547 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
548 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
549 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
550 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
551 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
552 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
553 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
554 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
555 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
556 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
557 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
558 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
559 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
560 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
564 /* X0201 kana conversion table for dakuten (voiced sound mark) */
567 unsigned char dv[]= {
568 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
569 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
570 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
571 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
572 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
573 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
574 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
575 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
576 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
577 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
579 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
583 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
589 unsigned char ev[]= {
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
601 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 /* X0208 kigou conversion table */
610 /* 0x8140 - 0x819e */
612 unsigned char fv[] = {
614 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
615 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
616 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
618 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
619 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
620 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
622 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
625 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
631 static int file_out = FALSE;
633 static int overwrite = FALSE;
636 static int crmode_f = 0; /* CR, NL, CRLF */
637 #ifdef EASYWIN /*Easy Win */
638 static int end_check;
653 #ifdef EASYWIN /*Easy Win */
654 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
657 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
658 cp = (unsigned char *)*argv;
663 if (pipe(fds) < 0 || (pid = fork()) < 0){
674 execvp(argv[1], &argv[1]);
688 if(x0201_f == WISH_TRUE)
689 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
691 if (binmode_f == TRUE)
693 if (freopen("","wb",stdout) == NULL)
700 setbuf(stdout, (char *) NULL);
702 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
705 if (binmode_f == TRUE)
707 if (freopen("","rb",stdin) == NULL) return (-1);
711 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
715 kanji_convert(stdin);
716 if (guess_f) print_guessed_code(NULL);
721 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
730 /* reopen file for stdout */
731 if (file_out == TRUE) {
734 outfname = malloc(strlen(origfname)
735 + strlen(".nkftmpXXXXXX")
741 strcpy(outfname, origfname);
745 for (i = strlen(outfname); i; --i){
746 if (outfname[i - 1] == '/'
747 || outfname[i - 1] == '\\'){
753 strcat(outfname, "ntXXXXXX");
755 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
758 strcat(outfname, ".nkftmpXXXXXX");
759 fd = mkstemp(outfname);
762 || (fd_backup = dup(fileno(stdout))) < 0
763 || dup2(fd, fileno(stdout)) < 0
774 outfname = "nkf.out";
777 if(freopen(outfname, "w", stdout) == NULL) {
781 if (binmode_f == TRUE) {
783 if (freopen("","wb",stdout) == NULL)
790 if (binmode_f == TRUE)
792 if (freopen("","rb",fin) == NULL)
797 setvbuffer(fin, stdibuf, IOBUF_SIZE);
801 char *filename = NULL;
803 if (nfiles > 1) filename = origfname;
804 if (guess_f) print_guessed_code(filename);
810 #if defined(MSDOS) && !defined(__MINGW32__)
818 if (dup2(fd_backup, fileno(stdout)) < 0){
821 if (stat(origfname, &sb)) {
822 fprintf(stderr, "Can't stat %s\n", origfname);
824 /* restore the permissions */
825 if (chmod(outfname, sb.st_mode)) {
826 fprintf(stderr, "Can't set permission %s\n", outfname);
829 /* restore the timestamps */
830 #if defined(MSDOS) && !defined(__MINGW32__)
831 tb[0] = tb[1] = sb.st_mtime;
832 if (utime(outfname, tb)) {
833 fprintf(stderr, "Can't set timestamp %s\n", outfname);
836 tb.actime = sb.st_atime;
837 tb.modtime = sb.st_mtime;
838 if (utime(outfname, &tb)) {
839 fprintf(stderr, "Can't set timestamp %s\n", outfname);
843 if (unlink(origfname)){
847 if (rename(outfname, origfname)) {
849 fprintf(stderr, "Can't rename %s to %s\n",
850 outfname, origfname);
858 #ifdef EASYWIN /*Easy Win */
859 if (file_out == FALSE)
860 scanf("%d",&end_check);
863 #else /* for Other OS */
864 if (file_out == TRUE)
894 {"katakana-hiragana","h3"},
896 #ifdef UTF8_OUTPUT_ENABLE
901 #ifdef UTF8_INPUT_ENABLE
903 {"utf16-input", "W16"},
912 #ifdef NUMCHAR_OPTION
913 {"numchar-input", ""},
919 #ifdef SHIFTJIS_CP932
930 static int option_mode = 0;
945 case '-': /* literal options */
946 if (!*cp) { /* ignore the rest of arguments */
950 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
952 p = (unsigned char *)long_option[i].name;
953 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
961 cp = (unsigned char *)long_option[i].alias;
964 if (strcmp(long_option[i].name, "overwrite") == 0){
971 if (strcmp(long_option[i].name, "cap-input") == 0){
975 if (strcmp(long_option[i].name, "url-input") == 0){
980 #ifdef NUMCHAR_OPTION
981 if (strcmp(long_option[i].name, "numchar-input") == 0){
987 if (strcmp(long_option[i].name, "no-output") == 0){
991 if (strcmp(long_option[i].name, "debug") == 0){
996 #ifdef SHIFTJIS_CP932
997 if (strcmp(long_option[i].name, "no-cp932") == 0){
1001 if (strcmp(long_option[i].name, "cp932inv") == 0){
1007 if (strcmp(long_option[i].name, "exec-in") == 0){
1011 if (strcmp(long_option[i].name, "exec-out") == 0){
1016 #ifdef UTF8_OUTPUT_ENABLE
1017 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1018 ms_ucs_map_f = TRUE;
1022 if (strcmp(long_option[i].name, "prefix=") == 0){
1023 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1024 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1025 prefix_table[p[i]] = p[1];
1032 case 'b': /* buffered mode */
1035 case 'u': /* non bufferd mode */
1038 case 't': /* transparent mode */
1041 case 'j': /* JIS output */
1043 output_conv = j_oconv;
1045 case 'e': /* AT&T EUC output */
1046 output_conv = e_oconv;
1048 case 's': /* SJIS output */
1049 output_conv = s_oconv;
1051 case 'l': /* ISO8859 Latin-1 support, no conversion */
1052 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1053 input_f = LATIN1_INPUT;
1055 case 'i': /* Kanji IN ESC-$-@/B */
1056 if (*cp=='@'||*cp=='B')
1057 kanji_intro = *cp++;
1059 case 'o': /* ASCII IN ESC-(-J/B */
1060 if (*cp=='J'||*cp=='B'||*cp=='H')
1061 ascii_intro = *cp++;
1068 if ('9'>= *cp && *cp>='0')
1069 hira_f |= (*cp++ -'0');
1076 #if defined(MSDOS) || defined(__OS2__)
1091 #ifdef UTF8_OUTPUT_ENABLE
1092 case 'w': /* UTF-8 output */
1093 if ('1'== cp[0] && '6'==cp[1]) {
1094 output_conv = w_oconv16; cp+=2;
1096 unicode_bom_f=2; cp++;
1099 unicode_bom_f=1; cp++;
1101 } else if (cp[0] == 'B') {
1102 unicode_bom_f=2; cp++;
1104 unicode_bom_f=1; cp++;
1107 } else if (cp[0] == '8') {
1108 output_conv = w_oconv; cp++;
1111 unicode_bom_f=1; cp++;
1114 output_conv = w_oconv;
1117 #ifdef UTF8_INPUT_ENABLE
1118 case 'W': /* UTF-8 input */
1119 if ('1'== cp[0] && '6'==cp[1]) {
1120 input_f = UTF16LE_INPUT;
1123 } else if (cp[0] == 'B') {
1125 input_f = UTF16BE_INPUT;
1127 } else if (cp[0] == '8') {
1129 input_f = UTF8_INPUT;
1131 input_f = UTF8_INPUT;
1134 /* Input code assumption */
1135 case 'J': /* JIS input */
1136 case 'E': /* AT&T EUC input */
1137 input_f = JIS_INPUT;
1139 case 'S': /* MS Kanji input */
1140 input_f = SJIS_INPUT;
1141 if (x0201_f==NO_X0201) x0201_f=TRUE;
1143 case 'Z': /* Convert X0208 alphabet to asii */
1144 /* bit:0 Convert X0208
1145 bit:1 Convert Kankaku to one space
1146 bit:2 Convert Kankaku to two spaces
1147 bit:3 Convert HTML Entity
1149 if ('9'>= *cp && *cp>='0')
1150 alpha_f |= 1<<(*cp++ -'0');
1154 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1155 x0201_f = FALSE; /* No X0201->X0208 conversion */
1157 ESC-(-I in JIS, EUC, MS Kanji
1158 SI/SO in JIS, EUC, MS Kanji
1159 SSO in EUC, JIS, not in MS Kanji
1160 MS Kanji (0xa0-0xdf)
1162 ESC-(-I in JIS (0x20-0x5f)
1163 SSO in EUC (0xa0-0xdf)
1164 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1167 case 'X': /* Assume X0201 kana */
1168 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1171 case 'F': /* prserve new lines */
1172 fold_preserve_f = TRUE;
1173 case 'f': /* folding -f60 or -f */
1176 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1178 fold_len += *cp++ - '0';
1180 if (!(0<fold_len && fold_len<BUFSIZ))
1181 fold_len = DEFAULT_FOLD;
1185 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1187 fold_margin += *cp++ - '0';
1191 case 'm': /* MIME support */
1192 if (*cp=='B'||*cp=='Q') {
1193 mime_decode_mode = *cp++;
1194 mimebuf_f = FIXED_MIME;
1195 } else if (*cp=='N') {
1196 mime_f = TRUE; cp++;
1197 } else if (*cp=='S') {
1198 mime_f = STRICT_MIME; cp++;
1199 } else if (*cp=='0') {
1200 mime_f = FALSE; cp++;
1203 case 'M': /* MIME output */
1206 mimeout_f = FIXED_MIME; cp++;
1207 } else if (*cp=='Q') {
1209 mimeout_f = FIXED_MIME; cp++;
1214 case 'B': /* Broken JIS support */
1216 bit:1 allow any x on ESC-(-x or ESC-$-x
1217 bit:2 reset to ascii on NL
1219 if ('9'>= *cp && *cp>='0')
1220 broken_f |= 1<<(*cp++ -'0');
1225 case 'O':/* for Output file */
1229 case 'c':/* add cr code */
1232 case 'd':/* delete cr code */
1235 case 'I': /* ISO-2022-JP output */
1238 case 'L': /* line mode */
1239 if (*cp=='u') { /* unix */
1240 crmode_f = NL; cp++;
1241 } else if (*cp=='m') { /* mac */
1242 crmode_f = CR; cp++;
1243 } else if (*cp=='w') { /* windows */
1244 crmode_f = CRLF; cp++;
1245 } else if (*cp=='0') { /* no conversion */
1255 /* multiple options in a string are allowed for the Perl module */
1256 while(*cp && *cp!='-') cp++;
1260 /* bogus option but ignored */
1266 #ifdef ANSI_C_PROTOTYPE
1267 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1269 struct input_code * find_inputcode_byfunc(iconv_func)
1270 int (*iconv_func)();
1274 struct input_code *p = input_code_list;
1276 if (iconv_func == p->iconv_func){
1285 #ifdef ANSI_C_PROTOTYPE
1286 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1288 void set_iconv(f, iconv_func)
1290 int (*iconv_func)();
1294 static int (*iconv_for_check)() = 0;
1296 #ifdef INPUT_CODE_FIX
1304 #ifdef INPUT_CODE_FIX
1305 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1311 if (estab_f && iconv_for_check != iconv){
1312 struct input_code *p = find_inputcode_byfunc(iconv);
1314 set_input_codename(p->name);
1315 debug(input_codename);
1317 iconv_for_check = iconv;
1322 #define SCORE_L2 (1) /*
\e$BBh
\e(B2
\e$B?e=`4A;z
\e(B */
1323 #define SCORE_KANA (SCORE_L2 << 1) /*
\e$B$$$o$f$kH>3Q%+%J
\e(B */
1324 #define SCORE_DEPEND (SCORE_KANA << 1) /*
\e$B5!<o0MB8J8;z
\e(B */
1325 #ifdef SHIFTJIS_CP932
1326 #define SCORE_CP932 (SCORE_DEPEND << 1) /* CP932
\e$B$K$h$kFI$_49$(
\e(B */
1327 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /*
\e$BB8:_$7$J$$J8;z
\e(B */
1329 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /*
\e$BB8:_$7$J$$J8;z
\e(B */
1331 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME
\e$B$K$h$k;XDj
\e(B */
1332 #define SCORE_ERROR (SCORE_iMIME << 1) /*
\e$B%(%i!<
\e(B */
1334 #define SCORE_INIT (SCORE_iMIME)
1336 int score_table_A0[] = {
1339 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1340 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1343 int score_table_F0[] = {
1344 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1345 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1346 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1347 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1350 void set_code_score(ptr, score)
1351 struct input_code *ptr;
1355 ptr->score |= score;
1359 void clr_code_score(ptr, score)
1360 struct input_code *ptr;
1364 ptr->score &= ~score;
1368 void code_score(ptr)
1369 struct input_code *ptr;
1371 int c2 = ptr->buf[0];
1372 int c1 = ptr->buf[1];
1374 set_code_score(ptr, SCORE_ERROR);
1375 }else if (c2 == SSO){
1376 set_code_score(ptr, SCORE_KANA);
1377 #ifdef UTF8_OUTPUT_ENABLE
1378 }else if (!e2w_conv(c2, c1)){
1379 set_code_score(ptr, SCORE_NO_EXIST);
1381 }else if ((c2 & 0x70) == 0x20){
1382 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1383 }else if ((c2 & 0x70) == 0x70){
1384 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1385 }else if ((c2 & 0x70) >= 0x50){
1386 set_code_score(ptr, SCORE_L2);
1390 void status_disable(ptr)
1391 struct input_code *ptr;
1396 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1399 void status_push_ch(ptr, c)
1400 struct input_code *ptr;
1403 ptr->buf[ptr->index++] = c;
1406 void status_clear(ptr)
1407 struct input_code *ptr;
1413 void status_reset(ptr)
1414 struct input_code *ptr;
1417 ptr->score = SCORE_INIT;
1420 void status_reinit(ptr)
1421 struct input_code *ptr;
1424 ptr->_file_stat = 0;
1427 void status_check(ptr, c)
1428 struct input_code *ptr;
1431 if (c <= DEL && estab_f){
1436 void s_status(ptr, c)
1437 struct input_code *ptr;
1442 status_check(ptr, c);
1447 #ifdef NUMCHAR_OPTION
1448 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1451 }else if (0xa1 <= c && c <= 0xdf){
1452 status_push_ch(ptr, SSO);
1453 status_push_ch(ptr, c);
1456 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1458 status_push_ch(ptr, c);
1459 #ifdef SHIFTJIS_CP932
1461 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1463 status_push_ch(ptr, c);
1464 #endif /* SHIFTJIS_CP932 */
1466 status_disable(ptr);
1470 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1471 status_push_ch(ptr, c);
1472 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1476 status_disable(ptr);
1479 #ifdef SHIFTJIS_CP932
1481 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1482 status_push_ch(ptr, c);
1483 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1484 set_code_score(ptr, SCORE_CP932);
1489 status_disable(ptr);
1491 #endif /* SHIFTJIS_CP932 */
1495 void e_status(ptr, c)
1496 struct input_code *ptr;
1501 status_check(ptr, c);
1506 #ifdef NUMCHAR_OPTION
1507 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1510 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1512 status_push_ch(ptr, c);
1514 status_disable(ptr);
1518 if (0xa1 <= c && c <= 0xfe){
1519 status_push_ch(ptr, c);
1523 status_disable(ptr);
1529 #ifdef UTF8_INPUT_ENABLE
1530 void w16_status(ptr, c)
1531 struct input_code *ptr;
1538 if (ptr->_file_stat == 0){
1539 if (c == 0xfe || c == 0xff){
1541 status_push_ch(ptr, c);
1542 ptr->_file_stat = 1;
1544 status_disable(ptr);
1545 ptr->_file_stat = -1;
1547 }else if (ptr->_file_stat > 0){
1549 status_push_ch(ptr, c);
1550 }else if (ptr->_file_stat < 0){
1551 status_disable(ptr);
1557 status_disable(ptr);
1558 ptr->_file_stat = -1;
1560 status_push_ch(ptr, c);
1567 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1568 status_push_ch(ptr, c);
1571 status_disable(ptr);
1572 ptr->_file_stat = -1;
1578 void w_status(ptr, c)
1579 struct input_code *ptr;
1584 status_check(ptr, c);
1589 #ifdef NUMCHAR_OPTION
1590 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1593 }else if (0xc0 <= c && c <= 0xdf){
1595 status_push_ch(ptr, c);
1596 }else if (0xe0 <= c && c <= 0xef){
1598 status_push_ch(ptr, c);
1600 status_disable(ptr);
1605 if (0x80 <= c && c <= 0xbf){
1606 status_push_ch(ptr, c);
1607 if (ptr->index > ptr->stat){
1608 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1609 && ptr->buf[2] == 0xbf);
1610 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1611 &ptr->buf[0], &ptr->buf[1]);
1618 status_disable(ptr);
1629 int action_flag = 1;
1630 struct input_code *result = 0;
1631 struct input_code *p = input_code_list;
1633 (p->status_func)(p, c);
1636 }else if(p->stat == 0){
1647 if (result && !estab_f){
1648 set_iconv(TRUE, result->iconv_func);
1649 }else if (c <= DEL){
1650 struct input_code *ptr = input_code_list;
1660 #define STD_GC_BUFSIZE (256)
1661 int std_gc_buf[STD_GC_BUFSIZE];
1671 return std_gc_buf[--std_gc_ndx];
1683 if (std_gc_ndx == STD_GC_BUFSIZE){
1686 std_gc_buf[std_gc_ndx++] = c;
1706 while ((c = (*i_getc)(f)) != EOF)
1715 oconv = output_conv;
1718 /* replace continuation module, from output side */
1720 /* output redirection */
1722 if (noout_f || guess_f){
1729 if (mimeout_f == TRUE) {
1730 o_base64conv = oconv; oconv = base64_conv;
1732 /* base64_count = 0; */
1736 o_crconv = oconv; oconv = cr_conv;
1739 o_rot_conv = oconv; oconv = rot_conv;
1742 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1745 o_hira_conv = oconv; oconv = hira_conv;
1748 o_fconv = oconv; oconv = fold_conv;
1751 if (alpha_f || x0201_f) {
1752 o_zconv = oconv; oconv = z_conv;
1756 i_ungetc = std_ungetc;
1757 /* input redirection */
1760 i_cgetc = i_getc; i_getc = cap_getc;
1761 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1764 i_ugetc = i_getc; i_getc = url_getc;
1765 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1768 #ifdef NUMCHAR_OPTION
1770 i_ngetc = i_getc; i_getc = numchar_getc;
1771 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1774 if (mime_f && mimebuf_f==FIXED_MIME) {
1775 i_mgetc = i_getc; i_getc = mime_getc;
1776 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1779 i_bgetc = i_getc; i_getc = broken_getc;
1780 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1782 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1783 set_iconv(-TRUE, e_iconv);
1784 } else if (input_f == SJIS_INPUT) {
1785 set_iconv(-TRUE, s_iconv);
1786 #ifdef UTF8_INPUT_ENABLE
1787 } else if (input_f == UTF8_INPUT) {
1788 set_iconv(-TRUE, w_iconv);
1789 } else if (input_f == UTF16LE_INPUT) {
1790 set_iconv(-TRUE, w_iconv16);
1793 set_iconv(FALSE, e_iconv);
1797 struct input_code *p = input_code_list;
1805 Conversion main loop. Code detection only.
1815 module_connection();
1820 output_mode = ASCII;
1823 #define NEXT continue /* no output, get next */
1824 #define SEND ; /* output c1 and c2, get next */
1825 #define LAST break /* end of loop, go closing */
1827 while ((c1 = (*i_getc)(f)) != EOF) {
1832 /* in case of 8th bit is on */
1834 /* in case of not established yet */
1835 /* It is still ambiguous */
1836 if (h_conv(f, c2, c1)==EOF)
1842 /* in case of already established */
1844 /* ignore bogus code */
1850 /* second byte, 7 bit code */
1851 /* it might be kanji shifted */
1852 if ((c1 == DEL) || (c1 <= SPACE)) {
1853 /* ignore bogus first code */
1861 #ifdef UTF8_INPUT_ENABLE
1870 #ifdef NUMCHAR_OPTION
1871 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1874 } else if (c1 > DEL) {
1876 if (!estab_f && !iso8859_f) {
1877 /* not established yet */
1880 } else { /* estab_f==TRUE */
1885 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1886 /* SJIS X0201 Case... */
1887 if(iso2022jp_f && x0201_f==NO_X0201) {
1888 (*oconv)(GETA1, GETA2);
1895 } else if (c1==SSO && iconv != s_iconv) {
1896 /* EUC X0201 Case */
1897 c1 = (*i_getc)(f); /* skip SSO */
1899 if (SSP<=c1 && c1<0xe0) {
1900 if(iso2022jp_f && x0201_f==NO_X0201) {
1901 (*oconv)(GETA1, GETA2);
1908 } else { /* bogus code, skip SSO and one byte */
1912 /* already established */
1917 } else if ((c1 > SPACE) && (c1 != DEL)) {
1918 /* in case of Roman characters */
1920 /* output 1 shifted byte */
1924 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1925 /* output 1 shifted byte */
1926 if(iso2022jp_f && x0201_f==NO_X0201) {
1927 (*oconv)(GETA1, GETA2);
1934 /* look like bogus code */
1937 } else if (input_mode == X0208) {
1938 /* in case of Kanji shifted */
1941 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1942 /* Check MIME code */
1943 if ((c1 = (*i_getc)(f)) == EOF) {
1946 } else if (c1 == '?') {
1947 /* =? is mime conversion start sequence */
1948 if(mime_f == STRICT_MIME) {
1949 /* check in real detail */
1950 if (mime_begin_strict(f) == EOF)
1954 } else if (mime_begin(f) == EOF)
1964 /* normal ASCII code */
1967 } else if (c1 == SI) {
1970 } else if (c1 == SO) {
1973 } else if (c1 == ESC ) {
1974 if ((c1 = (*i_getc)(f)) == EOF) {
1975 /* (*oconv)(0, ESC); don't send bogus code */
1977 } else if (c1 == '$') {
1978 if ((c1 = (*i_getc)(f)) == EOF) {
1980 (*oconv)(0, ESC); don't send bogus code
1981 (*oconv)(0, '$'); */
1983 } else if (c1 == '@'|| c1 == 'B') {
1984 /* This is kanji introduction */
1987 set_input_codename("ISO-2022-JP");
1988 debug(input_codename);
1990 } else if (c1 == '(') {
1991 if ((c1 = (*i_getc)(f)) == EOF) {
1992 /* don't send bogus code
1998 } else if (c1 == '@'|| c1 == 'B') {
1999 /* This is kanji introduction */
2004 /* could be some special code */
2011 } else if (broken_f&0x2) {
2012 /* accept any ESC-(-x as broken code ... */
2022 } else if (c1 == '(') {
2023 if ((c1 = (*i_getc)(f)) == EOF) {
2024 /* don't send bogus code
2026 (*oconv)(0, '('); */
2030 /* This is X0201 kana introduction */
2031 input_mode = X0201; shift_mode = X0201;
2033 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2034 /* This is X0208 kanji introduction */
2035 input_mode = ASCII; shift_mode = FALSE;
2037 } else if (broken_f&0x2) {
2038 input_mode = ASCII; shift_mode = FALSE;
2043 /* maintain various input_mode here */
2047 } else if ( c1 == 'N' || c1 == 'n' ){
2049 c3 = (*i_getc)(f); /* skip SS2 */
2050 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2065 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2066 input_mode = ASCII; set_iconv(FALSE, 0);
2072 if (input_mode == X0208)
2073 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2074 else if (input_mode)
2075 (*oconv)(input_mode, c1); /* other special case */
2076 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2077 int c0 = (*i_getc)(f);
2080 (*iconv)(c2, c1, c0);
2086 /* goto next_word */
2090 (*iconv)(EOF, 0, 0);
2103 /** it must NOT be in the kanji shift sequence */
2104 /** it must NOT be written in JIS7 */
2105 /** and it must be after 2 byte 8bit code */
2112 while ((c1 = (*i_getc)(f)) != EOF) {
2118 if (push_hold_buf(c1) == EOF || estab_f){
2124 struct input_code *p = input_code_list;
2125 struct input_code *result = p;
2130 if (p->score < result->score){
2135 set_iconv(FALSE, result->iconv_func);
2140 ** 1) EOF is detected, or
2141 ** 2) Code is established, or
2142 ** 3) Buffer is FULL (but last word is pushed)
2144 ** in 1) and 3) cases, we continue to use
2145 ** Kanji codes by oconv and leave estab_f unchanged.
2150 while (wc < hold_count){
2151 c2 = hold_buf[wc++];
2153 #ifdef NUMCHAR_OPTION
2154 || (c2 & CLASS_MASK) == CLASS_UTF16
2159 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2160 (*iconv)(X0201, c2, 0);
2163 if (wc < hold_count){
2164 c1 = hold_buf[wc++];
2173 if ((*iconv)(c2, c1, 0) < 0){
2175 if (wc < hold_count){
2176 c0 = hold_buf[wc++];
2185 (*iconv)(c2, c1, c0);
2198 if (hold_count >= HOLD_SIZE*2)
2200 hold_buf[hold_count++] = c2;
2201 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2204 int s2e_conv(c2, c1, p2, p1)
2208 #ifdef SHIFTJIS_CP932
2209 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2210 extern unsigned short shiftjis_cp932[3][189];
2211 c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2212 if (c1 == 0) return 1;
2216 #endif /* SHIFTJIS_CP932 */
2217 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2219 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2236 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2239 int ret = s2e_conv(c2, c1, &c2, &c1);
2240 if (ret) return ret;
2253 } else if (c2 == SSO){
2256 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2266 #ifdef UTF8_INPUT_ENABLE
2268 w2e_conv(c2, c1, c0, p2, p1)
2272 extern unsigned short * utf8_to_euc_2bytes[];
2273 extern unsigned short ** utf8_to_euc_3bytes[];
2276 if (0xc0 <= c2 && c2 <= 0xef) {
2277 unsigned short **pp;
2280 if (c0 == 0) return -1;
2281 pp = utf8_to_euc_3bytes[c2 - 0x80];
2282 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2284 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2286 #ifdef NUMCHAR_OPTION
2289 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2294 } else if (c2 == X0201) {
2307 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2315 w16w_conv(val, p2, p1, p0)
2323 }else if (val < 0x800){
2324 *p2 = 0xc0 | (val >> 6);
2325 *p1 = 0x80 | (val & 0x3f);
2328 *p2 = 0xe0 | (val >> 12);
2329 *p1 = 0x80 | ((val >> 6) & 0x3f);
2330 *p0 = 0x80 | (val & 0x3f);
/*
 * ww16_conv - decode one UTF-8 sequence of up to three bytes into a
 * 16-bit code value.
 *
 *   c2  lead byte
 *   c1  first continuation byte (used for two- and three-byte forms)
 *   c0  second continuation byte (used for the three-byte form only)
 *
 * Returns the decoded value; a lead byte below 0xc0 is returned unchanged.
 *
 * The two-byte branch used to shift the continuation bits left by 6, which
 * overlapped them with the lead-byte bits; they belong in bits 0-5, the
 * exact inverse of what w16w_conv() emits for values below 0x800.
 *
 * NOTE(review): the statements that were not visible in the reviewed
 * listing (outer if, third-byte merge, else branch, return) were
 * reconstructed from the visible branches - confirm against the full file.
 */
int
ww16_conv(c2, c1, c0)
     int c2, c1, c0;
{
    unsigned short val;

    if (c2 >= 0xe0){
        /* 1110xxxx 10yyyyyy 10zzzzzz */
        val = (c2 & 0x0f) << 12;
        val |= (c1 & 0x3f) << 6;
        val |= (c0 & 0x3f);
    }else if (c2 >= 0xc0){
        /* 110xxxxx 10yyyyyy */
        val = (c2 & 0x1f) << 6;
        val |= (c1 & 0x3f);
    }else{
        val = c2;
    }
    return val;
}
2353 w16e_conv(val, p2, p1)
2357 extern unsigned short * utf8_to_euc_2bytes[];
2358 extern unsigned short ** utf8_to_euc_3bytes[];
2360 unsigned short **pp;
2364 w16w_conv(val, &c2, &c1, &c0);
2367 pp = utf8_to_euc_3bytes[c2 - 0x80];
2368 psize = sizeof_utf8_to_euc_C2;
2369 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2371 pp = utf8_to_euc_2bytes;
2372 psize = sizeof_utf8_to_euc_2bytes;
2373 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2375 #ifdef NUMCHAR_OPTION
2378 *p1 = CLASS_UTF16 | val;
2387 w_iconv16(c2, c1, c0)
2392 if (c2==0376 && c1==0377){
2393 utf16_mode = UTF16LE_INPUT;
2395 } else if (c2==0377 && c1==0376){
2396 utf16_mode = UTF16BE_INPUT;
2399 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2401 tmp=c1; c1=c2; c2=tmp;
2403 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2407 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2408 if (ret) return ret;
2414 w_iconv_common(c1, c0, pp, psize, p2, p1)
2416 unsigned short **pp;
2424 if (pp == 0) return 1;
2427 if (c1 < 0 || psize <= c1) return 1;
2429 if (p == 0) return 1;
2432 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2434 if (val == 0) return 1;
2437 if (c2 == SO) c2 = X0201;
2446 #ifdef UTF8_OUTPUT_ENABLE
2451 extern unsigned short euc_to_utf8_1byte[];
2452 extern unsigned short * euc_to_utf8_2bytes[];
2453 extern unsigned short * euc_to_utf8_2bytes_ms[];
2457 p = euc_to_utf8_1byte;
2460 c2 = (c2&0x7f) - 0x21;
2461 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2462 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2467 c1 = (c1 & 0x7f) - 0x21;
2468 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2479 #ifdef NUMCHAR_OPTION
2480 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2481 w16w_conv(c1, &c2, &c1, &c0);
2485 if (c0) (*o_putc)(c0);
2494 if (unicode_bom_f==2) {
2502 output_mode = ASCII;
2504 } else if (c2 == ISO8859_1) {
2505 output_mode = ISO8859_1;
2506 (*o_putc)(c1 | 0x080);
2509 w16w_conv((unsigned short)e2w_conv(c2, c1), &c2, &c1, &c0);
2513 if (c0) (*o_putc)(c0);
2528 if (unicode_bom_f==2) {
2530 (*o_putc)((unsigned char)'\377');
2534 (*o_putc)((unsigned char)'\377');
2539 if (c2 == ISO8859_1) {
2542 #ifdef NUMCHAR_OPTION
2543 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2544 c2 = (c1 >> 8) & 0xff;
2548 unsigned short val = (unsigned short)e2w_conv(c2, c1);
2549 c2 = (val >> 8) & 0xff;
2568 #ifdef NUMCHAR_OPTION
2569 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2570 w16e_conv(c1, &c2, &c1);
2576 } else if (c2 == 0) {
2577 output_mode = ASCII;
2579 } else if (c2 == X0201) {
2580 output_mode = JAPANESE_EUC;
2581 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2582 } else if (c2 == ISO8859_1) {
2583 output_mode = ISO8859_1;
2584 (*o_putc)(c1 | 0x080);
2586 if ((c1<0x21 || 0x7e<c1) ||
2587 (c2<0x21 || 0x7e<c2)) {
2588 set_iconv(FALSE, 0);
2589 return; /* too late to rescue this char */
2591 output_mode = JAPANESE_EUC;
2592 (*o_putc)(c2 | 0x080);
2593 (*o_putc)(c1 | 0x080);
2598 e2s_conv(c2, c1, p2, p1)
2599 int c2, c1, *p2, *p1;
2601 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2602 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2610 #ifdef NUMCHAR_OPTION
2611 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2612 w16e_conv(c1, &c2, &c1);
2618 } else if (c2 == 0) {
2619 output_mode = ASCII;
2621 } else if (c2 == X0201) {
2622 output_mode = SHIFT_JIS;
2624 } else if (c2 == ISO8859_1) {
2625 output_mode = ISO8859_1;
2626 (*o_putc)(c1 | 0x080);
2628 if ((c1<0x20 || 0x7e<c1) ||
2629 (c2<0x20 || 0x7e<c2)) {
2630 set_iconv(FALSE, 0);
2631 return; /* too late to rescue this char */
2633 output_mode = SHIFT_JIS;
2634 e2s_conv(c2, c1, &c2, &c1);
2636 #ifdef SHIFTJIS_CP932
2638 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2639 extern unsigned short cp932inv[2][189];
2640 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2646 #endif /* SHIFTJIS_CP932 */
2649 if (prefix_table[(unsigned char)c1]){
2650 (*o_putc)(prefix_table[(unsigned char)c1]);
2661 #ifdef NUMCHAR_OPTION
2662 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2663 w16e_conv(c1, &c2, &c1);
2667 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2670 (*o_putc)(ascii_intro);
2671 output_mode = ASCII;
2674 } else if (c2==X0201) {
2675 if (output_mode!=X0201) {
2676 output_mode = X0201;
2682 } else if (c2==ISO8859_1) {
2683 /* iso8859 introduction, or 8th bit on */
2684 /* Can we convert in 7bit form using ESC-'-'-A ?
2686 output_mode = ISO8859_1;
2688 } else if (c2 == 0) {
2689 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2692 (*o_putc)(ascii_intro);
2693 output_mode = ASCII;
2697 if (output_mode != X0208) {
2698 output_mode = X0208;
2701 (*o_putc)(kanji_intro);
2703 if (c1<0x20 || 0x7e<c1)
2705 if (c2<0x20 || 0x7e<c2)
2717 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2719 } else if (base64_count>66 && mimeout_mode) {
2720 (*o_base64conv)(EOF,0);
2722 (*o_putc)('\t'); base64_count += 7;
2724 (*o_base64conv)(c2,c1);
2728 static int broken_buf[3];
2729 static int broken_counter = 0;
2730 static int broken_last = 0;
2737 if (broken_counter>0) {
2738 return broken_buf[--broken_counter];
2741 if (c=='$' && broken_last != ESC
2742 && (input_mode==ASCII || input_mode==X0201)) {
2745 if (c1=='@'|| c1=='B') {
2746 broken_buf[0]=c1; broken_buf[1]=c;
2753 } else if (c=='(' && broken_last != ESC
2754 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
2757 if (c1=='J'|| c1=='B') {
2758 broken_buf[0]=c1; broken_buf[1]=c;
2776 if (broken_counter<2)
2777 broken_buf[broken_counter++]=c;
2781 static int prev_cr = 0;
2789 if (! (c2==0&&c1==NL) ) {
2795 } else if (c1=='\r') {
2797 } else if (c1=='\n') {
2798 if (crmode_f==CRLF) {
2799 (*o_crconv)(0,'\r');
2800 } else if (crmode_f==CR) {
2801 (*o_crconv)(0,'\r');
2805 } else if (c1!='\032' || crmode_f!=NL){
2811 Return value of fold_conv()
2813 \n add newline and output char
2814 \r add newline and output nothing
2817 1 (or else) normal output
2819 fold state in prev (previous character)
2821 >0x80 Japanese (X0208/X0201)
2826 This fold algorithm does not preserve leading space in a line.
2827 This is the main difference from fmt.
/*
 * Column width of one output character: 2 when the first byte c2 is
 * non-zero, otherwise 1.  c1 is accepted but not used.  The argument is
 * parenthesized so that an expression argument is tested as a whole.
 */
#define char_size(c2,c1) ((c2)?2:1)
2839 if (c1== '\r' && !fold_preserve_f) {
2840 fold_state=0; /* ignore cr */
2841 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
2843 fold_state=0; /* ignore cr */
2844 } else if (c1== BS) {
2845 if (f_line>0) f_line--;
2847 } else if (c2==EOF && f_line != 0) { /* close open last line */
2849 } else if ((c1=='\n' && !fold_preserve_f)
2850 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
2851 && fold_preserve_f)) {
2853 if (fold_preserve_f) {
2857 } else if ((f_prev == c1 && !fold_preserve_f)
2858 || (f_prev == '\n' && fold_preserve_f)
2859 ) { /* duplicate newline */
2862 fold_state = '\n'; /* output two newline */
2868 if (f_prev&0x80) { /* Japanese? */
2870 fold_state = 0; /* ignore given single newline */
2871 } else if (f_prev==' ') {
2875 if (++f_line<=fold_len)
2879 fold_state = '\r'; /* fold and output nothing */
2883 } else if (c1=='\f') {
2888 fold_state = '\n'; /* output newline and clear */
2889 } else if ( (c2==0 && c1==' ')||
2890 (c2==0 && c1=='\t')||
2891 (c2=='!'&& c1=='!')) {
2892 /* X0208 kankaku or ascii space */
2893 if (f_prev == ' ') {
2894 fold_state = 0; /* remove duplicate spaces */
2897 if (++f_line<=fold_len)
2898 fold_state = ' '; /* output ASCII space only */
2900 f_prev = ' '; f_line = 0;
2901 fold_state = '\r'; /* fold and output nothing */
2905 prev0 = f_prev; /* we still need this one... , but almost done */
2907 if (c2 || c2==X0201)
2908 f_prev |= 0x80; /* this is Japanese */
2909 f_line += char_size(c2,c1);
2910 if (f_line<=fold_len) { /* normal case */
2913 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
2914 f_line = char_size(c2,c1);
2915 fold_state = '\n'; /* We can't wait, do fold now */
2916 } else if (c2==X0201) {
2917 /* simple kinsoku rules return 1 means no folding */
2918 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
2919 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
2920 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
2921 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
2922 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
2923 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
2924 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
2926 fold_state = '\n';/* add one new f_line before this character */
2929 fold_state = '\n';/* add one new f_line before this character */
2932 /* kinsoku point in ASCII */
2933 if ( c1==')'|| /* { [ ( */
2944 /* just after special */
2945 } else if (!is_alnum(prev0)) {
2946 f_line = char_size(c2,c1);
2948 } else if ((prev0==' ') || /* ignored new f_line */
2949 (prev0=='\n')|| /* ignored new f_line */
2950 (prev0&0x80)) { /* X0208 - ASCII */
2951 f_line = char_size(c2,c1);
2952 fold_state = '\n';/* add one new f_line before this character */
2954 fold_state = 1; /* default no fold in ASCII */
2958 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
2959 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
2960 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
2961 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
2962 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
2963 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
2964 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
2965 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
2966 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
2967 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
2968 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
2969 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
2970 /* default no fold in kinsoku */
2973 f_line = char_size(c2,c1);
2974 /* add one new f_line before this character */
2977 f_line = char_size(c2,c1);
2979 /* add one new f_line before this character */
2984 /* terminator process */
2985 switch(fold_state) {
3004 int z_prev2=0,z_prev1=0;
3011 /* if (c2) c1 &= 0x7f; assertion */
3013 if (x0201_f && z_prev2==X0201) { /* X0201 */
3014 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3016 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3018 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3020 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3024 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3033 if (x0201_f && c2==X0201) {
3034 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3035 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3036 z_prev1 = c1; z_prev2 = c2;
3039 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3044 /* JISX0208 Alphabet */
3045 if (alpha_f && c2 == 0x23 ) {
3047 } else if (alpha_f && c2 == 0x21 ) {
3048 /* JISX0208 Kigou */
3053 } else if (alpha_f&0x4) {
3058 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3064 case '>': entity = ">"; break;
3065 case '<': entity = "<"; break;
3066 case '\"': entity = """; break;
3067 case '&': entity = "&"; break;
3070 while (*entity) (*o_zconv)(0, *entity++);
3080 #define rot13(c) ( \
3082 (c <= 'M') ? (c + 13): \
3083 (c <= 'Z') ? (c - 13): \
3085 (c <= 'm') ? (c + 13): \
3086 (c <= 'z') ? (c - 13): \
3090 #define rot47(c) ( \
3092 ( c <= 'O' ) ? (c + 47) : \
3093 ( c <= '~' ) ? (c - 47) : \
3101 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3107 (*o_rot_conv)(c2,c1);
3114 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3116 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3119 (*o_hira_conv)(c2,c1);
3124 iso2022jp_check_conv(c2,c1)
3127 static int range[RANGE_NUM_MAX][2] = {
3150 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3154 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3159 for (i = 0; i < RANGE_NUM_MAX; i++) {
3160 start = range[i][0];
3163 if (c >= start && c <= end) {
3168 (*o_iso2022jp_check_conv)(c2,c1);
3172 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3174 unsigned char *mime_pattern[] = {
3175 (unsigned char *)"\075?EUC-JP?B?",
3176 (unsigned char *)"\075?SHIFT_JIS?B?",
3177 (unsigned char *)"\075?ISO-8859-1?Q?",
3178 (unsigned char *)"\075?ISO-8859-1?B?",
3179 (unsigned char *)"\075?ISO-2022-JP?B?",
3180 (unsigned char *)"\075?ISO-2022-JP?Q?",
3181 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3182 (unsigned char *)"\075?UTF-8?B?",
3183 (unsigned char *)"\075?UTF-8?Q?",
3185 (unsigned char *)"\075?US-ASCII?Q?",
3190 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3191 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3192 e_iconv, s_iconv, 0, 0, 0, 0,
3193 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3199 int mime_encode[] = {
3200 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3201 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3208 int mime_encode_method[] = {
3209 'B', 'B','Q', 'B', 'B', 'Q',
3210 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3218 #define MAXRECOVER 20
3220 /* I don't trust portability of toupper */
/*
 * ASCII-only character helpers.
 *   nkf_toupper(c)   maps 'a'..'z' to 'A'..'Z', anything else unchanged
 *   nkf_isdigit(c)   non-zero for '0'..'9'
 *   nkf_isxdigit(c)  non-zero for '0'..'9', 'a'..'f', 'A'..'F'
 * Every use of the argument is parenthesized so expression arguments are
 * compared as a whole.  The argument is still evaluated more than once,
 * so it must not have side effects.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
3228 if (i_getc!=mime_getc) {
3229 i_mgetc = i_getc; i_getc = mime_getc;
3230 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3231 if(mime_f==STRICT_MIME) {
3232 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3233 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3239 unswitch_mime_getc()
3241 if(mime_f==STRICT_MIME) {
3242 i_mgetc = i_mgetc_buf;
3243 i_mungetc = i_mungetc_buf;
3246 i_ungetc = i_mungetc;
3250 mime_begin_strict(f)
3255 unsigned char *p,*q;
3256 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3258 mime_decode_mode = FALSE;
3259 /* =? has been checked */
3261 p = mime_pattern[j];
3264 for(i=2;p[i]>' ';i++) { /* start at =? */
3265 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3266 /* pattern fails, try next one */
3268 while ((p = mime_pattern[++j])) {
3269 for(k=2;k<i;k++) /* assume length(p) > i */
3270 if (p[k]!=q[k]) break;
3271 if (k==i && nkf_toupper(c1)==p[k]) break;
3273 if (p) continue; /* found next one, continue */
3274 /* all fails, output from recovery buffer */
3282 mime_decode_mode = p[i-2];
3284 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3286 if (mime_decode_mode=='B') {
3287 mimebuf_f = unbuf_f;
3289 /* do MIME integrity check */
3290 return mime_integrity(f,mime_pattern[j]);
3302 /* we don't keep eof of Fifo, because it contains ?= as
3303 a terminator. It was checked in mime_integrity. */
3304 return ((mimebuf_f)?
3305 (*i_mgetc_buf)(f):Fifo(mime_input++));
3309 mime_ungetc_buf(c,f)
3314 (*i_mungetc_buf)(c,f);
3316 Fifo(--mime_input)=c;
3327 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3328 /* re-read and convert again from mime_buffer. */
3330 /* =? has been checked */
3332 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3333 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3334 /* We accept any character type even if it is broken by new lines */
3335 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3336 if (c1=='\n'||c1==' '||c1=='\r'||
3337 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3339 /* Failed. But this could be another MIME preamble */
3347 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3348 if (!(++i<MAXRECOVER) || c1==EOF) break;
3349 if (c1=='b'||c1=='B') {
3350 mime_decode_mode = 'B';
3351 } else if (c1=='q'||c1=='Q') {
3352 mime_decode_mode = 'Q';
3356 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3357 if (!(++i<MAXRECOVER) || c1==EOF) break;
3359 mime_decode_mode = FALSE;
3365 if (!mime_decode_mode) {
3366 /* false MIME preamble, restart from mime_buffer */
3367 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3368 /* Since we are in MIME mode until buffer becomes empty, */
3369 /* we never go into mime_begin again for a while. */
3372 /* discard mime preamble, and goto MIME mode */
3374 /* do no MIME integrity check */
3375 return c1; /* used only for checking EOF */
3390 fprintf(stderr, "%s\n", str);
3396 set_input_codename (codename)
3401 strcmp(codename, "") != 0 &&
3402 strcmp(codename, input_codename) != 0)
3404 is_inputcode_mixed = TRUE;
3406 input_codename = codename;
3407 is_inputcode_set = TRUE;
3411 print_guessed_code (filename)
3414 char *codename = "BINARY";
3415 if (!is_inputcode_mixed) {
3416 if (strcmp(input_codename, "") == 0) {
3419 codename = input_codename;
3422 if (filename != NULL) printf("%s:", filename);
3423 printf("%s\n", codename);
3430 if (nkf_isdigit(x)) return x - '0';
3431 return nkf_toupper(x) - 'A' + 10;
3436 #ifdef ANSI_C_PROTOTYPE
3437 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3440 hex_getc(ch, f, g, u)
3453 if (!nkf_isxdigit(c2)){
3458 if (!nkf_isxdigit(c3)){
3463 return (hex2bin(c2) << 4) | hex2bin(c3);
3470 return hex_getc(':', f, i_cgetc, i_cungetc);
3478 return (*i_cungetc)(c, f);
3485 return hex_getc('%', f, i_ugetc, i_uungetc);
3493 return (*i_uungetc)(c, f);
3497 #ifdef NUMCHAR_OPTION
3502 int (*g)() = i_ngetc;
3503 int (*u)() = i_nungetc;
3514 if (buf[i] == 'x' || buf[i] == 'X'){
3515 for (j = 0; j < 5; j++){
3517 if (!nkf_isxdigit(buf[i])){
3524 c |= hex2bin(buf[i]);
3527 for (j = 0; j < 6; j++){
3531 if (!nkf_isdigit(buf[i])){
3538 c += hex2bin(buf[i]);
3544 return CLASS_UTF16 | c;
3554 numchar_ungetc(c, f)
3558 return (*i_nungetc)(c, f);
3567 int c1, c2, c3, c4, cc;
3568 int t1, t2, t3, t4, mode, exit_mode;
3570 if (mime_top != mime_last) { /* Something is in FIFO */
3571 return Fifo(mime_top++);
3573 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3574 mime_decode_mode=FALSE;
3575 unswitch_mime_getc();
3576 return (*i_getc)(f);
3579 if (mimebuf_f == FIXED_MIME)
3580 exit_mode = mime_decode_mode;
3583 if (mime_decode_mode == 'Q') {
3584 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3586 if (c1=='_') return ' ';
3587 if (c1!='=' && c1!='?') {
3591 mime_decode_mode = exit_mode; /* prepare for quit */
3592 if (c1<=' ') return c1;
3593 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3594 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3595 /* end Q encoding */
3596 input_mode = exit_mode;
3597 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3598 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3601 if (c1=='='&&c2<' ') { /* this is soft wrap */
3602 while((c1 = (*i_mgetc)(f)) <=' ') {
3603 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3605 mime_decode_mode = 'Q'; /* still in MIME */
3606 goto restart_mime_q;
3609 mime_decode_mode = 'Q'; /* still in MIME */
3613 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3614 if (c2<=' ') return c2;
3615 mime_decode_mode = 'Q'; /* still in MIME */
/*
 * Value of one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f');
 * any other character yields 0.  Arguments are parenthesized so that an
 * expression argument is compared and subtracted as a whole.
 */
#define hex(c) (('0'<=(c)&&(c)<='9')?((c)-'0'):\
 ('A'<=(c)&&(c)<='F')?((c)-'A'+10):('a'<=(c)&&(c)<='f')?((c)-'a'+10):0)
3618 return ((hex(c2)<<4) + hex(c3));
3621 if (mime_decode_mode != 'B') {
3622 mime_decode_mode = FALSE;
3623 return (*i_mgetc)(f);
3627 /* Base64 encoding */
3629 MIME allows line break in the middle of
3630 Base64, but we are very pessimistic in decoding
3631 in unbuf mode because MIME encoded code may broken by
3632 less or editor's control sequence (such as ESC-[-K in unbuffered
3633 mode. ignore incomplete MIME.
3635 mode = mime_decode_mode;
3636 mime_decode_mode = exit_mode; /* prepare for quit */
3638 while ((c1 = (*i_mgetc)(f))<=' ') {
3643 if ((c2 = (*i_mgetc)(f))<=' ') {
3646 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3647 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3650 if ((c1 == '?') && (c2 == '=')) {
3652 while((c1=(*i_getc)(f))!=EOF && c1==SPACE
3653 /* && (c1==NL||c1==TAB||c1=='\r') */ ) ;
3657 if ((c3 = (*i_mgetc)(f))<=' ') {
3660 if (mime_f != STRICT_MIME) goto mime_c3_retry;
3661 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3665 if ((c4 = (*i_mgetc)(f))<=' ') {
3668 if (mime_f != STRICT_MIME) goto mime_c4_retry;
3669 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3673 mime_decode_mode = mode; /* still in MIME sigh... */
3675 /* BASE 64 decoding */
3677 t1 = 0x3f & base64decode(c1);
3678 t2 = 0x3f & base64decode(c2);
3679 t3 = 0x3f & base64decode(c3);
3680 t4 = 0x3f & base64decode(c4);
3681 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
3683 Fifo(mime_last++) = cc;
3684 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
3686 Fifo(mime_last++) = cc;
3687 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
3689 Fifo(mime_last++) = cc;
3694 return Fifo(mime_top++);
3702 Fifo(--mime_top) = c;
3713 /* In buffered mode, read until =? or NL or buffer full
3715 mime_input = mime_top;
3716 mime_last = mime_top;
3717 while(*p) Fifo(mime_input++) = *p++;
3720 while((c=(*i_getc)(f))!=EOF) {
3721 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
3722 break; /* buffer full */
3724 if (c=='=' && d=='?') {
3725 /* checked. skip header, start decode */
3726 Fifo(mime_input++) = c;
3727 /* mime_last_input = mime_input; */
3732 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3734 /* Should we check length mod 4? */
3735 Fifo(mime_input++) = c;
3738 /* In case of Incomplete MIME, no MIME decode */
3739 Fifo(mime_input++) = c;
3740 mime_last = mime_input; /* point undecoded buffer */
3741 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
3742 switch_mime_getc(); /* anyway we need buffered getc */
3753 i = c - 'A'; /* A..Z 0-25 */
3755 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
3757 } else if (c > '/') {
3758 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
3759 } else if (c == '+') {
3760 i = '>' /* 62 */ ; /* + 62 */
3762 i = '?' /* 63 */ ; /* / 63 */
3767 static char basis_64[] =
3768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3778 p = mime_pattern[0];
3779 for(i=0;mime_encode[i];i++) {
3780 if (mode == mime_encode[i]) {
3781 p = mime_pattern[i];
3785 mimeout_mode = mime_encode_method[i];
3787 /* (*o_mputc)(' '); */
/*
 * Convert a 4-bit value (0..15) to its upper-case hexadecimal digit
 * character.  The argument is parenthesized so callers no longer need to
 * wrap expression arguments in an extra pair of parentheses.
 */
#define itoh4(c) ((c)>=10?(c)+'A'-10:(c)+'0')
3810 if (mimeout_f==FIXED_MIME) {
3811 if (base64_count>71) {
3819 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 )
3820 && mimeout_f!=FIXED_MIME) {
3821 if (mimeout_mode=='Q') {
3828 if (mimeout_mode!='B' || c!=SPACE) {
3837 } else if (!mimeout_mode && mimeout_f!=FIXED_MIME) {
3838 open_mime(output_mode);
3840 } else { /* c==EOF */
3841 switch(mimeout_mode) {
3846 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
3852 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
3858 if (mimeout_f!=FIXED_MIME) {
3860 } else if (mimeout_mode != 'Q')
3865 switch(mimeout_mode) {
3869 (*o_mputc)(itoh4(((c>>4)&0xf)));
3870 (*o_mputc)(itoh4((c&0xf)));
3877 (*o_mputc)(basis_64[c>>2]);
3882 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
3888 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
3889 (*o_mputc)(basis_64[c & 0x3F]);
3902 struct input_code *p = input_code_list;
3915 mime_f = STRICT_MIME;
3920 #if defined(MSDOS) || defined(__OS2__)
3925 iso2022jp_f = FALSE;
3926 #ifdef UTF8_OUTPUT_ENABLE
3929 ms_ucs_map_f = FALSE;
3941 is_inputcode_mixed = FALSE;
3942 is_inputcode_set = FALSE;
3946 #ifdef SHIFTJIS_CP932
3952 for (i = 0; i < 256; i++){
3953 prefix_table[i] = 0;
3956 #ifdef UTF8_INPUT_ENABLE
3957 utf16_mode = UTF16LE_INPUT;
3963 fold_preserve_f = FALSE;
3966 kanji_intro = DEFAULT_J;
3967 ascii_intro = DEFAULT_R;
3968 fold_margin = FOLD_MARGIN;
3969 output_conv = DEFAULT_CONV;
3970 oconv = DEFAULT_CONV;
3971 o_zconv = no_connection;
3972 o_fconv = no_connection;
3973 o_crconv = no_connection;
3974 o_rot_conv = no_connection;
3975 o_hira_conv = no_connection;
3976 o_base64conv = no_connection;
3977 o_iso2022jp_check_conv = no_connection;
3980 i_ungetc = std_ungetc;
3982 i_bungetc = std_ungetc;
3985 i_mungetc = std_ungetc;
3986 i_mgetc_buf = std_getc;
3987 i_mungetc_buf = std_ungetc;
3988 output_mode = ASCII;
3991 mime_decode_mode = FALSE;
3997 z_prev2=0,z_prev1=0;
4003 no_connection(c2,c1)
4006 no_connection2(c2,c1,0);
4010 no_connection2(c2,c1,c0)
4013 fprintf(stderr,"nkf internal module connection failure.\n");
/*
 * Usage text, written to stderr one line per fprintf call.  Each string is
 * passed to fprintf as the format argument, so a literal percent sign has to
 * be written as "%%".  Lines guarded by #ifdef are only printed when the
 * corresponding feature macro is defined at build time.
 */
4021 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4022 fprintf(stderr,"Flags:\n");
4023 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4024 #ifdef DEFAULT_CODE_SJIS
4025 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4027 #ifdef DEFAULT_CODE_JIS
4028 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4030 #ifdef DEFAULT_CODE_EUC
4031 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4033 #ifdef DEFAULT_CODE_UTF8
4034 fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4036 #ifdef UTF8_OUTPUT_ENABLE
4037 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4039 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4040 #ifdef UTF8_INPUT_ENABLE
4041 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4043 fprintf(stderr,"t no conversion\n");
4044 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4045 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4046 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
4047 fprintf(stderr,"v Show this usage. V: show version\n");
4048 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4049 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4050 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4051 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4052 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4053 fprintf(stderr," 3: Convert HTML Entity\n");
4054 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4055 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4057 fprintf(stderr,"T Text mode output\n");
4059 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4060 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4061 fprintf(stderr,"I Convert non ISO-2022-JP character to GETA\n");
4062 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4063 fprintf(stderr,"long name options\n");
4064 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4065 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4066 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
/* The percent sign is doubled: a lone '%' here would start an invalid conversion. */
4068 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4070 #ifdef NUMCHAR_OPTION
4071 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4073 #ifdef SHIFTJIS_CP932
4074 fprintf(stderr," --no-cp932 Don't convert Shift_JIS FAxx-FCxx to equivalent CP932\n");
4076 #ifdef UTF8_OUTPUT_ENABLE
4077 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4080 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4082 fprintf(stderr," -g, --guess Guess the input code\n");
4083 fprintf(stderr," --help,--version\n");
4090 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4091 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4094 #if defined(MSDOS) && defined(__WIN16__)
4097 #if defined(MSDOS) && defined(__WIN32__)
4103 ,Version,Patchlevel);
4104 fprintf(stderr,"\n%s\n",CopyRight);
4109 ** Patch authors
4110 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4111 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4112 ** ohta@src.ricoh.co.jp (Junn Ohta)
4113 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4114 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4115 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4116 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4117 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4118 ** GHG00637@nifty-serve.or.jp (COW)