1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be used as a drop-in replacement for the previous nkf.
32 ** When invoked as, e.g., "nkf -e", input that auto-detection judges to be UTF-8
33 ** is converted to euc-jp as is.
35 ** It is still quite likely to contain bugs
36 ** (especially in auto-detection, mixed encodings and error handling).
38 ** If you find any problems, please report them to
39 ** E-Mail: furukawa@tcp-ip.or.jp
40 **
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.55 2005/01/24 08:19:34 naruse Exp $ */
43 #define NKF_VERSION "2.0.4"
44 #define NKF_RELEASE_DATE "2005-01-01"
47 static char *CopyRight =
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
55 ** USAGE: nkf [flags] [file]
58 ** b Output is buffered (DEFAULT)
59 ** u Output is unbuffered
63 ** j Output code is JIS 7 bit (DEFAULT SELECT)
64 ** s Output code is MS Kanji (DEFAULT SELECT)
65 ** e Output code is AT&T EUC (DEFAULT SELECT)
66 ** w Output code is UTF-8 (DEFAULT SELECT)
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
69 ** m MIME conversion for ISO-2022-JP
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
73 ** M MIME output conversion
75 ** r {de/en}crypt ROT13/47
79 ** T Text mode output (for MS-DOS)
81 ** x Do not convert X0201 kana into X0208
82 ** Z Convert X0208 alphabet to ASCII
87 ** B try to fix broken JIS, missing Escape
88 ** B[1-9] broken level
90 ** O Output to 'nkf.out' file or last file name
91 ** d Delete \r in line feed
92 ** c Add \r in line feed
93 ** -- other long option
94 ** -- ignore following option (don't use with -O )
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS)
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
115 #if defined(MSDOS) || defined(__OS2__)
122 #define setbinmode(fp) fsetbin(fp)
123 #else /* Microsoft C, Turbo C */
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
126 #else /* UNIX,OS/2 */
127 #define setbinmode(fp)
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
136 /*Borland C++ 4.5 EasyWin*/
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
146 /* added by satoru@isoternet.org */
148 #include <sys/stat.h>
149 #ifndef MSDOS /* UNIX, OS/2 */
153 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
154 #include <sys/utime.h>
155 #elif defined(__TURBOC__) /* BCC */
157 #elif defined(LSI_C) /* LSI C */
169 /* state of output_mode and input_mode
187 /* Input Assumption */
191 #define LATIN1_INPUT 6
193 #define STRICT_MIME 8
198 #define JAPANESE_EUC 10
202 #define UTF8_INPUT 13
203 #define UTF16LE_INPUT 14
204 #define UTF16BE_INPUT 15
/*
 * is_alnum(c): non-zero when c is an ASCII letter ('a'-'z', 'A'-'Z')
 * or an ASCII digit ('0'-'9'), zero otherwise.
 *
 * Every use of the argument is parenthesized so that an argument containing
 * low-precedence operators (for example a conditional expression) expands
 * correctly.  The argument is still evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
227 #define HOLD_SIZE 1024
228 #define IOBUF_SIZE 16384
230 #define DEFAULT_J 'B'
231 #define DEFAULT_R 'B'
233 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
234 #define SJ6394 0x0161 /* 63 - 94 ku offset */
236 #define RANGE_NUM_MAX 18
241 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
242 #define sizeof_euc_utf8 94
243 #define sizeof_euc_to_utf8_1byte 94
244 #define sizeof_euc_to_utf8_2bytes 94
245 #define sizeof_utf8_to_euc_C2 64
246 #define sizeof_utf8_to_euc_E5B8 64
247 #define sizeof_utf8_to_euc_2bytes 112
248 #define sizeof_utf8_to_euc_3bytes 112
251 /* MIME preprocessor */
254 #ifdef EASYWIN /*Easy Win */
255 extern POINT _BufferSize;
258 /* function prototype */
260 #ifdef ANSI_C_PROTOTYPE
262 #define STATIC static
274 void (*status_func)PROTO((struct input_code *, int));
275 int (*iconv_func)PROTO((int c2, int c1, int c0));
279 STATIC char *input_codename = "";
281 STATIC int noconvert PROTO((FILE *f));
282 STATIC int kanji_convert PROTO((FILE *f));
283 STATIC int h_conv PROTO((FILE *f,int c2,int c1));
284 STATIC int push_hold_buf PROTO((int c2));
285 STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
286 STATIC int s_iconv PROTO((int c2,int c1,int c0));
287 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
288 STATIC int e_iconv PROTO((int c2,int c1,int c0));
289 #ifdef UTF8_INPUT_ENABLE
290 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
291 STATIC int w_iconv PROTO((int c2,int c1,int c0));
292 STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
293 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
294 STATIC int ww16_conv PROTO((int c2, int c1, int c0));
296 #ifdef UTF8_OUTPUT_ENABLE
297 STATIC int e2w_conv PROTO((int c2,int c1));
298 STATIC void w_oconv PROTO((int c2,int c1));
299 STATIC void w_oconv16 PROTO((int c2,int c1));
301 STATIC void e_oconv PROTO((int c2,int c1));
302 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1));
303 STATIC void s_oconv PROTO((int c2,int c1));
304 STATIC void j_oconv PROTO((int c2,int c1));
305 STATIC void fold_conv PROTO((int c2,int c1));
306 STATIC void cr_conv PROTO((int c2,int c1));
307 STATIC void z_conv PROTO((int c2,int c1));
308 STATIC void rot_conv PROTO((int c2,int c1));
309 STATIC void hira_conv PROTO((int c2,int c1));
310 STATIC void base64_conv PROTO((int c2,int c1));
311 STATIC void iso2022jp_check_conv PROTO((int c2,int c1));
312 STATIC void no_connection PROTO((int c2,int c1));
313 STATIC int no_connection2 PROTO((int c2,int c1,int c0));
315 STATIC void code_score PROTO((struct input_code *ptr));
316 STATIC void code_status PROTO((int c));
318 STATIC void std_putc PROTO((int c));
319 STATIC int std_getc PROTO((FILE *f));
320 STATIC int std_ungetc PROTO((int c,FILE *f));
322 STATIC int broken_getc PROTO((FILE *f));
323 STATIC int broken_ungetc PROTO((int c,FILE *f));
325 STATIC int mime_begin PROTO((FILE *f));
326 STATIC int mime_getc PROTO((FILE *f));
327 STATIC int mime_ungetc PROTO((int c,FILE *f));
329 STATIC int mime_begin_strict PROTO((FILE *f));
330 STATIC int mime_getc_buf PROTO((FILE *f));
331 STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
332 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
334 STATIC int base64decode PROTO((int c));
335 STATIC void mime_putc PROTO((int c));
336 STATIC void open_mime PROTO((int c));
337 STATIC void close_mime PROTO(());
338 STATIC void usage PROTO(());
339 STATIC void version PROTO(());
340 STATIC void options PROTO((unsigned char *c));
342 STATIC void reinit PROTO(());
347 static unsigned char stdibuf[IOBUF_SIZE];
348 static unsigned char stdobuf[IOBUF_SIZE];
349 static unsigned char hold_buf[HOLD_SIZE*2];
350 static int hold_count;
352 /* MIME preprocessor fifo */
354 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
355 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
356 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
357 static unsigned char mime_buf[MIME_BUF_SIZE];
358 static unsigned int mime_top = 0;
359 static unsigned int mime_last = 0; /* decoded */
360 static unsigned int mime_input = 0; /* undecoded */
363 static int unbuf_f = FALSE;
364 static int estab_f = FALSE;
365 static int nop_f = FALSE;
366 static int binmode_f = TRUE; /* binary mode */
367 static int rot_f = FALSE; /* rot13/47 mode */
368 static int hira_f = FALSE; /* hiragana/katakana conversion */
369 static int input_f = FALSE; /* non fixed input code */
370 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
371 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
372 static int mimebuf_f = FALSE; /* MIME buffered input */
373 static int broken_f = FALSE; /* convert ESC-less broken JIS */
374 static int iso8859_f = FALSE; /* ISO8859 through */
375 static int mimeout_f = FALSE; /* base64 mode */
376 #if defined(MSDOS) || defined(__OS2__)
377 static int x0201_f = TRUE; /* Assume JISX0201 kana */
379 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
381 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
382 #ifdef UTF8_OUTPUT_ENABLE
383 static int unicode_bom_f= 0; /* Output Unicode BOM */
384 static int w_oconv16_LE = 0; /* utf-16 little endian */
385 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
389 #ifdef NUMCHAR_OPTION
391 #define CLASS_MASK 0x0f000000
392 #define CLASS_UTF16 0x01000000
396 static int cap_f = FALSE;
397 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
398 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
399 STATIC int cap_getc PROTO((FILE *f));
400 STATIC int cap_ungetc PROTO((int c,FILE *f));
402 static int url_f = FALSE;
403 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
404 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
405 STATIC int url_getc PROTO((FILE *f));
406 STATIC int url_ungetc PROTO((int c,FILE *f));
408 static int numchar_f = FALSE;
409 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */
410 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
411 STATIC int numchar_getc PROTO((FILE *f));
412 STATIC int numchar_ungetc PROTO((int c,FILE *f));
416 static int noout_f = FALSE;
417 STATIC void no_putc PROTO((int c));
418 static int debug_f = FALSE;
419 STATIC void debug PROTO((char *str));
422 static int guess_f = FALSE;
423 STATIC void print_guessed_code PROTO((char *filename));
424 STATIC void set_input_codename PROTO((char *codename));
425 static int is_inputcode_mixed = FALSE;
426 static int is_inputcode_set = FALSE;
429 static int exec_f = 0;
432 #ifdef SHIFTJIS_CP932
433 STATIC int cp932_f = TRUE;
434 #define CP932_TABLE_BEGIN (0xfa)
435 #define CP932_TABLE_END (0xfc)
437 STATIC int cp932inv_f = TRUE;
438 #define CP932INV_TABLE_BEGIN (0xed)
439 #define CP932INV_TABLE_END (0xee)
441 /* STATIC int cp932_conv PROTO((int c2, int c1)); */
442 #endif /* SHIFTJIS_CP932 */
445 STATIC int x0212_f = FALSE;
446 static int x0212_shift PROTO((int c));
447 static int x0212_unshift PROTO((int c));
450 STATIC unsigned char prefix_table[256];
452 STATIC void e_status PROTO((struct input_code *, int));
453 STATIC void s_status PROTO((struct input_code *, int));
455 #ifdef UTF8_INPUT_ENABLE
456 STATIC void w_status PROTO((struct input_code *, int));
457 STATIC void w16_status PROTO((struct input_code *, int));
458 static int utf16_mode = UTF16LE_INPUT;
461 struct input_code input_code_list[] = {
462 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
463 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
464 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
465 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
469 static int mimeout_mode = 0;
470 static int base64_count = 0;
472 /* X0208 -> ASCII converter */
475 static int f_line = 0; /* chars in line */
476 static int f_prev = 0;
477 static int fold_preserve_f = FALSE; /* preserve new lines */
478 static int fold_f = FALSE;
479 static int fold_len = 0;
482 static unsigned char kanji_intro = DEFAULT_J;
483 static unsigned char ascii_intro = DEFAULT_R;
487 #define FOLD_MARGIN 10
488 #define DEFAULT_FOLD 60
490 static int fold_margin = FOLD_MARGIN;
494 #ifdef DEFAULT_CODE_JIS
495 # define DEFAULT_CONV j_oconv
497 #ifdef DEFAULT_CODE_SJIS
498 # define DEFAULT_CONV s_oconv
500 #ifdef DEFAULT_CODE_EUC
501 # define DEFAULT_CONV e_oconv
503 #ifdef DEFAULT_CODE_UTF8
504 # define DEFAULT_CONV w_oconv
507 /* process default */
508 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
510 static void (*oconv)PROTO((int c2,int c1)) = no_connection;
511 /* s_iconv or oconv */
512 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
514 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
515 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
516 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
517 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
518 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
519 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
520 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
522 /* static redirections */
524 static void (*o_putc)PROTO((int c)) = std_putc;
526 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
527 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
529 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of bgetc */
530 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
532 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
534 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
535 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
537 /* for strict mime */
538 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
539 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
542 static int output_mode = ASCII, /* output kanji mode */
543 input_mode = ASCII, /* input kanji mode */
544 shift_mode = FALSE; /* TRUE shift out, or X0201 */
545 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
547 /* X0201 / X0208 conversion tables */
549 /* X0201 kana conversion table */
552 unsigned char cv[]= {
553 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
554 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
555 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
556 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
557 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
558 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
559 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
560 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
561 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
562 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
563 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
564 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
565 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
566 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
567 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
568 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
572 /* X0201 kana conversion table for dakuten */
575 unsigned char dv[]= {
576 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
577 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
578 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
579 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
580 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
581 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
582 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
583 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
584 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
585 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
587 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 /* X0201 kana conversion table for han-dakuten */
597 unsigned char ev[]= {
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
609 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 /* X0208 kigou conversion table */
618 /* 0x8140 - 0x819e */
620 unsigned char fv[] = {
622 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
623 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
624 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
625 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
626 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
627 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
628 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
629 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
630 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
639 static int file_out = FALSE;
641 static int overwrite = FALSE;
644 static int crmode_f = 0; /* CR, NL, CRLF */
645 #ifdef EASYWIN /*Easy Win */
646 static int end_check;
658 char *outfname = NULL;
661 #ifdef EASYWIN /*Easy Win */
662 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
665 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
666 cp = (unsigned char *)*argv;
671 if (pipe(fds) < 0 || (pid = fork()) < 0){
682 execvp(argv[1], &argv[1]);
696 if(x0201_f == WISH_TRUE)
697 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
699 if (binmode_f == TRUE)
701 if (freopen("","wb",stdout) == NULL)
708 setbuf(stdout, (char *) NULL);
710 setvbuffer(stdout, stdobuf, IOBUF_SIZE);
713 if (binmode_f == TRUE)
715 if (freopen("","rb",stdin) == NULL) return (-1);
719 setvbuffer(stdin, stdibuf, IOBUF_SIZE);
723 kanji_convert(stdin);
724 if (guess_f) print_guessed_code(NULL);
729 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
738 /* reopen file for stdout */
739 if (file_out == TRUE) {
742 outfname = malloc(strlen(origfname)
743 + strlen(".nkftmpXXXXXX")
749 strcpy(outfname, origfname);
753 for (i = strlen(outfname); i; --i){
754 if (outfname[i - 1] == '/'
755 || outfname[i - 1] == '\\'){
761 strcat(outfname, "ntXXXXXX");
763 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC,
766 strcat(outfname, ".nkftmpXXXXXX");
767 fd = mkstemp(outfname);
770 || (fd_backup = dup(fileno(stdout))) < 0
771 || dup2(fd, fileno(stdout)) < 0
782 outfname = "nkf.out";
785 if(freopen(outfname, "w", stdout) == NULL) {
789 if (binmode_f == TRUE) {
791 if (freopen("","wb",stdout) == NULL)
798 if (binmode_f == TRUE)
800 if (freopen("","rb",fin) == NULL)
805 setvbuffer(fin, stdibuf, IOBUF_SIZE);
809 char *filename = NULL;
811 if (nfiles > 1) filename = origfname;
812 if (guess_f) print_guessed_code(filename);
818 #if defined(MSDOS) && !defined(__MINGW32__)
826 if (dup2(fd_backup, fileno(stdout)) < 0){
829 if (stat(origfname, &sb)) {
830 fprintf(stderr, "Can't stat %s\n", origfname);
832 /* restore permissions */
833 if (chmod(outfname, sb.st_mode)) {
834 fprintf(stderr, "Can't set permission %s\n", outfname);
837 /* restore timestamps */
838 #if defined(MSDOS) && !defined(__MINGW32__)
839 tb[0] = tb[1] = sb.st_mtime;
840 if (utime(outfname, tb)) {
841 fprintf(stderr, "Can't set timestamp %s\n", outfname);
844 tb.actime = sb.st_atime;
845 tb.modtime = sb.st_mtime;
846 if (utime(outfname, &tb)) {
847 fprintf(stderr, "Can't set timestamp %s\n", outfname);
851 if (unlink(origfname)){
855 if (rename(outfname, origfname)) {
857 fprintf(stderr, "Can't rename %s to %s\n",
858 outfname, origfname);
866 #ifdef EASYWIN /*Easy Win */
867 if (file_out == FALSE)
868 scanf("%d",&end_check);
871 #else /* for Other OS */
872 if (file_out == TRUE)
902 {"katakana-hiragana","h3"},
909 #ifdef UTF8_OUTPUT_ENABLE
914 #ifdef UTF8_INPUT_ENABLE
916 {"utf16-input", "W16"},
925 #ifdef NUMCHAR_OPTION
926 {"numchar-input", ""},
932 #ifdef SHIFTJIS_CP932
942 static int option_mode = 0;
949 unsigned char *p = NULL;
961 case '-': /* literal options */
962 if (!*cp) { /* ignore the rest of arguments */
966 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
968 p = (unsigned char *)long_option[i].name;
969 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
977 cp = (unsigned char *)long_option[i].alias;
980 if (strcmp(long_option[i].name, "overwrite") == 0){
987 if (strcmp(long_option[i].name, "cap-input") == 0){
991 if (strcmp(long_option[i].name, "url-input") == 0){
996 #ifdef NUMCHAR_OPTION
997 if (strcmp(long_option[i].name, "numchar-input") == 0){
1003 if (strcmp(long_option[i].name, "no-output") == 0){
1007 if (strcmp(long_option[i].name, "debug") == 0){
1012 if (strcmp(long_option[i].name, "cp932") == 0){
1013 #ifdef SHIFTJIS_CP932
1017 #ifdef UTF8_OUTPUT_ENABLE
1018 ms_ucs_map_f = TRUE;
1022 if (strcmp(long_option[i].name, "no-cp932") == 0){
1023 #ifdef SHIFTJIS_CP932
1027 #ifdef UTF8_OUTPUT_ENABLE
1028 ms_ucs_map_f = FALSE;
1032 #ifdef SHIFTJIS_CP932
1033 if (strcmp(long_option[i].name, "cp932inv") == 0){
1040 if (strcmp(long_option[i].name, "x0212") == 0){
1047 if (strcmp(long_option[i].name, "exec-in") == 0){
1051 if (strcmp(long_option[i].name, "exec-out") == 0){
1056 #ifdef UTF8_OUTPUT_ENABLE
1057 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1058 ms_ucs_map_f = TRUE;
1062 if (strcmp(long_option[i].name, "prefix=") == 0){
1063 if (*p == '=' && ' ' < p[1] && p[1] < 128){
1064 for (i = 2; ' ' < p[i] && p[i] < 128; i++){
1065 prefix_table[p[i]] = p[1];
1072 case 'b': /* buffered mode */
1075 case 'u': /* unbuffered mode */
1078 case 't': /* transparent mode */
1081 case 'j': /* JIS output */
1083 output_conv = j_oconv;
1085 case 'e': /* AT&T EUC output */
1086 output_conv = e_oconv;
1088 case 's': /* SJIS output */
1089 output_conv = s_oconv;
1091 case 'l': /* ISO8859 Latin-1 support, no conversion */
1092 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1093 input_f = LATIN1_INPUT;
1095 case 'i': /* Kanji IN ESC-$-@/B */
1096 if (*cp=='@'||*cp=='B')
1097 kanji_intro = *cp++;
1099 case 'o': /* ASCII IN ESC-(-J/B */
1100 if (*cp=='J'||*cp=='B'||*cp=='H')
1101 ascii_intro = *cp++;
1108 if ('9'>= *cp && *cp>='0')
1109 hira_f |= (*cp++ -'0');
1116 #if defined(MSDOS) || defined(__OS2__)
1131 #ifdef UTF8_OUTPUT_ENABLE
1132 case 'w': /* UTF-8 output */
1133 if ('1'== cp[0] && '6'==cp[1]) {
1134 output_conv = w_oconv16; cp+=2;
1136 unicode_bom_f=2; cp++;
1139 unicode_bom_f=1; cp++;
1141 } else if (cp[0] == 'B') {
1142 unicode_bom_f=2; cp++;
1144 unicode_bom_f=1; cp++;
1147 } else if (cp[0] == '8') {
1148 output_conv = w_oconv; cp++;
1151 unicode_bom_f=1; cp++;
1154 output_conv = w_oconv;
1157 #ifdef UTF8_INPUT_ENABLE
1158 case 'W': /* UTF-8 input */
1159 if ('1'== cp[0] && '6'==cp[1]) {
1160 input_f = UTF16LE_INPUT;
1163 } else if (cp[0] == 'B') {
1165 input_f = UTF16BE_INPUT;
1167 } else if (cp[0] == '8') {
1169 input_f = UTF8_INPUT;
1171 input_f = UTF8_INPUT;
1174 /* Input code assumption */
1175 case 'J': /* JIS input */
1176 case 'E': /* AT&T EUC input */
1177 input_f = JIS_INPUT;
1179 case 'S': /* MS Kanji input */
1180 input_f = SJIS_INPUT;
1181 if (x0201_f==NO_X0201) x0201_f=TRUE;
1183 case 'Z': /* Convert X0208 alphabet to ASCII */
1184 /* bit:0 Convert X0208
1185 bit:1 Convert Kankaku to one space
1186 bit:2 Convert Kankaku to two spaces
1187 bit:3 Convert HTML Entity
1189 if ('9'>= *cp && *cp>='0')
1190 alpha_f |= 1<<(*cp++ -'0');
1194 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1195 x0201_f = FALSE; /* No X0201->X0208 conversion */
1197 ESC-(-I in JIS, EUC, MS Kanji
1198 SI/SO in JIS, EUC, MS Kanji
1199 SSO in EUC, JIS, not in MS Kanji
1200 MS Kanji (0xa0-0xdf)
1202 ESC-(-I in JIS (0x20-0x5f)
1203 SSO in EUC (0xa0-0xdf)
1204 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1207 case 'X': /* Assume X0201 kana */
1208 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1211 case 'F': /* preserve new lines */
1212 fold_preserve_f = TRUE;
1213 case 'f': /* folding -f60 or -f */
1216 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1218 fold_len += *cp++ - '0';
1220 if (!(0<fold_len && fold_len<BUFSIZ))
1221 fold_len = DEFAULT_FOLD;
1225 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1227 fold_margin += *cp++ - '0';
1231 case 'm': /* MIME support */
1232 if (*cp=='B'||*cp=='Q') {
1233 mime_decode_mode = *cp++;
1234 mimebuf_f = FIXED_MIME;
1235 } else if (*cp=='N') {
1236 mime_f = TRUE; cp++;
1237 } else if (*cp=='S') {
1238 mime_f = STRICT_MIME; cp++;
1239 } else if (*cp=='0') {
1240 mime_f = FALSE; cp++;
1243 case 'M': /* MIME output */
1246 mimeout_f = FIXED_MIME; cp++;
1247 } else if (*cp=='Q') {
1249 mimeout_f = FIXED_MIME; cp++;
1254 case 'B': /* Broken JIS support */
1256 bit:1 allow any x on ESC-(-x or ESC-$-x
1257 bit:2 reset to ascii on NL
1259 if ('9'>= *cp && *cp>='0')
1260 broken_f |= 1<<(*cp++ -'0');
1265 case 'O':/* for Output file */
1269 case 'c':/* add cr code */
1272 case 'd':/* delete cr code */
1275 case 'I': /* ISO-2022-JP output */
1278 case 'L': /* line mode */
1279 if (*cp=='u') { /* unix */
1280 crmode_f = NL; cp++;
1281 } else if (*cp=='m') { /* mac */
1282 crmode_f = CR; cp++;
1283 } else if (*cp=='w') { /* windows */
1284 crmode_f = CRLF; cp++;
1285 } else if (*cp=='0') { /* no conversion */
1295 /* multiple options in one string are allowed for the Perl module */
1296 while(*cp && *cp!='-') cp++;
1300 /* bogus option but ignored */
1306 #ifdef ANSI_C_PROTOTYPE
1307 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0))
1309 struct input_code * find_inputcode_byfunc(iconv_func)
1310 int (*iconv_func)();
1314 struct input_code *p = input_code_list;
1316 if (iconv_func == p->iconv_func){
1326 static int (*iconv_for_check)() = 0;
1329 #ifdef ANSI_C_PROTOTYPE
1330 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
1332 void set_iconv(f, iconv_func)
1334 int (*iconv_func)();
1337 #ifdef INPUT_CODE_FIX
1345 #ifdef INPUT_CODE_FIX
1346 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1352 if (estab_f && iconv_for_check != iconv){
1353 struct input_code *p = find_inputcode_byfunc(iconv);
1355 set_input_codename(p->name);
1356 debug(input_codename);
1358 iconv_for_check = iconv;
1363 #define SCORE_L2 (1) /* JIS level-2 kanji */
1364 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1365 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1366 #ifdef SHIFTJIS_CP932
1367 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1368 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* non-existent character */
1370 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* non-existent character */
1372 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
1373 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1375 #define SCORE_INIT (SCORE_iMIME)
1377 int score_table_A0[] = {
1380 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1381 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1384 int score_table_F0[] = {
1385 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1386 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1387 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1388 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1391 void set_code_score(ptr, score)
1392 struct input_code *ptr;
1396 ptr->score |= score;
1400 void clr_code_score(ptr, score)
1401 struct input_code *ptr;
1405 ptr->score &= ~score;
1409 void code_score(ptr)
1410 struct input_code *ptr;
1412 int c2 = ptr->buf[0];
1413 int c1 = ptr->buf[1];
1415 set_code_score(ptr, SCORE_ERROR);
1416 }else if (c2 == SSO){
1417 set_code_score(ptr, SCORE_KANA);
1418 #ifdef UTF8_OUTPUT_ENABLE
1419 }else if (!e2w_conv(c2, c1)){
1420 set_code_score(ptr, SCORE_NO_EXIST);
1422 }else if ((c2 & 0x70) == 0x20){
1423 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1424 }else if ((c2 & 0x70) == 0x70){
1425 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1426 }else if ((c2 & 0x70) >= 0x50){
1427 set_code_score(ptr, SCORE_L2);
1431 void status_disable(ptr)
1432 struct input_code *ptr;
1437 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1440 void status_push_ch(ptr, c)
1441 struct input_code *ptr;
1444 ptr->buf[ptr->index++] = c;
1447 void status_clear(ptr)
1448 struct input_code *ptr;
1454 void status_reset(ptr)
1455 struct input_code *ptr;
1458 ptr->score = SCORE_INIT;
1461 void status_reinit(ptr)
1462 struct input_code *ptr;
1465 ptr->_file_stat = 0;
1468 void status_check(ptr, c)
1469 struct input_code *ptr;
1472 if (c <= DEL && estab_f){
1477 void s_status(ptr, c)
1478 struct input_code *ptr;
1483 status_check(ptr, c);
1488 #ifdef NUMCHAR_OPTION
1489 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1492 }else if (0xa1 <= c && c <= 0xdf){
1493 status_push_ch(ptr, SSO);
1494 status_push_ch(ptr, c);
1497 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
1499 status_push_ch(ptr, c);
1500 #ifdef SHIFTJIS_CP932
1502 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
1504 status_push_ch(ptr, c);
1505 #endif /* SHIFTJIS_CP932 */
1507 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
1509 status_push_ch(ptr, c);
1510 #endif /* X0212_ENABLE */
1512 status_disable(ptr);
1516 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1517 status_push_ch(ptr, c);
1518 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
1522 status_disable(ptr);
1526 #ifdef SHIFTJIS_CP932
1527 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
1528 status_push_ch(ptr, c);
1529 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
1530 set_code_score(ptr, SCORE_CP932);
1535 #endif /* SHIFTJIS_CP932 */
1536 #ifndef X0212_ENABLE
1537 status_disable(ptr);
1543 void e_status(ptr, c)
1544 struct input_code *ptr;
1549 status_check(ptr, c);
1554 #ifdef NUMCHAR_OPTION
1555 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1558 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
1560 status_push_ch(ptr, c);
1562 }else if (0x8f == c){
1564 status_push_ch(ptr, c);
1565 #endif /* X0212_ENABLE */
1567 status_disable(ptr);
1571 if (0xa1 <= c && c <= 0xfe){
1572 status_push_ch(ptr, c);
1576 status_disable(ptr);
1581 if (0xa1 <= c && c <= 0xfe){
1583 status_push_ch(ptr, c);
1585 status_disable(ptr);
1587 #endif /* X0212_ENABLE */
1591 #ifdef UTF8_INPUT_ENABLE
1592 void w16_status(ptr, c)
1593 struct input_code *ptr;
1600 if (ptr->_file_stat == 0){
1601 if (c == 0xfe || c == 0xff){
1603 status_push_ch(ptr, c);
1604 ptr->_file_stat = 1;
1606 status_disable(ptr);
1607 ptr->_file_stat = -1;
1609 }else if (ptr->_file_stat > 0){
1611 status_push_ch(ptr, c);
1612 }else if (ptr->_file_stat < 0){
1613 status_disable(ptr);
1619 status_disable(ptr);
1620 ptr->_file_stat = -1;
1622 status_push_ch(ptr, c);
1629 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
1630 status_push_ch(ptr, c);
1633 status_disable(ptr);
1634 ptr->_file_stat = -1;
1640 void w_status(ptr, c)
1641 struct input_code *ptr;
1646 status_check(ptr, c);
1651 #ifdef NUMCHAR_OPTION
1652 }else if ((c & CLASS_MASK) == CLASS_UTF16){
1655 }else if (0xc0 <= c && c <= 0xdf){
1657 status_push_ch(ptr, c);
1658 }else if (0xe0 <= c && c <= 0xef){
1660 status_push_ch(ptr, c);
1662 status_disable(ptr);
1667 if (0x80 <= c && c <= 0xbf){
1668 status_push_ch(ptr, c);
1669 if (ptr->index > ptr->stat){
1670 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
1671 && ptr->buf[2] == 0xbf);
1672 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
1673 &ptr->buf[0], &ptr->buf[1]);
1680 status_disable(ptr);
1691 int action_flag = 1;
1692 struct input_code *result = 0;
1693 struct input_code *p = input_code_list;
1695 (p->status_func)(p, c);
1698 }else if(p->stat == 0){
1709 if (result && !estab_f){
1710 set_iconv(TRUE, result->iconv_func);
1711 }else if (c <= DEL){
1712 struct input_code *ptr = input_code_list;
1721 #define STD_GC_BUFSIZE (256)
1722 int std_gc_buf[STD_GC_BUFSIZE];
1730 return std_gc_buf[--std_gc_ndx];
1740 if (std_gc_ndx == STD_GC_BUFSIZE){
1743 std_gc_buf[std_gc_ndx++] = c;
1761 while ((c = (*i_getc)(f)) != EOF)
1770 oconv = output_conv;
1773 /* replace continuation module, from output side */
1775 /* output redirection */
1777 if (noout_f || guess_f){
1784 if (mimeout_f == TRUE) {
1785 o_base64conv = oconv; oconv = base64_conv;
1787 /* base64_count = 0; */
1791 o_crconv = oconv; oconv = cr_conv;
1794 o_rot_conv = oconv; oconv = rot_conv;
1797 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
1800 o_hira_conv = oconv; oconv = hira_conv;
1803 o_fconv = oconv; oconv = fold_conv;
1806 if (alpha_f || x0201_f) {
1807 o_zconv = oconv; oconv = z_conv;
1811 i_ungetc = std_ungetc;
1812 /* input redirection */
1815 i_cgetc = i_getc; i_getc = cap_getc;
1816 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
1819 i_ugetc = i_getc; i_getc = url_getc;
1820 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
1823 #ifdef NUMCHAR_OPTION
1825 i_ngetc = i_getc; i_getc = numchar_getc;
1826 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
1829 if (mime_f && mimebuf_f==FIXED_MIME) {
1830 i_mgetc = i_getc; i_getc = mime_getc;
1831 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
1834 i_bgetc = i_getc; i_getc = broken_getc;
1835 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
1837 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
1838 set_iconv(-TRUE, e_iconv);
1839 } else if (input_f == SJIS_INPUT) {
1840 set_iconv(-TRUE, s_iconv);
1841 #ifdef UTF8_INPUT_ENABLE
1842 } else if (input_f == UTF8_INPUT) {
1843 set_iconv(-TRUE, w_iconv);
1844 } else if (input_f == UTF16LE_INPUT) {
1845 set_iconv(-TRUE, w_iconv16);
1848 set_iconv(FALSE, e_iconv);
1852 struct input_code *p = input_code_list;
1860 Conversion main loop. Code detection only.
1870 module_connection();
1875 output_mode = ASCII;
1878 #define NEXT continue /* no output, get next */
1879 #define SEND ; /* output c1 and c2, get next */
1880 #define LAST break /* end of loop, go closing */
1882 while ((c1 = (*i_getc)(f)) != EOF) {
1887 /* in case of 8th bit is on */
1889 /* in case of not established yet */
1890 /* It is still ambiguous */
1891 if (h_conv(f, c2, c1)==EOF)
1897 /* in case of already established */
1899 /* ignore bogus code */
1905 /* second byte, 7 bit code */
1906 /* it might be kanji shifted */
1907 if ((c1 == DEL) || (c1 <= SPACE)) {
1908 /* ignore bogus first code */
1916 #ifdef UTF8_INPUT_ENABLE
1925 #ifdef NUMCHAR_OPTION
1926 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){
1929 } else if (c1 > DEL) {
1931 if (!estab_f && !iso8859_f) {
1932 /* not established yet */
1935 } else { /* estab_f==TRUE */
1940 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
1941 /* SJIS X0201 Case... */
1942 if(iso2022jp_f && x0201_f==NO_X0201) {
1943 (*oconv)(GETA1, GETA2);
1950 } else if (c1==SSO && iconv != s_iconv) {
1951 /* EUC X0201 Case */
1952 c1 = (*i_getc)(f); /* skip SSO */
1954 if (SSP<=c1 && c1<0xe0) {
1955 if(iso2022jp_f && x0201_f==NO_X0201) {
1956 (*oconv)(GETA1, GETA2);
1963 } else { /* bogus code, skip SSO and one byte */
1967 /* already established */
1972 } else if ((c1 > SPACE) && (c1 != DEL)) {
1973 /* in case of Roman characters */
1975 /* output 1 shifted byte */
1979 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
1980 /* output 1 shifted byte */
1981 if(iso2022jp_f && x0201_f==NO_X0201) {
1982 (*oconv)(GETA1, GETA2);
1989 /* look like bogus code */
1992 } else if (input_mode == X0208) {
1993 /* in case of Kanji shifted */
1996 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
1997 /* Check MIME code */
1998 if ((c1 = (*i_getc)(f)) == EOF) {
2001 } else if (c1 == '?') {
2002 /* =? is mime conversion start sequence */
2003 if(mime_f == STRICT_MIME) {
2004 /* check in real detail */
2005 if (mime_begin_strict(f) == EOF)
2009 } else if (mime_begin(f) == EOF)
2019 /* normal ASCII code */
2022 } else if (c1 == SI) {
2025 } else if (c1 == SO) {
2028 } else if (c1 == ESC ) {
2029 if ((c1 = (*i_getc)(f)) == EOF) {
2030 /* (*oconv)(0, ESC); don't send bogus code */
2032 } else if (c1 == '$') {
2033 if ((c1 = (*i_getc)(f)) == EOF) {
2035 (*oconv)(0, ESC); don't send bogus code
2036 (*oconv)(0, '$'); */
2038 } else if (c1 == '@'|| c1 == 'B') {
2039 /* This is kanji introduction */
2042 set_input_codename("ISO-2022-JP");
2043 debug(input_codename);
2045 } else if (c1 == '(') {
2046 if ((c1 = (*i_getc)(f)) == EOF) {
2047 /* don't send bogus code
2053 } else if (c1 == '@'|| c1 == 'B') {
2054 /* This is kanji introduction */
2059 } else if (c1 == 'D'){
2063 #endif /* X0212_ENABLE */
2065 /* could be some special code */
2072 } else if (broken_f&0x2) {
2073 /* accept any ESC-(-x as broken code ... */
2083 } else if (c1 == '(') {
2084 if ((c1 = (*i_getc)(f)) == EOF) {
2085 /* don't send bogus code
2087 (*oconv)(0, '('); */
2091 /* This is X0201 kana introduction */
2092 input_mode = X0201; shift_mode = X0201;
2094 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2095 /* This is X0208 kanji introduction */
2096 input_mode = ASCII; shift_mode = FALSE;
2098 } else if (broken_f&0x2) {
2099 input_mode = ASCII; shift_mode = FALSE;
2104 /* maintain various input_mode here */
2108 } else if ( c1 == 'N' || c1 == 'n' ){
2110 c3 = (*i_getc)(f); /* skip SS2 */
2111 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2126 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2127 input_mode = ASCII; set_iconv(FALSE, 0);
2130 } else if (c1 == NL && mime_f && !mime_decode_mode ) {
2131 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2139 } else if (c1 == CR && mime_f && !mime_decode_mode ) {
2140 if ((c1=(*i_getc)(f))!=EOF) {
2144 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2161 if (input_mode == X0208)
2162 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2164 else if (input_mode == X0212)
2165 (*oconv)((0x8f << 8) | c2, c1);
2166 #endif /* X0212_ENABLE */
2167 else if (input_mode)
2168 (*oconv)(input_mode, c1); /* other special case */
2169 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2170 int c0 = (*i_getc)(f);
2173 (*iconv)(c2, c1, c0);
2179 /* goto next_word */
2183 (*iconv)(EOF, 0, 0);
2196 /** it must NOT be in the kanji shifted sequence */
2197 /** it must NOT be written in JIS7 */
2198 /** and it must be after 2 byte 8bit code */
2205 while ((c1 = (*i_getc)(f)) != EOF) {
2211 if (push_hold_buf(c1) == EOF || estab_f){
2217 struct input_code *p = input_code_list;
2218 struct input_code *result = p;
2223 if (p->score < result->score){
2228 set_iconv(FALSE, result->iconv_func);
2233 ** 1) EOF is detected, or
2234 ** 2) Code is established, or
2235 ** 3) Buffer is FULL (but last word is pushed)
2237 ** in 1) and 3) cases, we continue to use
2238 ** Kanji codes by oconv and leave estab_f unchanged.
2243 while (wc < hold_count){
2244 c2 = hold_buf[wc++];
2246 #ifdef NUMCHAR_OPTION
2247 || (c2 & CLASS_MASK) == CLASS_UTF16
2252 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2253 (*iconv)(X0201, c2, 0);
2256 if (wc < hold_count){
2257 c1 = hold_buf[wc++];
2266 if ((*iconv)(c2, c1, 0) < 0){
2268 if (wc < hold_count){
2269 c0 = hold_buf[wc++];
2278 (*iconv)(c2, c1, c0);
2291 if (hold_count >= HOLD_SIZE*2)
2293 hold_buf[hold_count++] = c2;
2294 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2297 int s2e_conv(c2, c1, p2, p1)
2302 #ifdef SHIFTJIS_CP932
2303 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
2304 extern unsigned short shiftjis_cp932[3][189];
2305 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2311 #endif /* SHIFTJIS_CP932 */
2313 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
2314 extern unsigned short shiftjis_x0212[3][189];
2315 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2318 c2 = (0x8f << 8) | (val >> 8);
2330 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
2332 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
2340 c2 = x0212_unshift(c2);
2355 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2358 int ret = s2e_conv(c2, c1, &c2, &c1);
2359 if (ret) return ret;
2373 }else if (c2 == 0x8f){
2377 c2 = (c2 << 8) | (c1 & 0x7f);
2379 #ifdef SHIFTJIS_CP932
2382 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2383 s2e_conv(s2, s1, &c2, &c1);
2384 if ((c2 & 0xff00) == 0){
2390 #endif /* SHIFTJIS_CP932 */
2391 #endif /* X0212_ENABLE */
2392 } else if (c2 == SSO){
2395 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2405 #ifdef UTF8_INPUT_ENABLE
2407 w2e_conv(c2, c1, c0, p2, p1)
2411 extern unsigned short * utf8_to_euc_2bytes[];
2412 extern unsigned short ** utf8_to_euc_3bytes[];
2415 if (0xc0 <= c2 && c2 <= 0xef) {
2416 unsigned short **pp;
2419 if (c0 == 0) return -1;
2420 pp = utf8_to_euc_3bytes[c2 - 0x80];
2421 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1);
2423 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1);
2425 #ifdef NUMCHAR_OPTION
2428 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
2433 } else if (c2 == X0201) {
2446 int ret = w2e_conv(c2, c1, c0, &c2, &c1);
2454 w16w_conv(val, p2, p1, p0)
2462 }else if (val < 0x800){
2463 *p2 = 0xc0 | (val >> 6);
2464 *p1 = 0x80 | (val & 0x3f);
2467 *p2 = 0xe0 | (val >> 12);
2468 *p1 = 0x80 | ((val >> 6) & 0x3f);
2469 *p0 = 0x80 | (val & 0x3f);
2474 ww16_conv(c2, c1, c0)
2479 val = (c2 & 0x0f) << 12;
2480 val |= (c1 & 0x3f) << 6;
2482 }else if (c2 >= 0xc0){
2483 val = (c2 & 0x1f) << 6;
2492 w16e_conv(val, p2, p1)
2496 extern unsigned short * utf8_to_euc_2bytes[];
2497 extern unsigned short ** utf8_to_euc_3bytes[];
2499 unsigned short **pp;
2503 w16w_conv(val, &c2, &c1, &c0);
2506 pp = utf8_to_euc_3bytes[c2 - 0x80];
2507 psize = sizeof_utf8_to_euc_C2;
2508 ret = w_iconv_common(c1, c0, pp, psize, p2, p1);
2510 pp = utf8_to_euc_2bytes;
2511 psize = sizeof_utf8_to_euc_2bytes;
2512 ret = w_iconv_common(c2, c1, pp, psize, p2, p1);
2514 #ifdef NUMCHAR_OPTION
2517 *p1 = CLASS_UTF16 | val;
2529 w_iconv16(c2, c1, c0)
2534 if (c2==0376 && c1==0377){
2535 utf16_mode = UTF16LE_INPUT;
2537 } else if (c2==0377 && c1==0376){
2538 utf16_mode = UTF16BE_INPUT;
2541 if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
2543 tmp=c1; c1=c2; c2=tmp;
2545 if ((c2==0 && c1 < 0x80) || c2==EOF) {
2549 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
2550 if (ret) return ret;
2556 w_iconv_common(c1, c0, pp, psize, p2, p1)
2558 unsigned short **pp;
2566 if (pp == 0) return 1;
2569 if (c1 < 0 || psize <= c1) return 1;
2571 if (p == 0) return 1;
2574 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
2576 if (val == 0) return 1;
2583 if (c2 == SO) c2 = X0201;
2592 #ifdef UTF8_OUTPUT_ENABLE
2597 extern unsigned short euc_to_utf8_1byte[];
2598 extern unsigned short * euc_to_utf8_2bytes[];
2599 extern unsigned short * euc_to_utf8_2bytes_ms[];
2603 p = euc_to_utf8_1byte;
2605 } else if (c2 >> 8 == 0x8f){
2606 extern unsigned short * x0212_to_utf8_2bytes[];
2607 c2 = (c2&0x7f) - 0x21;
2608 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2609 p = x0212_to_utf8_2bytes[c2];
2615 c2 = (c2&0x7f) - 0x21;
2616 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
2617 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
2622 c1 = (c1 & 0x7f) - 0x21;
2623 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
2639 if (unicode_bom_f==2) {
2646 #ifdef NUMCHAR_OPTION
2647 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2648 w16w_conv(c1, &c2, &c1, &c0);
2652 if (c0) (*o_putc)(c0);
2659 output_mode = ASCII;
2661 } else if (c2 == ISO8859_1) {
2662 output_mode = ISO8859_1;
2663 (*o_putc)(c1 | 0x080);
2667 val = e2w_conv(c2, c1);
2669 w16w_conv(val, &c2, &c1, &c0);
2673 if (c0) (*o_putc)(c0);
2689 if (unicode_bom_f==2) {
2691 (*o_putc)((unsigned char)'\377');
2695 (*o_putc)((unsigned char)'\377');
2700 if (c2 == ISO8859_1) {
2703 #ifdef NUMCHAR_OPTION
2704 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) {
2705 c2 = (c1 >> 8) & 0xff;
2709 unsigned short val = e2w_conv(c2, c1);
2710 c2 = (val >> 8) & 0xff;
2729 #ifdef NUMCHAR_OPTION
2730 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2731 w16e_conv(c1, &c2, &c1);
2732 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2740 } else if (c2 == 0) {
2741 output_mode = ASCII;
2743 } else if (c2 == X0201) {
2744 output_mode = JAPANESE_EUC;
2745 (*o_putc)(SSO); (*o_putc)(c1|0x80);
2746 } else if (c2 == ISO8859_1) {
2747 output_mode = ISO8859_1;
2748 (*o_putc)(c1 | 0x080);
2750 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2751 output_mode = JAPANESE_EUC;
2752 #ifdef SHIFTJIS_CP932
2755 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2756 s2e_conv(s2, s1, &c2, &c1);
2760 if ((c2 & 0xff00) >> 8 == 0x8f){
2763 (*o_putc)((c2 & 0x7f) | 0x080);
2764 (*o_putc)(c1 | 0x080);
2767 (*o_putc)((c2 & 0x7f) | 0x080);
2768 (*o_putc)(c1 | 0x080);
2772 if ((c1<0x21 || 0x7e<c1) ||
2773 (c2<0x21 || 0x7e<c2)) {
2774 set_iconv(FALSE, 0);
2775 return; /* too late to rescue this char */
2777 output_mode = JAPANESE_EUC;
2778 (*o_putc)(c2 | 0x080);
2779 (*o_putc)(c1 | 0x080);
2789 if ((ret & 0xff00) == 0x8f00){
2790 if (0x75 <= c && c <= 0x7f){
2791 ret = c + (0x109 - 0x75);
2794 if (0x75 <= c && c <= 0x7f){
2795 ret = c + (0x113 - 0x75);
2802 int x0212_unshift(c)
2806 if (0x7f <= c && c <= 0x88){
2807 ret = c + (0x75 - 0x7f);
2808 }else if (0x89 <= c && c <= 0x92){
2809 ret = (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
2813 #endif /* X0212_ENABLE */
2816 e2s_conv(c2, c1, p2, p1)
2817 int c2, c1, *p2, *p1;
2820 unsigned short *ptr;
2822 extern unsigned short *x0212_shiftjis[];
2824 if ((c2 & 0xff00) == 0x8f00){
2826 if (0x21 <= ndx && ndx <= 0x7e){
2827 ptr = x0212_shiftjis[ndx - 0x21];
2829 val = ptr[(c1 & 0x7f) - 0x21];
2839 c2 = x0212_shift(c2);
2841 #endif /* X0212_ENABLE */
2842 if ((c2 & 0xff00) == 0x8f00){
2845 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
2846 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
2855 #ifdef NUMCHAR_OPTION
2856 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
2857 w16e_conv(c1, &c2, &c1);
2863 } else if (c2 == 0) {
2864 output_mode = ASCII;
2866 } else if (c2 == X0201) {
2867 output_mode = SHIFT_JIS;
2869 } else if (c2 == ISO8859_1) {
2870 output_mode = ISO8859_1;
2871 (*o_putc)(c1 | 0x080);
2873 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2874 output_mode = SHIFT_JIS;
2875 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2881 if ((c1<0x20 || 0x7e<c1) ||
2882 (c2<0x20 || 0x7e<c2)) {
2883 set_iconv(FALSE, 0);
2884 return; /* too late to rescue this char */
2886 output_mode = SHIFT_JIS;
2887 e2s_conv(c2, c1, &c2, &c1);
2889 #ifdef SHIFTJIS_CP932
2891 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2892 extern unsigned short cp932inv[2][189];
2893 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2899 #endif /* SHIFTJIS_CP932 */
2902 if (prefix_table[(unsigned char)c1]){
2903 (*o_putc)(prefix_table[(unsigned char)c1]);
2914 #ifdef NUMCHAR_OPTION
2915 if ((c1 & CLASS_MASK) == CLASS_UTF16){
2916 w16e_conv(c1, &c2, &c1);
2920 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2923 (*o_putc)(ascii_intro);
2924 output_mode = ASCII;
2928 } else if ((c2 & 0xff00) >> 8 == 0x8f){
2929 if (output_mode!=X0212) {
2930 output_mode = X0212;
2936 (*o_putc)(c2 & 0x7f);
2939 } else if (c2==X0201) {
2940 if (output_mode!=X0201) {
2941 output_mode = X0201;
2947 } else if (c2==ISO8859_1) {
2948 /* iso8859 introduction, or 8th bit on */
2949 /* Can we convert in 7bit form using ESC-'-'-A ?
2951 output_mode = ISO8859_1;
2953 } else if (c2 == 0) {
2954 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
2957 (*o_putc)(ascii_intro);
2958 output_mode = ASCII;
2962 if (output_mode != X0208) {
2963 output_mode = X0208;
2966 (*o_putc)(kanji_intro);
2968 if (c1<0x20 || 0x7e<c1)
2970 if (c2<0x20 || 0x7e<c2)
2982 if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
2985 } else if (base64_count>66 && mimeout_mode) {
2986 (*o_base64conv)(EOF,0);
2987 (*o_base64conv)(NL,0);
2988 (*o_base64conv)(SPACE,0);
2990 (*o_base64conv)(c2,c1);
2994 static int broken_buf[3];
2995 static int broken_counter = 0;
2996 static int broken_last = 0;
3003 if (broken_counter>0) {
3004 return broken_buf[--broken_counter];
3007 if (c=='$' && broken_last != ESC
3008 && (input_mode==ASCII || input_mode==X0201)) {
3011 if (c1=='@'|| c1=='B') {
3012 broken_buf[0]=c1; broken_buf[1]=c;
3019 } else if (c=='(' && broken_last != ESC
3020 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3023 if (c1=='J'|| c1=='B') {
3024 broken_buf[0]=c1; broken_buf[1]=c;
3042 if (broken_counter<2)
3043 broken_buf[broken_counter++]=c;
3047 static int prev_cr = 0;
3055 if (! (c2==0&&c1==NL) ) {
3061 } else if (c1=='\r') {
3063 } else if (c1=='\n') {
3064 if (crmode_f==CRLF) {
3065 (*o_crconv)(0,'\r');
3066 } else if (crmode_f==CR) {
3067 (*o_crconv)(0,'\r');
3071 } else if (c1!='\032' || crmode_f!=NL){
3077 Return value of fold_conv()
3079 \n add newline and output char
3080 \r add newline and output nothing
3083 1 (or else) normal output
3085 fold state in prev (previous character)
3087 >0x80 Japanese (X0208/X0201)
3092 This fold algorithm does not preserve leading spaces in a line.
3093 This is the main difference from fmt.
/*
 * char_size(c2, c1): evaluates to 2 when c2 is non-zero and to 1 when c2
 * is zero.  c1 is accepted for symmetry with the (c2, c1) character pair
 * but is not used.  The parameter is parenthesized so that an argument
 * containing low-precedence operators (e.g. a ?: expression) is evaluated
 * as a unit.
 */
#define char_size(c2,c1) ((c2)?2:1)
3105 if (c1== '\r' && !fold_preserve_f) {
3106 fold_state=0; /* ignore cr */
3107 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3109 fold_state=0; /* ignore cr */
3110 } else if (c1== BS) {
3111 if (f_line>0) f_line--;
3113 } else if (c2==EOF && f_line != 0) { /* close open last line */
3115 } else if ((c1=='\n' && !fold_preserve_f)
3116 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3117 && fold_preserve_f)) {
3119 if (fold_preserve_f) {
3123 } else if ((f_prev == c1 && !fold_preserve_f)
3124 || (f_prev == '\n' && fold_preserve_f)
3125 ) { /* duplicate newline */
3128 fold_state = '\n'; /* output two newline */
3134 if (f_prev&0x80) { /* Japanese? */
3136 fold_state = 0; /* ignore given single newline */
3137 } else if (f_prev==' ') {
3141 if (++f_line<=fold_len)
3145 fold_state = '\r'; /* fold and output nothing */
3149 } else if (c1=='\f') {
3154 fold_state = '\n'; /* output newline and clear */
3155 } else if ( (c2==0 && c1==' ')||
3156 (c2==0 && c1=='\t')||
3157 (c2=='!'&& c1=='!')) {
3158 /* X0208 kankaku or ascii space */
3159 if (f_prev == ' ') {
3160 fold_state = 0; /* remove duplicate spaces */
3163 if (++f_line<=fold_len)
3164 fold_state = ' '; /* output ASCII space only */
3166 f_prev = ' '; f_line = 0;
3167 fold_state = '\r'; /* fold and output nothing */
3171 prev0 = f_prev; /* we still need this one... , but almost done */
3173 if (c2 || c2==X0201)
3174 f_prev |= 0x80; /* this is Japanese */
3175 f_line += char_size(c2,c1);
3176 if (f_line<=fold_len) { /* normal case */
3179 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
3180 f_line = char_size(c2,c1);
3181 fold_state = '\n'; /* We can't wait, do fold now */
3182 } else if (c2==X0201) {
3183 /* simple kinsoku rules return 1 means no folding */
3184 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3185 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3186 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3187 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3188 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3189 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3190 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3192 fold_state = '\n';/* add one new f_line before this character */
3195 fold_state = '\n';/* add one new f_line before this character */
3198 /* kinsoku point in ASCII */
3199 if ( c1==')'|| /* { [ ( */
3210 /* just after special */
3211 } else if (!is_alnum(prev0)) {
3212 f_line = char_size(c2,c1);
3214 } else if ((prev0==' ') || /* ignored new f_line */
3215 (prev0=='\n')|| /* ignored new f_line */
3216 (prev0&0x80)) { /* X0208 - ASCII */
3217 f_line = char_size(c2,c1);
3218 fold_state = '\n';/* add one new f_line before this character */
3220 fold_state = 1; /* default no fold in ASCII */
3224 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3225 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3226 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3227 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3228 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3229 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3230 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3231 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3232 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3233 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3234 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3235 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3236 /* default no fold in kinsoku */
3239 f_line = char_size(c2,c1);
3240 /* add one new f_line before this character */
3243 f_line = char_size(c2,c1);
3245 /* add one new f_line before this character */
3250 /* terminator process */
3251 switch(fold_state) {
3270 int z_prev2=0,z_prev1=0;
3277 /* if (c2) c1 &= 0x7f; assertion */
3279 if (x0201_f && z_prev2==X0201) { /* X0201 */
3280 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
3282 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
3284 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
3286 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
3290 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
3299 if (x0201_f && c2==X0201) {
3300 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
3301 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3302 z_prev1 = c1; z_prev2 = c2;
3305 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
3310 /* JISX0208 Alphabet */
3311 if (alpha_f && c2 == 0x23 ) {
3313 } else if (alpha_f && c2 == 0x21 ) {
3314 /* JISX0208 Kigou */
3319 } else if (alpha_f&0x4) {
3324 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3330 case '>': entity = ">"; break;
3331 case '<': entity = "<"; break;
3332 case '\"': entity = """; break;
3333 case '&': entity = "&"; break;
3336 while (*entity) (*o_zconv)(0, *entity++);
3346 #define rot13(c) ( \
3348 (c <= 'M') ? (c + 13): \
3349 (c <= 'Z') ? (c - 13): \
3351 (c <= 'm') ? (c + 13): \
3352 (c <= 'z') ? (c - 13): \
3356 #define rot47(c) ( \
3358 ( c <= 'O' ) ? (c + 47) : \
3359 ( c <= '~' ) ? (c - 47) : \
3367 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
3373 (*o_rot_conv)(c2,c1);
3380 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) {
3382 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) {
3385 (*o_hira_conv)(c2,c1);
3390 iso2022jp_check_conv(c2,c1)
3393 static int range[RANGE_NUM_MAX][2] = {
3416 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3420 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3425 for (i = 0; i < RANGE_NUM_MAX; i++) {
3426 start = range[i][0];
3429 if (c >= start && c <= end) {
3434 (*o_iso2022jp_check_conv)(c2,c1);
3438 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3440 unsigned char *mime_pattern[] = {
3441 (unsigned char *)"\075?EUC-JP?B?",
3442 (unsigned char *)"\075?SHIFT_JIS?B?",
3443 (unsigned char *)"\075?ISO-8859-1?Q?",
3444 (unsigned char *)"\075?ISO-8859-1?B?",
3445 (unsigned char *)"\075?ISO-2022-JP?B?",
3446 (unsigned char *)"\075?ISO-2022-JP?Q?",
3447 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3448 (unsigned char *)"\075?UTF-8?B?",
3449 (unsigned char *)"\075?UTF-8?Q?",
3451 (unsigned char *)"\075?US-ASCII?Q?",
3456 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3457 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = {
3458 e_iconv, s_iconv, 0, 0, 0, 0,
3459 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3465 int mime_encode[] = {
3466 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
3467 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3474 int mime_encode_method[] = {
3475 'B', 'B','Q', 'B', 'B', 'Q',
3476 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3484 #define MAXRECOVER 20
/*
 * ASCII-only character helpers, used instead of the <ctype.h> routines
 * because the portability of toupper() is not trusted.
 *
 *   nkf_toupper(c)  - maps 'a'..'z' to 'A'..'Z', returns any other value
 *                     unchanged.
 *   nkf_isdigit(c)  - non-zero when c is '0'..'9'.
 *   nkf_isxdigit(c) - non-zero when c is '0'..'9', 'a'..'f' or 'A'..'F'.
 *
 * Every use of the parameter is parenthesized so that an argument built
 * from low-precedence operators is evaluated as a unit.  The argument is
 * still expanded more than once, so it must not have side effects.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
3494 if (i_getc!=mime_getc) {
3495 i_mgetc = i_getc; i_getc = mime_getc;
3496 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3497 if(mime_f==STRICT_MIME) {
3498 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3499 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3505 unswitch_mime_getc()
3507 if(mime_f==STRICT_MIME) {
3508 i_mgetc = i_mgetc_buf;
3509 i_mungetc = i_mungetc_buf;
3512 i_ungetc = i_mungetc;
3516 mime_begin_strict(f)
3521 unsigned char *p,*q;
3522 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
3524 mime_decode_mode = FALSE;
3525 /* =? has been checked */
3527 p = mime_pattern[j];
3530 for(i=2;p[i]>' ';i++) { /* start at =? */
3531 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
3532 /* pattern fails, try next one */
3534 while ((p = mime_pattern[++j])) {
3535 for(k=2;k<i;k++) /* assume length(p) > i */
3536 if (p[k]!=q[k]) break;
3537 if (k==i && nkf_toupper(c1)==p[k]) break;
3539 if (p) continue; /* found next one, continue */
3540 /* all fails, output from recovery buffer */
3548 mime_decode_mode = p[i-2];
3550 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3552 if (mime_decode_mode=='B') {
3553 mimebuf_f = unbuf_f;
3555 /* do MIME integrity check */
3556 return mime_integrity(f,mime_pattern[j]);
3568 /* we don't keep eof of Fifo, because it contains ?= as
3569 a terminator. It was checked in mime_integrity. */
3570 return ((mimebuf_f)?
3571 (*i_mgetc_buf)(f):Fifo(mime_input++));
3575 mime_ungetc_buf(c,f)
3580 (*i_mungetc_buf)(c,f);
3582 Fifo(--mime_input)=c;
3593 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3594 /* re-read and convert again from mime_buffer. */
3596 /* =? has been checked */
3598 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
3599 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3600 /* We accept any character type even if it is broken up by new lines */
3601 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ;
3602 if (c1=='\n'||c1==' '||c1=='\r'||
3603 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
3605 /* Failed. But this could be another MIME preamble */
3613 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3614 if (!(++i<MAXRECOVER) || c1==EOF) break;
3615 if (c1=='b'||c1=='B') {
3616 mime_decode_mode = 'B';
3617 } else if (c1=='q'||c1=='Q') {
3618 mime_decode_mode = 'Q';
3622 c1 = (*i_getc)(f); Fifo(mime_last++) = c1;
3623 if (!(++i<MAXRECOVER) || c1==EOF) break;
3625 mime_decode_mode = FALSE;
3631 if (!mime_decode_mode) {
3632 /* false MIME preamble, restart from mime_buffer */
3633 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
3634 /* Since we are in MIME mode until buffer becomes empty, */
3635 /* we never go into mime_begin again for a while. */
3638 /* discard mime preamble, and goto MIME mode */
3640 /* do no MIME integrity check */
3641 return c1; /* used only for checking EOF */
3656 fprintf(stderr, "%s\n", str);
3662 set_input_codename (codename)
3667 strcmp(codename, "") != 0 &&
3668 strcmp(codename, input_codename) != 0)
3670 is_inputcode_mixed = TRUE;
3672 input_codename = codename;
3673 is_inputcode_set = TRUE;
3677 print_guessed_code (filename)
3680 char *codename = "BINARY";
3681 if (!is_inputcode_mixed) {
3682 if (strcmp(input_codename, "") == 0) {
3685 codename = input_codename;
3688 if (filename != NULL) printf("%s:", filename);
3689 printf("%s\n", codename);
3696 if (nkf_isdigit(x)) return x - '0';
3697 return nkf_toupper(x) - 'A' + 10;
3702 #ifdef ANSI_C_PROTOTYPE
3703 int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
3706 hex_getc(ch, f, g, u)
3719 if (!nkf_isxdigit(c2)){
3724 if (!nkf_isxdigit(c3)){
3729 return (hex2bin(c2) << 4) | hex2bin(c3);
3736 return hex_getc(':', f, i_cgetc, i_cungetc);
3744 return (*i_cungetc)(c, f);
3751 return hex_getc('%', f, i_ugetc, i_uungetc);
3759 return (*i_uungetc)(c, f);
3763 #ifdef NUMCHAR_OPTION
3768 int (*g)() = i_ngetc;
3769 int (*u)() = i_nungetc;
3780 if (buf[i] == 'x' || buf[i] == 'X'){
3781 for (j = 0; j < 5; j++){
3783 if (!nkf_isxdigit(buf[i])){
3790 c |= hex2bin(buf[i]);
3793 for (j = 0; j < 6; j++){
3797 if (!nkf_isdigit(buf[i])){
3804 c += hex2bin(buf[i]);
3810 return CLASS_UTF16 | c;
3820 numchar_ungetc(c, f)
3824 return (*i_nungetc)(c, f);
3833 int c1, c2, c3, c4, cc;
3834 int t1, t2, t3, t4, mode, exit_mode;
3838 int lwsp_size = 128;
3840 if (mime_top != mime_last) { /* Something is in FIFO */
3841 return Fifo(mime_top++);
3843 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
3844 mime_decode_mode=FALSE;
3845 unswitch_mime_getc();
3846 return (*i_getc)(f);
3849 if (mimebuf_f == FIXED_MIME)
3850 exit_mode = mime_decode_mode;
3853 if (mime_decode_mode == 'Q') {
3854 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3856 if (c1=='_') return ' ';
3857 if (c1!='=' && c1!='?') {
3861 mime_decode_mode = exit_mode; /* prepare for quit */
3862 if (c1<=' ') return c1;
3863 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
3864 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
3865 /* end Q encoding */
3866 input_mode = exit_mode;
3868 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3869 if (lwsp_buf==NULL) {
3870 perror("can't malloc");
3873 while ((c1=(*i_getc)(f))!=EOF) {
3878 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3886 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
3887 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
3902 lwsp_buf[lwsp_count] = c1;
3903 if (lwsp_count++>lwsp_size){
3905 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
3906 if (lwsp_buf_new==NULL) {
3909 perror("can't realloc");
3912 lwsp_buf = lwsp_buf_new;
3918 if (lwsp_count > 0) {
3919 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
3923 for(lwsp_count--;lwsp_count>0;lwsp_count--)
3924 i_ungetc(lwsp_buf[lwsp_count],f);
3932 if (c1=='='&&c2<' ') { /* this is soft wrap */
3933 while((c1 = (*i_mgetc)(f)) <=' ') {
3934 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
3936 mime_decode_mode = 'Q'; /* still in MIME */
3937 goto restart_mime_q;
3940 mime_decode_mode = 'Q'; /* still in MIME */
3944 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
3945 if (c2<=' ') return c2;
3946 mime_decode_mode = 'Q'; /* still in MIME */
3947 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
3948 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
3949 return ((hex(c2)<<4) + hex(c3));
3952 if (mime_decode_mode != 'B') {
3953 mime_decode_mode = FALSE;
3954 return (*i_mgetc)(f);
3958 /* Base64 encoding */
3960 MIME allows line break in the middle of
3961 Base64, but we are very pessimistic in decoding
3962 in unbuf mode because MIME encoded code may be broken by
3963 less or editor's control sequence (such as ESC-[-K in unbuffered
3964 mode). Ignore incomplete MIME.
3966 mode = mime_decode_mode;
3967 mime_decode_mode = exit_mode; /* prepare for quit */
3969 while ((c1 = (*i_mgetc)(f))<=' ') {
3974 if ((c2 = (*i_mgetc)(f))<=' ') {
3977 if (mime_f != STRICT_MIME) goto mime_c2_retry;
3978 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
3981 if ((c1 == '?') && (c2 == '=')) {
3984 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
3985 if (lwsp_buf==NULL) {
3986 perror("can't malloc");
3989 while ((c1=(*i_getc)(f))!=EOF) {
3994 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4002 if ((c1=(*i_getc)(f))!=EOF) {
4006 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4021 lwsp_buf[lwsp_count] = c1;
4022 if (lwsp_count++>lwsp_size){
4024 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4025 if (lwsp_buf_new==NULL) {
4028 perror("can't realloc");
4031 lwsp_buf = lwsp_buf_new;
4037 if (lwsp_count > 0) {
4038 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) {
4042 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4043 i_ungetc(lwsp_buf[lwsp_count],f);
4052 if ((c3 = (*i_mgetc)(f))<=' ') {
4055 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4056 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4060 if ((c4 = (*i_mgetc)(f))<=' ') {
4063 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4064 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4068 mime_decode_mode = mode; /* still in MIME sigh... */
4070 /* BASE 64 decoding */
4072 t1 = 0x3f & base64decode(c1);
4073 t2 = 0x3f & base64decode(c2);
4074 t3 = 0x3f & base64decode(c3);
4075 t4 = 0x3f & base64decode(c4);
4076 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4078 Fifo(mime_last++) = cc;
4079 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4081 Fifo(mime_last++) = cc;
4082 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4084 Fifo(mime_last++) = cc;
4089 return Fifo(mime_top++);
4097 Fifo(--mime_top) = c;
4108 /* In buffered mode, read until =? or NL or buffer full
4110 mime_input = mime_top;
4111 mime_last = mime_top;
4112 while(*p) Fifo(mime_input++) = *p++;
4115 while((c=(*i_getc)(f))!=EOF) {
4116 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4117 break; /* buffer full */
4119 if (c=='=' && d=='?') {
4120 /* checked. skip header, start decode */
4121 Fifo(mime_input++) = c;
4122 /* mime_last_input = mime_input; */
4127 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4129 /* Should we check length mod 4? */
4130 Fifo(mime_input++) = c;
4133 /* In case of Incomplete MIME, no MIME decode */
4134 Fifo(mime_input++) = c;
4135 mime_last = mime_input; /* point undecoded buffer */
4136 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4137 switch_mime_getc(); /* anyway we need buffered getc */
4148 i = c - 'A'; /* A..Z 0-25 */
4150 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4152 } else if (c > '/') {
4153 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4154 } else if (c == '+') {
4155 i = '>' /* 62 */ ; /* + 62 */
4157 i = '?' /* 63 */ ; /* / 63 */
4162 static char basis_64[] =
4163 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4166 #define MIMEOUT_BUF_LENGTH (60)
4167 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
4168 int mimeout_buf_count = 0;
4169 int mimeout_preserve_space = 0;
4170 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
4179 p = mime_pattern[0];
4180 for(i=0;mime_encode[i];i++) {
4181 if (mode == mime_encode[i]) {
4182 p = mime_pattern[i];
4186 mimeout_mode = mime_encode_method[i];
4189 if (base64_count>45) {
4193 if (!mimeout_preserve_space && mimeout_buf_count>0
4194 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4195 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
4199 if (!mimeout_preserve_space) {
4200 for (;i<mimeout_buf_count;i++) {
4201 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4202 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
4203 (*o_mputc)(mimeout_buf[i]);
4210 mimeout_preserve_space = FALSE;
4216 j = mimeout_buf_count;
4217 mimeout_buf_count = 0;
4219 mime_putc(mimeout_buf[i]);
4235 switch(mimeout_mode) {
4240 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
4246 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
4252 if (mimeout_f!=FIXED_MIME) {
4254 } else if (mimeout_mode != 'Q')
4263 switch(mimeout_mode) {
4268 } else if (c==CR||c==NL) {
4271 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
4273 (*o_mputc)(itoh4(((c>>4)&0xf)));
4274 (*o_mputc)(itoh4((c&0xf)));
4283 (*o_mputc)(basis_64[c>>2]);
4288 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4294 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
4295 (*o_mputc)(basis_64[c & 0x3F]);
4313 if (mimeout_f==FIXED_MIME && base64_count>71) {
4314 if (mimeout_mode=='Q') {
4315 if (c!=CR && c!=NL) {
4324 } else if (mimeout_f!=FIXED_MIME && !mimeout_mode && (c==CR||c==NL)) {
4327 if (c!=EOF && mimeout_f!=FIXED_MIME) {
4328 if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
4329 if (mimeout_mode=='Q') {
4338 } else if (mimeout_mode) {
4339 if (mimeout_buf_count>0
4340 && (mimeout_buf[mimeout_buf_count-1]==CR || mimeout_buf[mimeout_buf_count-1]==NL)) {
4341 if (c==SPACE || c==TAB) {
4342 for (i=0;i<mimeout_buf_count;i++) {
4343 mimeout_addchar(mimeout_buf[i]);
4345 mimeout_buf_count = 0;
4346 } else if (SPACE<c && c<DEL) {
4348 for (i=0;i<mimeout_buf_count;i++) {
4349 (*o_mputc)(mimeout_buf[i]);
4352 mimeout_buf_count = 0;
4355 if (c==SPACE || c==TAB || c==CR || c==NL) {
4356 for (i=0;i<mimeout_buf_count;i++) {
4357 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
4359 for (i=0;i<mimeout_buf_count;i++) {
4360 (*o_mputc)(mimeout_buf[i]);
4363 mimeout_buf_count = 0;
4366 mimeout_buf[mimeout_buf_count++] = c;
4367 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4369 for (i=0;i<mimeout_buf_count;i++) {
4370 (*o_mputc)(mimeout_buf[i]);
4373 mimeout_buf_count = 0;
4378 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
4379 mimeout_buf[mimeout_buf_count++] = c;
4380 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4381 j = mimeout_buf_count;
4382 mimeout_buf_count = 0;
4384 mimeout_addchar(mimeout_buf[i]);
4389 } else if (!mimeout_mode) {
4390 if (c==SPACE || c==TAB || c==CR || c==NL) {
4391 if ((c==CR || c==NL)
4392 &&(mimeout_buf[mimeout_buf_count-1]==SPACE
4393 || mimeout_buf[mimeout_buf_count-1]==TAB)) {
4394 mimeout_buf_count--;
4396 for (i=0;i<mimeout_buf_count;i++) {
4397 (*o_mputc)(mimeout_buf[i]);
4400 mimeout_buf_count = 0;
4402 mimeout_buf[mimeout_buf_count++] = c;
4403 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
4404 open_mime(output_mode);
4408 } else if (!mimeout_mode) {
4409 if (mimeout_buf_count>0 && mimeout_buf[mimeout_buf_count-1]==SPACE) {
4410 for (i=0;i<mimeout_buf_count-1;i++) {
4411 (*o_mputc)(mimeout_buf[i]);
4414 mimeout_buf[0] = SPACE;
4415 mimeout_buf_count = 1;
4417 open_mime(output_mode);
4419 } else if (c == EOF) { /* c==EOF */
4420 j = mimeout_buf_count;
4421 mimeout_buf_count = 0;
4424 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
4425 || mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4427 mimeout_addchar(mimeout_buf[i]);
4431 (*o_mputc)(mimeout_buf[i]);
4437 if (mimeout_buf_count>0) {
4438 j = mimeout_buf_count;
4439 mimeout_buf_count = 0;
4441 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
4443 mimeout_addchar(mimeout_buf[i]);
4449 (*o_mputc)(mimeout_buf[i]);
4451 open_mime(output_mode);
4463 struct input_code *p = input_code_list;
4476 mime_f = STRICT_MIME;
4481 #if defined(MSDOS) || defined(__OS2__)
4486 iso2022jp_f = FALSE;
4487 #ifdef UTF8_OUTPUT_ENABLE
4490 ms_ucs_map_f = FALSE;
4502 is_inputcode_mixed = FALSE;
4503 is_inputcode_set = FALSE;
4507 #ifdef SHIFTJIS_CP932
4513 for (i = 0; i < 256; i++){
4514 prefix_table[i] = 0;
4517 #ifdef UTF8_INPUT_ENABLE
4518 utf16_mode = UTF16LE_INPUT;
4520 mimeout_buf_count = 0;
4525 fold_preserve_f = FALSE;
4528 kanji_intro = DEFAULT_J;
4529 ascii_intro = DEFAULT_R;
4530 fold_margin = FOLD_MARGIN;
4531 output_conv = DEFAULT_CONV;
4532 oconv = DEFAULT_CONV;
4533 o_zconv = no_connection;
4534 o_fconv = no_connection;
4535 o_crconv = no_connection;
4536 o_rot_conv = no_connection;
4537 o_hira_conv = no_connection;
4538 o_base64conv = no_connection;
4539 o_iso2022jp_check_conv = no_connection;
4542 i_ungetc = std_ungetc;
4544 i_bungetc = std_ungetc;
4547 i_mungetc = std_ungetc;
4548 i_mgetc_buf = std_getc;
4549 i_mungetc_buf = std_ungetc;
4550 output_mode = ASCII;
4553 mime_decode_mode = FALSE;
4559 z_prev2=0,z_prev1=0;
4561 iconv_for_check = 0;
4567 no_connection(c2,c1)
4570 no_connection2(c2,c1,0);
4574 no_connection2(c2,c1,c0)
4577 fprintf(stderr,"nkf internal module connection failure.\n");
4585 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
4586 fprintf(stderr,"Flags:\n");
4587 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
4588 #ifdef DEFAULT_CODE_SJIS
4589 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
4591 #ifdef DEFAULT_CODE_JIS
4592 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
4594 #ifdef DEFAULT_CODE_EUC
4595 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
4597 #ifdef DEFAULT_CODE_UTF8
4598 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
4600 #ifdef UTF8_OUTPUT_ENABLE
4601 fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
4603 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
4604 #ifdef UTF8_INPUT_ENABLE
4605 fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
4607 fprintf(stderr,"t no conversion\n");
4608 fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
4609 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
4610 fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
4611 fprintf(stderr,"v Show this usage. V: show version\n");
4612 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
4613 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
4614 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
4615 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
4616 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
4617 fprintf(stderr," 3: Convert HTML Entity\n");
4618 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
4619 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
4621 fprintf(stderr,"T Text mode output\n");
4623 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
4624 fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
4625 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
4626 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
4627 fprintf(stderr,"long name options\n");
4628 fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
4629 fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
4630 fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
4631 fprintf(stderr," --cp932, --no-cp932 CP932 compatible\n");
4633 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
4635 #ifdef NUMCHAR_OPTION
4636 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
4638 #ifdef UTF8_OUTPUT_ENABLE
4639 fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
4642 fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
4644 fprintf(stderr," -g, --guess Guess the input code\n");
4645 fprintf(stderr," --help,--version\n");
4652 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
4653 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__)
4656 #if defined(MSDOS) && defined(__WIN16__)
4659 #if defined(MSDOS) && defined(__WIN32__)
4665 ,NKF_VERSION,NKF_RELEASE_DATE);
4666 fprintf(stderr,"\n%s\n",CopyRight);
4671 ** Patch authors:
4672 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
4673 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
4674 ** ohta@src.ricoh.co.jp (Junn Ohta)
4675 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
4676 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
4677 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
4678 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
4679 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
4680 ** GHG00637@nifty-serve.or.jp (COW)