Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */
#include "timidity.h"
#ifdef JAPANESE
+
/** Network Kanji Filter. (PDS Version)
************************************************************************
** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
-** Ï¢ÍíÀ衧 ¡Ê³ô¡ËÉÙ»ÎÄ̸¦µæ½ê¡¡¥½¥Õ¥È£³¸¦¡¡»ÔÀî¡¡»ê
+** Ï¢ÍíÀ衧 ¡Ê³ô¡ËÉÙ»ÎÄ̸¦µæ½ê¡¡¥½¥Õ¥È£³¸¦¡¡»ÔÀî¡¡»ê
** ¡ÊE-Mail Address: ichikawa@flab.fujitsu.co.jp¡Ë
-** ±ÄÍø¤òÌÜŪ¤È¤·¤Ê¤¤¸Â¤ê¡¢¤³¤Î¥½¡¼¥¹¤Î¤¤¤«¤Ê¤ë
-** Ê£¼Ì¡¤²þÊÑ¡¤½¤Àµ¤âµöÂú¤·¤Þ¤¹¡£¤½¤ÎºÝ¤Ë¤Ï¡¢¤³¤ÎÉôʬ¤ò»Ä¤¹¤³¤È¡£
+** Copyright (C) 1996,1998
+** Ï¢ÍíÀ衧 ΰµåÂç³Ø¾ðÊó¹©³Ø²Ê ²ÏÌî ¿¿¼£ mine/X0208 support
+** ¡ÊE-Mail Address: kono@ie.u-ryukyu.ac.jp¡Ë
+** Ï¢ÍíÀ衧 COW for DOS & Win16 & Win32 & OS/2
+** ¡ÊE-Mail Address: GHG00637@niftyserve.or.p¡Ë
+** ¤³¤Î¥½¡¼¥¹¤Î¤¤¤«¤Ê¤ëÊ£¼Ì¡¤²þÊÑ¡¤½¤Àµ¤âµöÂú¤·¤Þ¤¹¡£¤¿¤À¤·¡¢
+** ¤½¤ÎºÝ¤Ë¤Ï¡¢Ã¯¤¬¹×¸¥¤·¤¿¤ò¼¨¤¹¤³¤ÎÉôʬ¤ò»Ä¤¹¤³¤È¡£
+** ºÆÇÛÉۤ仨»ï¤ÎÉÕÏ¿¤Ê¤É¤ÎÌ䤤¹ç¤ï¤»¤âɬÍפ¢¤ê¤Þ¤»¤ó¡£
** ¤³¤Î¥×¥í¥°¥é¥à¤Ë¤Ä¤¤¤Æ¤ÏÆä˲¿¤ÎÊݾڤ⤷¤Ê¤¤¡¢°¤·¤«¤é¤º¡£
-** Everyone is permitted to do anything on this program
-** including copying, modifying, improving
-** as long as you don't try to make money off it,
-** or pretend that you wrote it.
+** Everyone is permitted to do anything on this program
+** including copying, modifying, improving.
+** as long as you don't try to pretend that you wrote it.
** i.e., the above copyright notice has to appear in all copies.
+** You don't have to ask before copying or publishing.
** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
***********************************************************************/
²þÊÑ¼Ô ½Ð±ÀÀµ¾° 1997
*/
+/*
+ 1.7¥Ù¡¼¥¹¤ËÊѹ¹¡£
+ ²þÊÑ¼Ô ÀÄÌÚÂçÊå 2000.10
+*/
+
/* ¤â¤·¡¤EUC-Japan ¤Î´°Á´¤Ê¥Á¥§¥Ã¥¯¤ò¤¹¤ë¾ì¹ç¤Ï EUC_STRICT_CHECK ¤òÄêµÁ
* ¤·¤Æ¤¯¤À¤µ¤¤¡¥¤¿¤À¤·¡¤1 ¥Ð¥¤¥È¤Ç¤â EUC-Japan ¤Î̤ÄêµÁʸ»ú¤¬´Þ¤Þ¤ì¤Æ¤¤¤ë¤È
* EUC ¤È¤ß¤Ê¤µ¤ì¤Ê¤¯¤Ê¤Ã¤Æ¤·¤Þ¤¤¤Þ¤¹¡¥Â¾¤Î¥×¥í¥°¥é¥à¤Ç´Á»ú¥³¡¼¥É¤ò EUC ¤ËÊÑ´¹
#if 0
static char *CopyRight =
- "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa)";
+ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),1998 S. Kono, COW";
static char *Version =
- "1.5";
+ "1.7";
static char *Patchlevel =
- "1/9503/Shinji Kono";
+ "0/9711/Shinji Kono";
#endif
+/*
+**
+**
+**
+** USAGE: nkf [flags] [file]
+**
+** Flags:
+** b Output is bufferred (DEFAULT)
+** u Output is unbufferred
+**
+** t no operation
+**
+** j Outout code is JIS 7 bit (DEFAULT SELECT)
+** s Output code is MS Kanji (DEFAULT SELECT)
+** e Output code is AT&T JIS (DEFAULT SELECT)
+** l Output code is JIS 7bit and ISO8859-1 Latin-1
+**
+** m MIME conversion for ISO-2022-JP
+** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
+** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
+**
+** r {de/en}crypt ROT13/47
+**
+** v display Version
+**
+** T Text mode output (for MS-DOS)
+**
+** x Do not convert X0201 kana into X0208
+** Z Convert X0208 alphabet to ASCII
+**
+** f60 fold option
+**
+** m MIME decode
+** B try to fix broken JIS, missing Escape
+** B[1-9] broken level
+**
+** O Output to 'nkf.out' file
+** d Delete \r in line feed
+** c Add \r in line feed
+**/
+/******************************/
+/* ¥Ç¥Õ¥©¥ë¥È¤Î½ÐÎÏ¥³¡¼¥ÉÁªÂò */
+/* Select DEFAULT_CODE */
+#define DEFAULT_CODE_JIS
+/* #define DEFAULT_CODE_SJIS */
+/* #define DEFAULT_CODE_EUC */
+/******************************/
+
#if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS)
#define MSDOS
#endif
#include <strings.h>
#endif
+#ifdef MSDOS
+#ifdef LSI_C
+#define setbinmode(fp) fsetbin(fp)
+#else /* Microsoft C, Turbo C */
+#define setbinmode(fp) setmode(fileno(fp), O_BINARY)
+#endif
+#else /* UNIX,OS/2 */
+#define setbinmode(fp)
+#endif
+
+#ifdef _IOFBF /* SysV and MSDOS */
+#define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
+#else /* BSD */
+#define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
+#endif
+
#include "common.h"
#include "nkflib.h"
+#define VOIDVOID 0
+
#ifndef FALSE
#define FALSE 0
#endif /* FALSE */
#define TRUE 1
#endif /* TRUE */
-/* state of output_mode and input_mode */
+/* state of output_mode and input_mode */
+
+#define ASCII 0
+#define X0208 1
+#define X0201 2
+#define NO_X0201 3
+#define JIS_INPUT 4
+#define SJIS_INPUT 5
+#define LATIN1_INPUT 6
+#define FIXED_MIME 7
+#define DOUBLE_SPACE -2
+#define EUC_INPUT 8
-#define ASCII 0
-#define X0208 1
-#define X0201 2
-#define NO_X0201 3
-#define JIS_INPUT 4
-#define SJIS_INPUT 5
-#define LATIN1_INPUT 6
-#define EUC_INPUT 7
+#define NL 0x0a
+#define ESC 0x1b
+#define SPACE 0x20
+#define AT 0x40
+#define SSP 0xa0
+#define DEL 0x7f
+#define SI 0x0f
+#define SO 0x0e
+#define SSO 0x8e
-#define ESC 0x1b
-#define SP 0x20
-#define AT 0x40
-#define SSP 0xa0
-#define DEL 0x7f
-#define SI 0x0f
-#define SO 0x0e
-#define SSO 0x8e
+#define HOLD_SIZE 32
+#define IOBUF_SIZE 16384
-#define HOLD_SIZE 32
+#define DEFAULT_J 'B'
+#define DEFAULT_R 'B'
-#define DEFAULT_J 'B'
-#define DEFAULT_R 'B'
+#define SJ0162 0x00e1 /* 01 - 62 ku offset */
+#define SJ6394 0x0161 /* 63 - 94 ku offset */
-#define SJ0162 0x00e1 /* 01 - 62 ku offset */
-#define SJ6394 0x0161 /* 63 - 94 ku offset */
/* SFILE begin */
/* ʸ»úÎó ¤ò FILE ¤ß¤¿¤¤¤Ë°·¤¦¾®ºÙ¹© */
char *nkf_convert(char *si,char *so,int maxsize,char *in_mode,char *out_mode);
char *nkf_conv(char *si,char *so,char *out_mode);
-/* mime related */
-static int base64decode(int c);
-static int mime_getc(SFILE *f);
-static int mime_begin(SFILE *f);
-static int mime_ungetc(unsigned int c);
-#define VOIDVOID 0
+static int check_kanji_code(unsigned char *p);
/* MIME preprocessor */
-#define GETC(p) ((!mime_mode)?sgetc(p):mime_getc(p))
-#define UNGETC(c,p) ((!mime_mode)?sungetc(c,p):mime_ungetc(c))
+
+#undef STRICT_MIME /* do stupid strict mime integrity check */
+#define GETC(p) ((!mime_mode)?sgetc(p):mime_getc(p))
+#define UNGETC(c,p) ((!mime_mode)?sungetc(c,p):mime_ungetc(c))
+
+
+#ifdef EASYWIN /*Easy Win */
+extern POINT _BufferSize;
+#endif
+
+/* function prototype */
+
+static int noconvert(SFILE *f);
+static int kanji_convert(SFILE *f);
+static int h_conv(SFILE *f,int c2,int c1);
+static int push_hold_buf(int c2,int c1);
+static int s_iconv(int c2,int c1);
+static int e_oconv(int c2,int c1);
+static int s_oconv(int c2,int c1);
+static int j_oconv(int c2,int c1);
+static int line_fold(int c2,int c1);
+static int pre_convert(int c1,int c2);
+static int mime_begin(SFILE *f);
+static int mime_getc(SFILE *f);
+static int mime_ungetc(unsigned int c);
+static int mime_integrity(SFILE *f,unsigned char *p);
+static int base64decode(int c);
+static int usage(void);
+static void arguments(char *c);
+static void reinit();
/* buffers */
+
+static char stdibuf[IOBUF_SIZE];
+static char stdobuf[IOBUF_SIZE];
static unsigned char hold_buf[HOLD_SIZE*2];
-static int hold_count;
+static int hold_count;
/* MIME preprocessor fifo */
-#define MIME_BUF_SIZE 4 /* 2^n ring buffer */
-#define MIME_BUF_MASK (MIME_BUF_SIZE-1)
-#define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
-static unsigned char mime_buf[MIME_BUF_SIZE];
-static unsigned int mime_top = 0;
-static unsigned int mime_last = 0;
+
+#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
+#define MIME_BUF_MASK (MIME_BUF_SIZE-1)
+#define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
+static unsigned char mime_buf[MIME_BUF_SIZE];
+static unsigned int mime_top = 0;
+static unsigned int mime_last = 0; /* decoded */
+static unsigned int mime_input = 0; /* undecoded */
/* flags */
-static int unbuf_f = FALSE,
- estab_f = FALSE;
-static int rot_f = FALSE; /* rot14/43 mode */
-static int input_f = FALSE; /* non fixed input code */
-static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
-static int mime_f = FALSE; /* convert MIME base64 */
-static int broken_f = FALSE; /* convert ESC-less broken JIS */
-static int iso8859_f = FALSE; /* ISO8859 through */
-#ifdef MSDOS
-static int x0201_f = TRUE; /* Assume JISX0201 kana */
+static int unbuf_f = FALSE;
+static int estab_f = FALSE;
+static int nop_f = FALSE;
+static int binmode_f = TRUE; /* binary mode */
+static int rot_f = FALSE; /* rot14/43 mode */
+static int input_f = FALSE; /* non fixed input code */
+static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
+static int mime_f = TRUE; /* convert MIME B base64 or Q */
+static int mimebuf_f = FALSE; /* MIME buffered input */
+static int broken_f = FALSE; /* convert ESC-less broken JIS */
+static int iso8859_f = FALSE; /* ISO8859 through */
+#if defined(MSDOS) || defined(__OS2__)
+static int x0201_f = TRUE; /* Assume JISX0201 kana */
#else
-static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
+static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
#endif
/* X0208 -> ASCII converter */
-static int c1_return;
+static int c1_return;
/* fold parameter */
-static int line = 0; /* chars in line */
+static int line = 0; /* chars in line */
static int prev = 0;
-static int fold_f = FALSE;
-static int fold_len = 0;
+static int fold_f = FALSE;
+static int fold_len = 0;
/* options */
-static char kanji_intro = DEFAULT_J,
- ascii_intro = DEFAULT_R;
+static char kanji_intro = DEFAULT_J,
+ ascii_intro = DEFAULT_R;
/* Folding */
-static int fold(int c2, int c1);
+
+int line_fold();
#define FOLD_MARGIN 10
+#define DEFAULT_FOLD 60
/* converters */
-static int s_iconv (int c2, int c1);
-static int e_oconv (int c2, int c1);
-static int j_oconv (int c2, int c1);
-static int s_oconv (int c2, int c1);
-static int convert (SFILE *f);
-static int h_conv (SFILE *f, int c1, int c2);
-static int push_hold_buf (int c2, int c1);
-static int (*iconv) (int c2, int c1); /* s_iconv or oconv */
-static int (*oconv) (int c2, int c1); /* [ejs]_oconv */
-static int pre_convert(int c1, int c2);/* rot13 or JISX0201 -> JISX0208 */
-static int check_kanji_code(unsigned char *p);
+
+#ifdef DEFAULT_CODE_JIS
+# define DEFAULT_CONV j_oconv
+#endif
+#ifdef DEFAULT_CODE_SJIS
+# define DEFAULT_CONV s_oconv
+#endif
+#ifdef DEFAULT_CODE_EUC
+# define DEFAULT_CONV e_oconv
+#endif
+
+static int (*iconv)(int c2,int c1);
+ /* s_iconv or oconv */
+static int (*oconv)(int c2,int c1) = DEFAULT_CONV;
+ /* [ejs]_oconv */
/* Global states */
-static int output_mode = ASCII, /* output kanji mode */
- input_mode = ASCII, /* input kanji mode */
- shift_mode = FALSE; /* TRUE shift out, or X0201 */
-static int mime_mode = FALSE; /* MIME mode B base64, Q hex */
+static int output_mode = ASCII, /* output kanji mode */
+ input_mode = ASCII, /* input kanji mode */
+ shift_mode = FALSE; /* TRUE shift out, or X0201 */
+static int mime_mode = FALSE; /* MIME mode B base64, Q hex */
-/* X0208 -> ASCII translation table */
/* X0201 / X0208 conversion tables */
/* X0201 kana conversion table */
/* 90-9F A0-DF */
-static unsigned char cv[]= {
+unsigned char cv[]= {
0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
0x00,0x00};
+
/* X0201 kana conversion table for daguten */
/* 90-9F A0-DF */
-static unsigned char dv[]= {
+unsigned char dv[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* X0201 kana conversion table for han-daguten */
/* 90-9F A0-DF */
-static unsigned char ev[]= {
+unsigned char ev[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00};
+
/* X0208 kigou conversion table */
/* 0x8140 - 0x819e */
-static unsigned char fv[] = {
+unsigned char fv[] = {
+
0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
-0x5c,0x7e,0x00,0x7c,0x00,0x00,0x60,0x27,
+0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
-};
+} ;
/* SFILE ´ØÏ¢´Ø¿ô */
SFILE xfi,xfo;
int a;
+ reinit(); /* »ÈÍÑ¡© */
+
if(maxsize == -1)
maxsize = SAFE_CONVERT_LENGTH(strlen(si));
else if(maxsize == 0)
x0201_f = TRUE;
}
-
/* ¥³¥ó¥Ð¡¼¥È */
- convert(fi);
+ kanji_convert(fi);
/* ¸å½èÍý */
sputchar('\0');
mode = ASCII;
while(*p)
{
- if(*p < SP || *p >= DEL)
+ if(*p < SPACE || *p >= DEL)
{
if(*p == ESC)
return JIS_INPUT;
}
#endif /* EUC_STRICT_CHECK */
-static int
-convert(SFILE *f)
+
+static int file_out = FALSE;
+static int add_cr = FALSE;
+static int del_cr = FALSE;
+static int end_check;
+
+#if 0
+#ifndef PERL_XS
+int
+main(argc, argv)
+ int argc;
+ char **argv;
+{
+ FILE *fin;
+ char *cp;
+
+#ifdef EASYWIN /*Easy Win */
+ _BufferSize.y = 400;/*Set Scroll Buffer Size*/
+#endif
+
+ for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
+ cp = *argv;
+ arguments(cp);
+ }
+
+ if(iso8859_f && (oconv != j_oconv || !x0201_f )) {
+ fprintf(stderr,"Mixed ISO8859/JISX0201/SJIS/EUC output is not allowed.\n");
+ exit(1);
+ }
+
+ if(binmode_f == TRUE)
+#ifdef __OS2__
+ if(freopen("","wb",stdout) == NULL)
+ return (-1);
+#else
+ setbinmode(stdout);
+#endif
+
+ if(unbuf_f)
+ setbuf(stdout, (char *) NULL);
+ else
+ setvbuffer(stdout, stdobuf, IOBUF_SIZE);
+
+ if(argc == 0) {
+ if(binmode_f == TRUE)
+#ifdef __OS2__
+ if(freopen("","rb",stdin) == NULL) return (-1);
+#else
+ setbinmode(stdin);
+#endif
+ setvbuffer(stdin, stdibuf, IOBUF_SIZE);
+ if(nop_f)
+ noconvert(stdin);
+ else
+ kanji_convert(stdin);
+ } else {
+ while (argc--) {
+ if((fin = fopen(*argv++, "r")) == NULL) {
+ perror(*--argv);
+ return(-1);
+ } else {
+/* reopen file for stdout */
+ if(file_out == TRUE){
+ if(argc == 1 ) {
+ if(freopen(*argv++, "w", stdout) == NULL) {
+ perror(*--argv);
+ return (-1);
+ }
+ argc--;
+ } else {
+ if(freopen("nkf.out", "w", stdout) == NULL) {
+ perror(*--argv);
+ return (-1);
+ }
+ }
+ if(binmode_f == TRUE) {
+#ifdef __OS2__
+ if(freopen("","wb",stdout) == NULL)
+ return (-1);
+#else
+ setbinmode(stdout);
+#endif
+ }
+ }
+ if(binmode_f == TRUE)
+#ifdef __OS2__
+ if(freopen("","rb",fin) == NULL)
+ return (-1);
+#else
+ setbinmode(fin);
+#endif
+ setvbuffer(fin, stdibuf, IOBUF_SIZE);
+ if(nop_f)
+ noconvert(fin);
+ else
+ kanji_convert(fin);
+ fclose(fin);
+ }
+ }
+ }
+#ifdef EASYWIN /*Easy Win */
+ if(file_out == FALSE)
+ scanf("%d",&end_check);
+ else
+ fclose(stdout);
+#else /* for Other OS */
+ if(file_out == TRUE)
+ fclose(stdout);
+#endif
+ return (0);
+}
+#endif
+
+void
+arguments(char *cp)
+{
+ while (*cp) {
+ switch (*cp++) {
+ case 'b': /* buffered mode */
+ unbuf_f = FALSE;
+ continue;
+ case 'u': /* non bufferd mode */
+ unbuf_f = TRUE;
+ continue;
+ case 't': /* transparent mode */
+ nop_f = TRUE;
+ continue;
+ case 'j': /* JIS output */
+ case 'n':
+ oconv = j_oconv;
+ continue;
+ case 'e': /* AT&T EUC output */
+ oconv = e_oconv;
+ continue;
+ case 's': /* SJIS output */
+ oconv = s_oconv;
+ continue;
+ case 'l': /* ISO8859 Latin-1 support, no conversion */
+ iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
+ input_f = LATIN1_INPUT;
+ continue;
+ case 'i': /* Kanji IN ESC-$-@/B */
+ if(*cp=='@'||*cp=='B')
+ kanji_intro = *cp++;
+ continue;
+ case 'o': /* ASCII IN ESC-(-J/B */
+ if(*cp=='J'||*cp=='B'||*cp=='H')
+ ascii_intro = *cp++;
+ continue;
+ case 'r':
+ rot_f = TRUE;
+ continue;
+#if defined(MSDOS) || defined(__OS2__)
+ case 'T':
+ binmode_f = FALSE;
+ continue;
+#endif
+#ifndef PERL_XS
+ case 'v':
+ usage();
+ exit(1);
+ break;
+#endif
+ /* Input code assumption */
+ case 'J': /* JIS input */
+ case 'E': /* AT&T EUC input */
+ input_f = JIS_INPUT;
+ continue;
+ case 'S': /* MS Kanji input */
+ input_f = SJIS_INPUT;
+ if(x0201_f==NO_X0201) x0201_f=TRUE;
+ continue;
+ case 'Z': /* Convert X0208 alphabet to asii */
+ /* bit:0 Convert X0208
+ bit:1 Convert Kankaku to one space
+ bit:2 Convert Kankaku to two spaces
+ */
+ if('9'>= *cp && *cp>='0')
+ alpha_f |= 1<<(*cp++ -'0');
+ else
+ alpha_f |= TRUE;
+ continue;
+ case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
+ x0201_f = FALSE; /* No X0201->X0208 conversion */
+ /* accept X0201
+ ESC-(-I in JIS, EUC, MS Kanji
+ SI/SO in JIS, EUC, MS Kanji
+ SSO in EUC, JIS, not in MS Kanji
+ MS Kanji (0xa0-0xdf)
+ output X0201
+ ESC-(-I in JIS (0x20-0x5f)
+ SSO in EUC (0xa0-0xdf)
+ 0xa0-0xd in MS Kanji (0xa0-0xdf)
+ */
+ continue;
+ case 'X': /* Assume X0201 kana */
+ /* Default value is NO_X0201 for EUC/MS-Kanji mix */
+ x0201_f = TRUE;
+ continue;
+ case 'f': /* folding -f60 or -f */
+ fold_f = TRUE;
+ fold_len = atoi(cp);
+ if(!(0<fold_len && fold_len<BUFSIZ))
+ fold_len = DEFAULT_FOLD;
+ while('0'<= *cp && *cp <='9') cp++;
+ continue;
+ case 'm': /* MIME support */
+ mime_f = TRUE;
+ if(*cp=='B'||*cp=='Q') {
+ mime_mode = *cp++;
+ mimebuf_f = FIXED_MIME;
+ } else if (*cp=='0') {
+ mime_f = FALSE;
+ }
+ continue;
+ case 'M': /* MIME output */
+ oconv = j_oconv; /* sorry... not yet done.. */
+ continue;
+ case 'B': /* Broken JIS support */
+ /* bit:0 no ESC JIS
+ bit:1 allow any x on ESC-(-x or ESC-$-x
+ bit:2 reset to ascii on NL
+ */
+ if('9'>= *cp && *cp>='0')
+ broken_f |= 1<<(*cp++ -'0');
+ else
+ broken_f |= TRUE;
+ continue;
+#ifndef PERL_XS
+ case 'O':/* for Output file */
+ file_out = TRUE;
+ continue;
+#endif
+ case 'c':/* add cr code */
+ add_cr = TRUE;
+ continue;
+ case 'd':/* delete cr code */
+ del_cr = TRUE;
+ continue;
+ default:
+ /* bogus option but ignored */
+ continue;
+ }
+ }
+}
+#endif
+
+int
+noconvert(f)
+ SFILE *f;
+{
+ int c;
+
+ while ((c = sgetc(f)) != EOF)
+ sputchar(c);
+ return 1;
+}
+
+
+
+
+int
+kanji_convert(SFILE *f)
{
- register int c1,
- c2;
+ int c1, c2;
c2 = 0;
- if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
- estab_f = TRUE; iconv = oconv;
- } else if (input_f == SJIS_INPUT) {
- estab_f = TRUE; iconv = s_iconv;
+ if(input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
+ estab_f = TRUE; iconv = oconv;
+ } else if(input_f == SJIS_INPUT) {
+ estab_f = TRUE; iconv = s_iconv;
} else {
- estab_f = FALSE; iconv = oconv;
+ estab_f = FALSE; iconv = oconv;
}
input_mode = ASCII;
output_mode = ASCII;
shift_mode = FALSE;
- mime_mode = FALSE;
-#define NEXT continue /* no output, get next */
-#define SEND ; /* output c1 and c2, get next */
-#define LAST break /* end of loop, go closing */
-
- while ((c1 = GETC (f)) != SEOF) {
- if (!c2 && !input_mode && c1<DEL && !mime_mode && !output_mode
- && !shift_mode && !fold_f && !rot_f) {
- /* plain ASCII loop, no conversion and no fold */
- while(c1!='=' && c1!=SO && c1!=SEOF &&
- c1!=ESC && c1!='$' && c1<DEL) {
- sputchar(c1);
- c1 = sgetc(f);
- }
- if(c1==SEOF) LAST;
- }
- if (c2) {
- /* second byte */
- if (c2 > DEL) {
- /* in case of 8th bit is on */
- if (!estab_f) {
- /* in case of not established yet */
- if (c1 > SSP) {
- /* It is still ambiguious */
- h_conv (f, c2, c1);
- c2 = 0;
- NEXT;
- } else if (c1 < AT) {
- /* ignore bogus code */
- c2 = 0;
- NEXT;
- } else {
- /* established */
- /* it seems to be MS Kanji */
- estab_f = TRUE;
- iconv = s_iconv;
- SEND;
- }
- } else
- /* in case of already established */
- if (c1 < AT) {
- /* ignore bogus code */
- c2 = 0;
- NEXT;
- } else
- SEND;
- } else
- /* 7 bit code */
- /* it might be kanji shitfted */
- if ((c1 == DEL) || (c1 <= SP)) {
- /* ignore bogus first code */
- c2 = 0;
- NEXT;
- } else
- SEND;
- } else {
- /* first byte */
- if (c1 > DEL) {
- /* 8 bit code */
- if (!estab_f && !iso8859_f) {
- /* not established yet */
- if (c1 < SSP) {
- /* it seems to be MS Kanji */
- estab_f = TRUE;
- iconv = s_iconv;
- } else if (c1 < 0xe0) {
- /* it seems to be EUC */
- estab_f = TRUE;
- iconv = oconv;
- } else {
- /* still ambiguious */
- }
- c2 = c1;
- NEXT;
- } else { /* estab_f==TRUE */
- if(iso8859_f) {
- SEND;
- } else if(SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
- /* SJIS X0201 Case... */
- /* This is too arrogant, but ... */
- if(x0201_f==NO_X0201) {
- iconv = oconv;
- c2 = c1;
- NEXT;
- } else
- if(x0201_f) {
- if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
- /* look ahead for X0201/X0208conversion */
- if ((c2 = GETC (f)) == SEOF) {
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- LAST;
- } else if (c2==(0xde)) { /* ÂùÅÀ */
- (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
- c2=0;
- NEXT;
- } else if (c2==(0xdf)&&ev[(c1-SSP)*2]) {
- /* ȾÂùÅÀ */
- (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
- c2=0;
- NEXT;
- }
- UNGETC(c2,f); c2 = 0;
- }
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- NEXT;
- } else
- SEND;
- } else if(c1==SSO && iconv != s_iconv) {
- /* EUC X0201 Case */
- /* This is too arrogant
- if(x0201_f == NO_X0201) {
- estab_f = FALSE;
- c2 = 0;
- NEXT;
- } */
- c1 = GETC (f); /* skip SSO */
- euc_1byte_check:
- if(x0201_f && SSP<=c1 && c1<0xe0) {
- if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
- if ((c2 = GETC (f)) == SEOF) {
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- LAST;
- }
- /* forward lookup ÂùÅÀ/ȾÂùÅÀ */
- if (c2 != SSO) {
- UNGETC(c2,f); c2 = 0;
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- NEXT;
- } else if ((c2 = GETC (f)) == SEOF) {
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- (*oconv)(0,SSO);
- LAST;
- } else if (c2==(0xde)) { /* ÂùÅÀ */
- (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
- c2=0;
- NEXT;
- } else if (c2==(0xdf)&&ev[(c1-SSP)*2]) {
- /* ȾÂùÅÀ */
- (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
- c2=0;
- NEXT;
- } else {
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- /* we have to check this c2 */
- /* and no way to push back SSO */
- c1 = c2; c2 = 0;
- goto euc_1byte_check;
- }
- }
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- NEXT;
- } else
- SEND;
- } else if(c1 < SSP && iconv != s_iconv) {
- /* strange code in EUC */
- iconv = s_iconv; /* try SJIS */
- c2 = c1;
- NEXT;
- } else {
- /* already established */
- c2 = c1;
- NEXT;
- }
- }
- } else if ((c1 > SP) && (c1 != DEL)) {
- /* in case of Roman characters */
- if (shift_mode) {
- c1 |= 0x80;
- /* output 1 shifted byte */
- if(x0201_f && (!iso8859_f||input_mode==X0201) &&
- SSP<=c1 && c1<0xe0 ) {
- if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
- if ((c2 = GETC (f)) == SEOF) {
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- LAST;
- } else if (c2==(0xde&0x7f)) { /* ÂùÅÀ */
- (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
- c2=0;
- NEXT;
- } else if (c2==(0xdf&0x7f)&&ev[(c1-SSP)*2]) {
- /* ȾÂùÅÀ */
- (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
- c2=0;
- NEXT;
- }
- UNGETC(c2,f); c2 = 0;
- }
- (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
- NEXT;
- } else
- SEND;
- } else if(c1 == '(' && broken_f && input_mode == X0208
- && !mime_mode ) {
- /* Try to recover missing escape */
- if ((c1 = GETC (f)) == SEOF) {
- (*oconv) (0, '(');
- LAST;
- } else {
- if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
- input_mode = ASCII; shift_mode = FALSE;
- NEXT;
- } else {
- (*oconv) (0, '(');
- /* do not modify various input_mode */
- /* It can be vt100 sequence */
- SEND;
- }
- }
- } else if (input_mode == X0208) {
- /* in case of Kanji shifted */
- c2 = c1;
- NEXT;
- /* goto next_byte */
- } else if(mime_f && !mime_mode && c1 == '=') {
- if ((c1 = sgetc (f)) == SEOF) {
- (*oconv) (0, '=');
- LAST;
- } else if (c1 == '?') {
- /* =? is mime conversion start sequence */
- if(mime_begin(f) == SEOF) /* check in detail */
+#define NEXT continue /* no output, get next */
+#define SEND ; /* output c1 and c2, get next */
+#define LAST break /* end of loop, go closing */
+
+ while ((c1 = GETC(f)) != EOF) {
+ if(c2) {
+ /* second byte */
+ if(c2 > DEL) {
+ /* in case of 8th bit is on */
+ if(!estab_f) {
+ /* in case of not established yet */
+ if(c1 > SSP) {
+ /* It is still ambiguious */
+ h_conv(f, c2, c1);
+ c2 = 0;
+ NEXT;
+ } else if(c1 < AT) {
+ /* ignore bogus code */
+ c2 = 0;
+ NEXT;
+ } else {
+ /* established */
+ /* it seems to be MS Kanji */
+ estab_f = TRUE;
+ iconv = s_iconv;
+ SEND;
+ }
+ } else
+ /* in case of already established */
+ if(c1 < AT) {
+ /* ignore bogus code */
+ c2 = 0;
+ NEXT;
+ } else
+ SEND;
+ } else
+ /* 7 bit code */
+ /* it might be kanji shitfted */
+ if((c1 == DEL) || (c1 <= SPACE)) {
+ /* ignore bogus first code */
+ c2 = 0;
+ NEXT;
+ } else
+ SEND;
+ } else {
+ /* first byte */
+ if(c1 > DEL) {
+ /* 8 bit code */
+ if(!estab_f && !iso8859_f) {
+ /* not established yet */
+ if(c1 < SSP) {
+ /* it seems to be MS Kanji */
+ estab_f = TRUE;
+ iconv = s_iconv;
+ } else if(c1 < 0xe0) {
+ /* it seems to be EUC */
+ estab_f = TRUE;
+ iconv = oconv;
+ } else {
+ /* still ambiguious */
+ }
+ c2 = c1;
+ NEXT;
+ } else { /* estab_f==TRUE */
+ if(iso8859_f) {
+ SEND;
+ } else if(SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
+ /* SJIS X0201 Case... */
+ /* This is too arrogant, but ... */
+ if(x0201_f==NO_X0201) {
+ iconv = oconv;
+ c2 = c1;
+ NEXT;
+ } else
+ if(x0201_f) {
+ if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
+ /* look ahead for X0201/X0208conversion */
+ if((c2 = GETC(f)) == EOF) {
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ LAST;
+ } else if(c2==(0xde)) { /* ÂùÅÀ */
+ (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
+ c2=0;
+ NEXT;
+ } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) {
+ /* ȾÂùÅÀ */
+ (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
+ c2=0;
+ NEXT;
+ }
+ UNGETC(c2,f); c2 = 0;
+ }
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ NEXT;
+ } else
+ SEND;
+ } else if(c1==SSO && iconv != s_iconv) {
+ /* EUC X0201 Case */
+ /* This is too arrogant
+ if(x0201_f == NO_X0201) {
+ estab_f = FALSE;
+ c2 = 0;
+ NEXT;
+ } */
+ c1 = GETC(f); /* skip SSO */
+ euc_1byte_check:
+ if(x0201_f && SSP<=c1 && c1<0xe0) {
+ if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
+ if((c2 = GETC(f)) == EOF) {
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ LAST;
+ }
+ /* forward lookup ÂùÅÀ/ȾÂùÅÀ */
+ if(c2 != SSO) {
+ UNGETC(c2,f); c2 = 0;
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ NEXT;
+ } else if((c2 = GETC(f)) == EOF) {
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ (*oconv)(0,SSO);
+ LAST;
+ } else if(c2==(0xde)) { /* ÂùÅÀ */
+ (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
+ c2=0;
+ NEXT;
+ } else if(c2==(0xdf)&&ev[(c1-SSP)*2]) {
+ /* ȾÂùÅÀ */
+ (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
+ c2=0;
+ NEXT;
+ } else {
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ /* we have to check this c2 */
+ /* and no way to push back SSO */
+ c1 = c2; c2 = 0;
+ goto euc_1byte_check;
+ }
+ }
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ NEXT;
+ } else
+ SEND;
+ } else if(c1 < SSP && iconv != s_iconv) {
+ /* strange code in EUC */
+ iconv = s_iconv; /* try SJIS */
+ c2 = c1;
+ NEXT;
+ } else {
+ /* already established */
+ c2 = c1;
+ NEXT;
+ }
+ }
+ } else if((c1 > SPACE) && (c1 != DEL)) {
+ /* in case of Roman characters */
+ if(shift_mode) {
+ c1 |= 0x80;
+ /* output 1 shifted byte */
+ if(x0201_f && (!iso8859_f||input_mode==X0201) &&
+ SSP<=c1 && c1<0xe0 ) {
+ if(dv[(c1-SSP)*2]||ev[(c1-SSP)*2]) {
+ if((c2 = GETC(f)) == EOF) {
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ LAST;
+ } else if(c2==(0xde&0x7f)) { /* ÂùÅÀ */
+ (*oconv)(dv[(c1-SSP)*2],dv[(c1-SSP)*2+1]);
+ c2=0;
+ NEXT;
+ } else if(c2==(0xdf&0x7f)&&ev[(c1-SSP)*2]) {
+ /* ȾÂùÅÀ */
+ (*oconv)(ev[(c1-SSP)*2],ev[(c1-SSP)*2+1]);
+ c2=0;
+ NEXT;
+ }
+ UNGETC(c2,f); c2 = 0;
+ }
+ (*oconv)(cv[(c1-SSP)*2],cv[(c1-SSP)*2+1]);
+ NEXT;
+ } else
+ SEND;
+ } else if(c1 == '(' && broken_f && input_mode == X0208
+ && !mime_mode ) {
+ /* Try to recover missing escape */
+ if((c1 = GETC(f)) == EOF) {
+ (*oconv)(0, '(');
+ LAST;
+ } else {
+ if(c1 == 'B' || c1 == 'J' || c1 == 'H') {
+ input_mode = ASCII; shift_mode = FALSE;
+ NEXT;
+ } else {
+ (*oconv)(0, '(');
+ /* do not modify various input_mode */
+ /* It can be vt100 sequence */
+ SEND;
+ }
+ }
+ } else if(input_mode == X0208) {
+ /* in case of Kanji shifted */
+ c2 = c1;
+ NEXT;
+ /* goto next_byte */
+ } else if(c1 == '=' && mime_f && !mime_mode ) {
+ if((c1 = sgetc(f)) == EOF) {
+ (*oconv)(0, '=');
+ LAST;
+ } else if(c1 == '?') {
+ /* =? is mime conversion start sequence */
+ if(mime_begin(f) == EOF) /* check in detail */
+ LAST;
+ else
+ NEXT;
+ } else {
+ (*oconv)(0, '=');
+ sungetc(c1,f);
+ NEXT;
+ }
+ } else if(c1 == '$' && broken_f && !mime_mode) {
+ /* try to recover missing escape */
+ if((c1 = GETC(f)) == EOF) {
+ (*oconv)(0, '$');
+ LAST;
+ } else if(c1 == '@'|| c1 == 'B') {
+ /* in case of Kanji in ESC sequence */
+ input_mode = X0208;
+ shift_mode = FALSE;
+ NEXT;
+ } else {
+ /* sorry */
+ (*oconv)(0, '$');
+ (*oconv)(0, c1);
+ NEXT;
+ }
+ } else
+ SEND;
+ } else if(c1 == SI) {
+ shift_mode = FALSE;
+ NEXT;
+ } else if(c1 == SO) {
+ shift_mode = TRUE;
+ NEXT;
+ } else if(c1 == ESC ) {
+ if((c1 = GETC(f)) == EOF) {
+ (*oconv)(0, ESC);
+ LAST;
+ } else if(c1 == '$') {
+ if((c1 = GETC(f)) == EOF) {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ LAST;
+ } else if(c1 == '@'|| c1 == 'B') {
+ /* This is kanji introduction */
+ input_mode = X0208;
+ shift_mode = FALSE;
+ NEXT;
+ } else if(c1 == '(') {
+ if((c1 = GETC(f)) == EOF) {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, '(');
LAST;
- else
- NEXT;
- } else {
- (*oconv) (0, '=');
- sungetc(c1,f);
- NEXT;
- }
- } else if(c1 == '$' && broken_f && !mime_mode) {
- /* try to recover missing escape */
- if ((c1 = GETC (f)) == SEOF) {
- (*oconv) (0, '$');
- LAST;
- } else if(c1 == '@'|| c1 == 'B') {
- /* in case of Kanji in ESC sequence */
- input_mode = X0208;
- shift_mode = FALSE;
- NEXT;
- } else {
- /* sorry */
- (*oconv) (0, '$');
- (*oconv) (0, c1);
- NEXT;
- }
- } else
- SEND;
- } else if (c1 == SI) {
- shift_mode = FALSE;
- NEXT;
- } else if (c1 == SO) {
- shift_mode = TRUE;
- NEXT;
- } else if (c1 == ESC ) {
- if ((c1 = GETC (f)) == SEOF) {
- (*oconv) (0, ESC);
- LAST;
- } else if (c1 == '$') {
- if ((c1 = GETC (f)) == SEOF) {
- (*oconv) (0, ESC);
- (*oconv) (0, '$');
- LAST;
- } else if(c1 == '@'|| c1 == 'B') {
- /* This is kanji introduction */
- input_mode = X0208;
- shift_mode = FALSE;
- NEXT;
- } else {
- (*oconv) (0, ESC);
- (*oconv) (0, '$');
- (*oconv) (0, c1);
- NEXT;
- }
- } else if (c1 == '(') {
- if ((c1 = GETC (f)) == SEOF) {
- (*oconv) (0, ESC);
- (*oconv) (0, '(');
- LAST;
- } else {
- if (c1 == 'I') {
- /* This is X0201 kana introduction */
- input_mode = X0201; shift_mode = X0201;
- NEXT;
- } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
- /* This is X0208 kanji introduction */
- input_mode = ASCII; shift_mode = FALSE;
+ } else if(c1 == '@'|| c1 == 'B') {
+ /* This is kanji introduction */
+ input_mode = X0208;
+ shift_mode = FALSE;
NEXT;
} else {
- (*oconv) (0, ESC);
- (*oconv) (0, '(');
- /* maintain various input_mode here */
- SEND;
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, '(');
+ (*oconv)(0, c1);
+ NEXT;
}
- }
- } else {
- /* lonely ESC */
- (*oconv) (0, ESC);
- SEND;
- }
- } else
- SEND;
- }
- /* send: */
- if (input_mode == X0208)
- (*oconv) (c2, c1); /* this is JIS, not SJIS/EUC case */
- else
- (*iconv) (c2, c1); /* can be EUC/SJIS */
- c2 = 0;
- continue;
- /* goto next_word */
+ } else if(broken_f&0x2) {
+ input_mode = X0208;
+ shift_mode = FALSE;
+ NEXT;
+ } else {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, c1);
+ NEXT;
+ }
+ } else if(c1 == '(') {
+ if((c1 = GETC(f)) == EOF) {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '(');
+ LAST;
+ } else {
+ if(c1 == 'I') {
+ /* This is X0201 kana introduction */
+ input_mode = X0201; shift_mode = X0201;
+ NEXT;
+ } else if(c1 == 'B' || c1 == 'J' || c1 == 'H') {
+ /* This is X0208 kanji introduction */
+ input_mode = ASCII; shift_mode = FALSE;
+ NEXT;
+ } else if(broken_f&0x2) {
+ input_mode = ASCII; shift_mode = FALSE;
+ NEXT;
+ } else {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '(');
+ /* maintain various input_mode here */
+ SEND;
+ }
+ }
+ } else {
+ /* lonely ESC */
+ (*oconv)(0, ESC);
+ SEND;
+ }
+ } else if(c1 == NL && broken_f&4) {
+ input_mode = ASCII;
+ SEND;
+ } else
+ SEND;
+ }
+ /* send: */
+ if(input_mode == X0208)
+ (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
+ else
+ (*iconv)(c2, c1); /* can be EUC/SJIS */
+ c2 = 0;
+ continue;
+ /* goto next_word */
}
/* epilogue */
- (*iconv) (SEOF, 0);
-
-#ifdef EUC_STRICT_CHECK
- if(oconv == e_oconv)
- fix_euc_code(sstdout->pointer, sstdout->tail - sstdout->head);
-#endif /* EUC_STRICT_CHECK */
+ (*iconv)(EOF, 0);
return 1;
}
-static int
-h_conv(SFILE *f, int c2, int c1)
+
+
+
+int
+h_conv(SFILE *f, int c2, int c1)
{
- register int wc;
+ int wc;
- /** it must NOT be in the kanji shifte sequence */
- /** it must NOT be written in JIS7 */
- /** and it must be after 2 byte 8bit code */
+ /** it must NOT be in the kanji shifte sequence */
+ /** it must NOT be written in JIS7 */
+ /** and it must be after 2 byte 8bit code */
hold_count = 0;
- push_hold_buf (c2, c1);
+ push_hold_buf(c2, c1);
c2 = 0;
- while ((c1 = GETC (f)) != SEOF) {
- if (c2) {
- /* second byte */
- if (!estab_f) {
- /* not established */
- if (c1 > SSP) {
- /* it is still ambiguious yet */
- SEND;
- } else if (c1 < AT) {
- /* ignore bogus first byte */
- c2 = 0;
- SEND;
- } else {
- /* now established */
- /* it seems to be MS Kanji */
- estab_f = TRUE;
- iconv = s_iconv;
- SEND;
- }
- } else
- SEND;
- } else {
- /* First byte */
- if (c1 > DEL) {
- /* 8th bit is on */
- if (c1 < SSP) {
- /* it seems to be MS Kanji */
- estab_f = TRUE;
- iconv = s_iconv;
- } else if (c1 < 0xe0) {
- /* it seems to be EUC */
- estab_f = TRUE;
- iconv = oconv;
- } else {
- /* still ambiguious */
- }
- c2 = c1;
- NEXT;
- } else
- /* 7 bit code , then send without any process */
- SEND;
- }
- /* send: */
- if ((push_hold_buf (c2, c1) == SEOF) || estab_f)
- break;
- c2 = 0;
- continue;
+ while ((c1 = GETC(f)) != EOF) {
+ if(c2) {
+ /* second byte */
+ if(!estab_f) {
+ /* not established */
+ if(c1 > SSP) {
+ /* it is still ambiguious yet */
+ SEND;
+ } else if(c1 < AT) {
+ /* ignore bogus first byte */
+ c2 = 0;
+ SEND;
+ } else {
+ /* now established */
+ /* it seems to be MS Kanji */
+ estab_f = TRUE;
+ iconv = s_iconv;
+ SEND;
+ }
+ } else
+ SEND;
+ } else {
+ /* First byte */
+ if(c1 > DEL) {
+ /* 8th bit is on */
+ if(c1 < SSP) {
+ /* it seems to be MS Kanji */
+ estab_f = TRUE;
+ iconv = s_iconv;
+ } else if(c1 < 0xe0) {
+ /* it seems to be EUC */
+ estab_f = TRUE;
+ iconv = oconv;
+ } else {
+ /* still ambiguious */
+ }
+ c2 = c1;
+ NEXT;
+ } else
+ /* 7 bit code , then send without any process */
+ SEND;
+ }
+ /* send: */
+ if((push_hold_buf(c2, c1) == EOF) || estab_f)
+ break;
+ c2 = 0;
+ continue;
}
/** now,
**/
for (wc = 0; wc < hold_count; wc += 2) {
- c2 = hold_buf[wc];
- c1 = hold_buf[wc+1];
- (*iconv) (c2, c1);
+ c2 = hold_buf[wc];
+ c1 = hold_buf[wc+1];
+ (*iconv)(c2, c1);
}
- return VOIDVOID ;
+ return VOIDVOID;
}
-static int
+
+
+int
push_hold_buf(int c2, int c1)
{
- if (hold_count >= HOLD_SIZE*2)
- return (SEOF);
+ if(hold_count >= HOLD_SIZE*2)
+ return (EOF);
hold_buf[hold_count++] = c2;
hold_buf[hold_count++] = c1;
- return ((hold_count >= HOLD_SIZE*2) ? SEOF : hold_count);
+ return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
}
-static int
+int
s_iconv(int c2, int c1)
{
- if ((c2 == SEOF) || (c2 == 0)) {
- /* NOP */
+ if((c2 == EOF) || (c2 == 0)) {
+ /* NOP */
} else {
- c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
- if (c1 < 0x9f)
- c1 = c1 - ((c1 > DEL) ? SP : 0x1f);
- else {
- c1 = c1 - 0x7e;
- c2++;
- }
+ c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
+ if(c1 < 0x9f)
+ c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
+ else {
+ c1 = c1 - 0x7e;
+ c2++;
+ }
}
- (*oconv) (c2, c1);
+ (*oconv)(c2, c1);
return 1;
}
-static int
+
e_oconv(int c2, int c1)
{
c2 = pre_convert(c1,c2); c1 = c1_return;
if(fold_f) {
- switch(fold(c2,c1)) {
- case '\n': sputchar('\n');
- break;
- case 0: return VOIDVOID;
- case '\r':
- c1 = '\n'; c2 = 0;
- break;
- case '\t':
- case ' ':
- c1 = ' '; c2 = 0;
- break;
- }
+ switch(line_fold(c2,c1)) {
+ case '\n':
+ if(add_cr == TRUE) {
+ sputchar('\r');
+ c1 = '\n';
+ }
+ sputchar('\n');
+ break;
+ case 0: return VOIDVOID;
+ case '\r':
+ c1 = '\n'; c2 = 0;
+ break;
+ case '\t':
+ case ' ':
+ c1 = ' '; c2 = 0;
+ break;
+ }
+ }
+ if(c2==DOUBLE_SPACE) {
+ sputchar(' '); sputchar(' ');
+ return VOIDVOID;
}
- if (c2 == SEOF)
- return VOIDVOID;
- else if (c2 == 0 && (c1&0x80)) {
- sputchar(SSO); sputchar(c1);
- } else if (c2 == 0) {
- sputchar(c1);
+ if(c2 == EOF)
+ return VOIDVOID;
+ else if(c2 == 0 && (c1&0x80)) {
+ sputchar(SSO); sputchar(c1);
+ } else if(c2 == 0) {
+ if(c1 == '\n' && add_cr == TRUE)
+ sputchar('\r');
+ if(c1 != '\r')
+ sputchar(c1);
+ else if(del_cr == FALSE)
+ sputchar(c1);
} else {
- if((c1<0x20 || 0x7e<c1) ||
- (c2<0x20 || 0x7e<c2)) {
- estab_f = FALSE;
- return VOIDVOID; /* too late to rescue this char */
- }
- sputchar(c2 | 0x080);
- sputchar(c1 | 0x080);
+ if((c1<0x20 || 0x7e<c1) ||
+ (c2<0x20 || 0x7e<c2)) {
+ estab_f = FALSE;
+ return VOIDVOID; /* too late to rescue this char */
+ }
+ sputchar(c2 | 0x080);
+ sputchar(c1 | 0x080);
}
return VOIDVOID;
}
-static int
+
+
+int
s_oconv(int c2, int c1)
{
c2 = pre_convert(c1,c2); c1 = c1_return;
if(fold_f) {
- switch(fold(c2,c1)) {
- case '\n': sputchar('\n');
- break;
- case '\r':
- c1 = '\n'; c2 = 0;
- break;
- case 0: return VOIDVOID;
- case '\t':
- case ' ':
- c1 = ' '; c2 = 0;
- break;
- }
+ switch(line_fold(c2,c1)) {
+ case '\n':
+ if(add_cr == TRUE) {
+ sputchar('\r');
+ c1 = '\n';
+ }
+ sputchar('\n');
+ break;
+ case '\r':
+ c1 = '\n'; c2 = 0;
+ break;
+ case 0: return VOIDVOID;
+ case '\t':
+ case ' ':
+ c1 = ' '; c2 = 0;
+ break;
+ }
}
- if (c2 == SEOF)
- return VOIDVOID;
- else if (c2 == 0) {
- sputchar(c1);
+ if(c2==DOUBLE_SPACE) {
+ sputchar(' '); sputchar(' ');
+ return VOIDVOID;
+ }
+ if(c2 == EOF)
+ return VOIDVOID;
+ else if(c2 == 0) {
+ if(c1 == '\n' && add_cr == TRUE)
+ sputchar('\r');
+ if(c1 != '\r')
+ sputchar(c1);
+ else if(del_cr == FALSE)
+ sputchar(c1);
} else {
- if((c1<0x20 || 0x7e<c1) ||
- (c2<0x20 || 0x7e<c2)) {
- estab_f = FALSE;
- return VOIDVOID; /* too late to rescue this char */
- }
- sputchar((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1)));
- sputchar((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e)));
+ if((c1<0x20 || 0x7e<c1) ||
+ (c2<0x20 || 0x7e<c2)) {
+ estab_f = FALSE;
+ return VOIDVOID; /* too late to rescue this char */
+ }
+ sputchar((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1)));
+ sputchar((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e)));
}
return VOIDVOID;
}
-static int
+int
j_oconv(int c2, int c1)
{
c2 = pre_convert(c1,c2); c1 = c1_return;
if(fold_f) {
- switch(fold(c2,c1)) {
- case '\n':
- if (output_mode) {
- sputchar(ESC);
- sputchar('(');
- sputchar(ascii_intro);
- }
- sputchar('\n');
- output_mode = ASCII;
- break;
- case '\r':
- c1 = '\n'; c2 = 0;
- break;
- case '\t':
- case ' ':
- c1 = ' '; c2 = 0;
- break;
- case 0: return VOIDVOID;
- }
+ switch(line_fold(c2,c1)) {
+ case '\n':
+ if(output_mode) {
+ sputchar(ESC);
+ sputchar('(');
+ sputchar(ascii_intro);
+ }
+ if(add_cr == TRUE) {
+ sputchar('\r');
+ c1 = '\n';
+ }
+ sputchar('\n');
+ output_mode = ASCII;
+ break;
+ case '\r':
+ c1 = '\n'; c2 = 0;
+ break;
+ case '\t':
+ case ' ':
+ c1 = ' '; c2 = 0;
+ break;
+ case 0: return VOIDVOID;
+ }
}
- if (c2 == SEOF) {
- if (output_mode) {
- sputchar(ESC);
- sputchar('(');
- sputchar(ascii_intro);
- }
- } else if (c2 == 0 && (c1 & 0x80)) {
- if(input_mode==X0201 || !iso8859_f) {
- if(output_mode!=X0201) {
- sputchar(ESC);
- sputchar('(');
- sputchar('I');
- output_mode = X0201;
- }
- c1 &= 0x7f;
- } else {
- /* iso8859 introduction, or 8th bit on */
- /* Can we convert in 7bit form using ESC-'-'-A ?
- Is this popular? */
- }
- sputchar(c1);
- } else if (c2 == 0) {
- if (output_mode) {
- sputchar(ESC);
- sputchar('(');
- sputchar(ascii_intro);
- output_mode = ASCII;
- }
- sputchar(c1);
+ if(c2 == EOF) {
+ if(output_mode) {
+ sputchar(ESC);
+ sputchar('(');
+ sputchar(ascii_intro);
+ }
+ } else if(c2 == 0 && (c1 & 0x80)) {
+ if(input_mode==X0201 || !iso8859_f) {
+ if(output_mode!=X0201) {
+ sputchar(ESC);
+ sputchar('(');
+ sputchar('I');
+ output_mode = X0201;
+ }
+ c1 &= 0x7f;
+ } else {
+ /* iso8859 introduction, or 8th bit on */
+ /* Can we convert in 7bit form using ESC-'-'-A ?
+ Is this popular? */
+ }
+ sputchar(c1);
+ } else if(c2 == 0) {
+ if(output_mode) {
+ sputchar(ESC);
+ sputchar('(');
+ sputchar(ascii_intro);
+ output_mode = ASCII;
+ }
+ if(c1 == '\n' && add_cr == TRUE)
+ sputchar('\r');
+ if(c1 != '\r')
+ sputchar(c1);
+ else if(del_cr == FALSE)
+ sputchar(c1);
+ } else if(c2 == DOUBLE_SPACE) {
+ if(output_mode) {
+ sputchar(ESC);
+ sputchar('(');
+ sputchar(ascii_intro);
+ output_mode = ASCII;
+ }
+ sputchar(' ');
+ if(c1 == '\n' && add_cr == TRUE)
+ sputchar('\r');
+ if(c1 != '\r')
+ sputchar(c1);
+ else if(del_cr == FALSE)
+ sputchar(c1);
} else {
- if (output_mode != X0208) {
- sputchar(ESC);
- sputchar('$');
- sputchar(kanji_intro);
- output_mode = X0208;
- }
- if(c1<0x20 || 0x7e<c1) return VOIDVOID;
- if(c2<0x20 || 0x7e<c2) return VOIDVOID;
- sputchar(c2);
- sputchar(c1);
+ if(output_mode != X0208) {
+ sputchar(ESC);
+ sputchar('$');
+ sputchar(kanji_intro);
+ output_mode = X0208;
+ }
+ if(c1<0x20 || 0x7e<c1)
+ return VOIDVOID;
+ if(c2<0x20 || 0x7e<c2)
+ return VOIDVOID;
+ sputchar(c2);
+ if(c1 == '\n' && add_cr == TRUE)
+ sputchar('\r');
+ if(c1 != '\r')
+ sputchar(c1);
+ else if(del_cr == FALSE)
+ sputchar(c1);
}
return VOIDVOID;
}
+
+
#define rot13(c) ( \
( c < 'A' ) ? c: \
(c <= 'M') ? (c + 13): \
(c <= 'Z') ? (c - 13): \
- (c < 'a') ? (c): \
+ (c < 'a') ? (c): \
(c <= 'm') ? (c + 13): \
(c <= 'z') ? (c - 13): \
(c) \
)
-#define rot47(c) ( \
+#define rot47(c) ( \
( c < '!' ) ? c: \
( c <= 'O' ) ? (c + 47) : \
( c <= '~' ) ? (c - 47) : \
)
-/*
- Return value of fold()
+/*
+ Return value of line_fold()
- \n add newline and output char
- \r add newline and output nothing
+ \n add newline and output char
+ \r add newline and output nothing
' ' space
- 0 skip
- 1 (or else) normal output
+ 0 skip
+ 1 (or else) normal output
fold state in prev (previous character)
>0x80 Japanese (X0208/X0201)
<0x80 ASCII
- \n new line
+ \n new line
' ' space
- This fold algorthm does not conserve heading space in a line.
+ This fold algorthm does not preserve heading space in a line.
This is the main difference from fmt.
*/
-static int
-fold(int c2, int c1)
-{
+int
+line_fold(int c2, int c1)
+{
int prev0;
- if(c1=='\r')
- return 0; /* ignore cr */
+ if(c1=='\r')
+ return 0; /* ignore cr */
if(c1== 8) {
- if(line>0) line--;
- return 1;
+ if(line>0) line--;
+ return 1;
}
- if(c2==SEOF && line != 0) /* close open last line */
- return '\n';
+ if(c2==EOF && line != 0) /* close open last line */
+ return '\n';
/* new line */
if(c1=='\n') {
- if(prev == c1) { /* duplicate newline */
- if(line) {
- line = 0;
- return '\n'; /* output two newline */
- } else {
- line = 0;
- return 1;
- }
- } else {
- if(prev&0x80) { /* Japanese? */
- prev = c1;
- return 0; /* ignore given single newline */
- } else if(prev==' ') {
- return 0;
- } else {
- prev = c1;
- if(++line<=fold_len)
- return ' ';
- else {
- line = 0;
- return '\r'; /* fold and output nothing */
- }
- }
- }
+ if(prev == c1) { /* duplicate newline */
+ if(line) {
+ line = 0;
+ return '\n'; /* output two newline */
+ } else {
+ line = 0;
+ return 1;
+ }
+ } else {
+ if(prev&0x80) { /* Japanese? */
+ prev = c1;
+ return 0; /* ignore given single newline */
+ } else if(prev==' ') {
+ return 0;
+ } else {
+ prev = c1;
+ if(++line<=fold_len)
+ return ' ';
+ else {
+ line = 0;
+ return '\r'; /* fold and output nothing */
+ }
+ }
+ }
}
if(c1=='\f') {
- prev = '\n';
- if(line==0)
- return 1;
- line = 0;
- return '\n'; /* output newline and clear */
+ prev = '\n';
+ if(line==0)
+ return 1;
+ line = 0;
+ return '\n'; /* output newline and clear */
}
/* X0208 kankaku or ascii space */
- if((c2==0&&c1==' ')||
- (c2==0&&c1=='\t')||
- (c2=='!'&& c1=='!')) {
- if(prev == ' ') {
- return 0; /* remove duplicate spaces */
- }
- prev = ' ';
- if(++line<=fold_len)
- return ' '; /* output ASCII space only */
- else {
- prev = ' '; line = 0;
- return '\r'; /* fold and output nothing */
- }
- }
+ if( (c2==0&&c1==' ')||
+ (c2==0&&c1=='\t')||
+ (c2==DOUBLE_SPACE)||
+ (c2=='!'&& c1=='!')) {
+ if(prev == ' ') {
+ return 0; /* remove duplicate spaces */
+ }
+ prev = ' ';
+ if(++line<=fold_len)
+ return ' '; /* output ASCII space only */
+ else {
+ prev = ' '; line = 0;
+ return '\r'; /* fold and output nothing */
+ }
+ }
prev0 = prev; /* we still need this one... , but almost done */
prev = c1;
- if (c2 || (SSP<=c1 && c1<=0xdf))
- prev |= 0x80; /* this is Japanese */
+ if(c2 || (SSP<=c1 && c1<=0xdf))
+ prev |= 0x80; /* this is Japanese */
line += (c2==0)?1:2;
if(line<=fold_len) { /* normal case */
- return 1;
+ return 1;
}
if(line>=fold_len+FOLD_MARGIN) { /* too many kinsou suspension */
- line = (c2==0)?1:2;
- return '\n'; /* We can't wait, do fold now */
+ line = (c2==0)?1:2;
+ return '\n'; /* We can't wait, do fold now */
}
- /* simple kinsoku rules return 1 means no folding */
+ /* simple kinsoku rules return 1 means no folding */
if(c2==0) {
- if(c1==0xde) return 1; /* ¡«*/
- if(c1==0xdf) return 1; /* ¡¬*/
- if(c1==0xa4) return 1; /* ¡£*/
- if(c1==0xa3) return 1; /* ¡¤*/
- if(c1==0xa1) return 1; /* ¡×*/
- if(c1==0xb0) return 1; /* - */
- if(SSP<=c1 && c1<=0xdf) { /* X0201 */
- line = 1;
- return '\n';/* add one new line before this character */
- }
- /* fold point in ASCII { [ ( */
- if(( c1!=')'&&
- c1!=']'&&
- c1!='}'&&
- c1!='.'&&
- c1!=','&&
- c1!='!'&&
- c1!='?'&&
- c1!='/'&&
- c1!=':'&&
- c1!=';')&&
- ((prev0=='\n')|| (prev0==' ')|| /* ignored new line */
- (prev0&0x80)) /* X0208 - ASCII */
- ) {
- line = 1;
- return '\n';/* add one new line before this character */
- }
- return 1; /* default no fold in ASCII */
+ if(c1==0xde) return 1; /* ¡«*/
+ if(c1==0xdf) return 1; /* ¡¬*/
+ if(c1==0xa4) return 1; /* ¡£*/
+ if(c1==0xa3) return 1; /* ¡¤*/
+ if(c1==0xa1) return 1; /* ¡×*/
+ if(c1==0xb0) return 1; /* - */
+ if(SSP<=c1 && c1<=0xdf) { /* X0201 */
+ line = 1;
+ return '\n';/* add one new line before this character */
+ }
+ /* fold point in ASCII { [ ( */
+ if(( c1!=')'&&
+ c1!=']'&&
+ c1!='}'&&
+ c1!='.'&&
+ c1!=','&&
+ c1!='!'&&
+ c1!='?'&&
+ c1!='/'&&
+ c1!=':'&&
+ c1!=';')&&
+ ((prev0=='\n')|| (prev0==' ')|| /* ignored new line */
+ (prev0&0x80)) /* X0208 - ASCII */
+ ) {
+ line = 1;
+ return '\n';/* add one new line before this character */
+ }
+ return 1; /* default no fold in ASCII */
} else {
- if(c2=='!') {
- if(c1=='"') return 1; /* ¡¢ */
- if(c1=='#') return 1; /* ¡£ */
- if(c1=='$') return 1; /* ¡¤ */
- if(c1=='%') return 1; /* ¡¥ */
- if(c1=='\'') return 1; /* ¡Ü */
- if(c1=='(') return 1; /* ¡¨ */
- if(c1==')') return 1; /* ¡© */
- if(c1=='*') return 1; /* ¡ª */
- if(c1=='+') return 1; /* ¡« */
- if(c1==',') return 1; /* ¡¬ */
- }
- line = 2;
- return '\n'; /* add one new line before this character */
+ if(c2=='!') {
+ if(c1=='"') return 1; /* ¡¢ */
+ if(c1=='#') return 1; /* ¡£ */
+ if(c1=='$') return 1; /* ¡¤ */
+ if(c1=='%') return 1; /* ¡¥ */
+ if(c1=='\'') return 1; /* ¡Ü */
+ if(c1=='(') return 1; /* ¡¨ */
+ if(c1==')') return 1; /* ¡© */
+ if(c1=='*') return 1; /* ¡ª */
+ if(c1=='+') return 1; /* ¡« */
+ if(c1==',') return 1; /* ¡¬ */
+ }
+ line = 2;
+ return '\n'; /* add one new line before this character */
}
}
-static int
+int
pre_convert(int c1, int c2)
{
- if(c2) c1 &= 0x7f;
- c1_return = c1;
- if(c2==SEOF) return c2;
- c2 &= 0x7f;
- if(rot_f) {
- if(c2) {
- c1 = rot47(c1);
- c2 = rot47(c2);
- } else {
- if (!(c1 & 0x80))
- c1 = rot13(c1);
- }
- c1_return = c1;
- }
- /* JISX0208 Alphabet */
- if (alpha_f && c2 == 0x23 ) return 0;
- /* JISX0208 Kigou */
- if (alpha_f && c2 == 0x21 ) {
- if(0x20<c1 && c1<0x7f && fv[c1-0x20]) {
- c1_return = fv[c1-0x20];
- return 0;
- }
- }
- return c2;
+ if(c2) c1 &= 0x7f;
+ c1_return = c1;
+ if(c2==EOF) return c2;
+ c2 &= 0x7f;
+ if(rot_f) {
+ if(c2) {
+ c1 = rot47(c1);
+ c2 = rot47(c2);
+ } else {
+ if(!(c1 & 0x80))
+ c1 = rot13(c1);
+ }
+ c1_return = c1;
+ }
+ /* JISX0208 Alphabet */
+ if(alpha_f && c2 == 0x23 ) return 0;
+ /* JISX0208 Kigou */
+ if(alpha_f && c2 == 0x21 ) {
+ if(0x21==c1) {
+ if(alpha_f&0x2) {
+ c1_return = ' ';
+ return 0;
+ } else if(alpha_f&0x4) {
+ c1_return = ' ';
+ return DOUBLE_SPACE;
+ } else {
+ return c2;
+ }
+ } else if(0x20<c1 && c1<0x7f && fv[c1-0x20]) {
+ c1_return = fv[c1-0x20];
+ return 0;
+ }
+ }
+ return c2;
}
+#ifdef STRICT_MIME
+/* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
-/* This ONLY converts =?ISO-2022-JP?B?HOGEHOGE?= to adequate form */
-/* Quickly programmed by I.Ichikawa (( )) 1994, Dec. */
-/* You may freely use this, unless you pretend you build this */
-
-static unsigned char *mime_pattern[] = {
- (unsigned char *)"=?ISO-8859-1?Q?",
- (unsigned char *)"=?ISO-2022-JP?B?",
+unsigned char *mime_pattern[] = {
+ (unsigned char *)"\075?ISO-8859-1?Q?",
+ (unsigned char *)"\075?ISO-2022-JP?B?",
+ (unsigned char *)"\075?ISO-2022-JP?Q?",
+ (unsigned char *)"\075?JAPANESE_EUC?B?",
+ (unsigned char *)"\075?SHIFT_JIS?B?",
NULL
};
-static int mime_encode[] = {
- 'Q',
- 'B',
+int mime_encode[] = {
+ 'Q', 'B', 'Q',
0
};
+#endif
-static int iso8859_f_save;
+#define MAXRECOVER 20
+int iso8859_f_save;
-static int
+#ifdef STRICT_MIME
+
+#define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
+/* I don't trust portablity of toupper */
+
+int
mime_begin(SFILE *f)
{
- int c1=0;
+ int c1;
int i,j,k;
unsigned char *p,*q;
+ int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
mime_mode = FALSE;
/* =? has been checked */
j = 0;
p = mime_pattern[j];
+ r[0]='='; r[1]='?';
+
+ for(i=2;p[i]>' ';i++) { /* start at =? */
+ if( ((r[i] = c1 = sgetc(f))==EOF) || nkf_toupper(c1) != p[i] ) {
+ /* pattern fails, try next one */
+ q = p;
+ while (p = mime_pattern[++j]) {
+ for(k=2;k<i;k++) /* assume length(p) > i */
+ if(p[k]!=q[k]) break;
+ if(k==i && nkf_toupper(c1)==p[k]) break;
+ }
+ if(p) continue; /* found next one, continue */
+ /* all fails, output from recovery buffer */
+ sungetc(c1,f);
+ for(j=0;j<i;j++) {
+ (*oconv)(0,r[j]);
+ }
+ return c1;
+ }
+ }
+ mime_mode = mime_encode[j];
+ iso8859_f_save = iso8859_f;
+ if(j==0) {
+ iso8859_f = TRUE;
+ }
+ if(mime_mode=='B') {
+ mimebuf_f = unbuf_f;
+ if(!unbuf_f) {
+ /* do MIME integrity check */
+ return mime_integrity(f,mime_pattern[j]);
+ }
+ }
+ mimebuf_f = TRUE;
+ return c1;
+}
- for(i=2;p[i]>' ';i++) {
- if( ((c1 = sgetc(f))==SEOF) || c1 != p[i] ) {
- q = p;
-/* while (p = mime_pattern[++j]) { */
- while ((p = mime_pattern[++j])!=NULL) {
- for(k=2;k<i;k++) /* assume length(p) > i */
- if(p[k]!=q[k]) break; if(k==i && c1==p[k]) break; }
- if(p) continue; sungetc(c1,f); for(j=0;j<i;j++) {
- (*oconv)(0,q[j]); } return c1; } } iso8859_f_save
- = iso8859_f; if(j==0) { iso8859_f = TRUE; }
- mime_mode = mime_encode[j]; return c1; }
+#define mime_getc0(f) (mimebuf_f?sgetc(f):Fifo(mime_input++))
+#define mime_ungetc0(c,f) (mimebuf_f?sungetc(c,f):mime_input--)
-static int
+#else
+int
+mime_begin(SFILE *f)
+{
+ int c1;
+ int i,j;
+ int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
+
+ mime_mode = FALSE;
+ /* =? has been checked */
+ j = 0;
+ r[0]='='; r[1]='?';
+ for(i=2;i<MAXRECOVER;i++) { /* start at =? */
+ /* We accept any charcter type even if it is breaked by new lines */
+ if( (r[i] = c1 = sgetc(f))==EOF) break;
+ if(c1=='=') break;
+ if(c1<' '&& c1!='\r' && c1!='\n') break;
+ if(c1=='?') {
+ i++;
+ if(!(i<MAXRECOVER) || (r[i] = c1 = sgetc(f))==EOF) break;
+ if(c1=='b'||c1=='B') {
+ mime_mode = 'B';
+ } else if(c1=='q'||c1=='Q') {
+ mime_mode = 'Q';
+ } else {
+ break;
+ }
+ i++;
+ if(!(i<MAXRECOVER) || (r[i] = c1 = sgetc(f))==EOF) break;
+ if(c1=='?') {
+ break;
+ } else {
+ mime_mode = FALSE;
+ }
+ break;
+ }
+ }
+ if(!mime_mode || c1==EOF || i==MAXRECOVER) {
+ sungetc(c1,f);
+ for(j=0;j<i;j++) {
+ (*oconv)(0,r[j]);
+ }
+ return c1;
+ }
+ iso8859_f_save = iso8859_f;
+ /* do no MIME integrity check */
+ return c1; /* used only for checking EOF */
+}
+
+#define mime_getc0(f) sgetc(f)
+#define mime_ungetc0(c,f) sungetc(c,f)
+
+#endif
+
+int
mime_getc(SFILE *f)
{
int c1, c2, c3, c4, cc;
- int t1, t2, t3, t4, mode;
+ int t1, t2, t3, t4, mode, exit_mode;
- if(mime_top != mime_last) { /* Something is in FIFO */
- return Fifo(mime_top++);
+ if(mime_top != mime_last) { /* Something is in FIFO */
+ return Fifo(mime_top++);
}
+ if(mimebuf_f == FIXED_MIME)
+ exit_mode = mime_mode;
+ else
+ exit_mode = FALSE;
if(mime_mode == 'Q') {
- if ((c1 = sgetc(f)) == SEOF) return (SEOF);
- if(c1!='=' && c1!='?') return c1;
- if ((c2 = sgetc(f)) == SEOF) return (SEOF);
- if(c1=='?'&&c2=='=') {
- /* end Q encoding */
- iso8859_f = iso8859_f_save;
- mime_mode = 0; return sgetc(f);
- }
- if ((c3 = sgetc(f)) == SEOF) return (SEOF);
-#define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
+ if((c1 = mime_getc0(f)) == EOF) return (EOF);
+ if(c1=='_') return ' ';
+ if(c1!='=' && c1!='?')
+ return c1;
+ mime_mode = exit_mode; /* prepare for quit */
+ if(c1<=' ') return c1;
+ if((c2 = mime_getc0(f)) == EOF) return (EOF);
+ if(c2<=' ') return c2;
+ if(c1=='?'&&c2=='=') {
+ /* end Q encoding */
+ input_mode = exit_mode;
+ iso8859_f = iso8859_f_save;
+ return sgetc(f);
+ }
+ if(c1=='?') {
+ mime_mode = 'Q'; /* still in MIME */
+ mime_ungetc0(c2,f);
+ return c1;
+ }
+ if((c3 = mime_getc0(f)) == EOF) return (EOF);
+ if(c2<=' ') return c2;
+ mime_mode = 'Q'; /* still in MIME */
+#define hex(c) (('0'<=c&&c<='9')?(c-'0'):\
('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0)
- return ((hex(c2)<<4) + hex(c3));
+ return ((hex(c2)<<4) + hex(c3));
}
- if(mime_mode != 'B')
- return sgetc(f);
+ if(mime_mode != 'B') {
+ mime_mode = FALSE;
+ return sgetc(f);
+ }
/* Base64 encoding */
- /*
- MIME allows line break in the middle of
- Base64, but we are very pessimistic in decoding
- in unbuf mode because MIME encoded code may broken by
- less or editor's control sequence (such as ESC-[-K in unbuffered
- mode. ignore incomplete MIME.
+ /*
+ MIME allows line break in the middle of
+ Base64, but we are very pessimistic in decoding
+ in unbuf mode because MIME encoded code may broken by
+ less or editor's control sequence (such as ESC-[-K in unbuffered
+ mode. ignore incomplete MIME.
*/
mode = mime_mode;
- mime_mode = FALSE; /* prepare for quit */
+ mime_mode = exit_mode; /* prepare for quit */
- while ((c1 = sgetc(f))<=' ') {
- if(c1==SEOF)
- return (SEOF);
- if(unbuf_f) {
- input_mode = ASCII; return c1;
- }
+ while ((c1 = mime_getc0(f))<=' ') {
+ if(c1==EOF)
+ return (EOF);
}
- while ((c2 = sgetc(f))<=' ') {
- if(c2==SEOF)
- return (SEOF);
- if(unbuf_f) {
- input_mode = ASCII; return c2;
- }
+ if((c2 = mime_getc0(f))<=' ') {
+ if(c2==EOF)
+ return (EOF);
+ if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
+ return c2;
}
- if ((c1 == '?') && (c2 == '=')) return sgetc(f);
- while ((c3 = sgetc(f))<=' ') {
- if(c3==SEOF)
- return (SEOF);
- if(unbuf_f) {
- input_mode = ASCII; return c3;
- }
+ if((c1 == '?') && (c2 == '=')) {
+ input_mode = ASCII;
+ while((c1 = sgetc(f))==' ' /* || c1=='\n' || c1=='\r' */);
+ return c1;
}
- while ((c4 = sgetc(f))<=' ') {
- if(c4==SEOF)
- return (SEOF);
- if(unbuf_f) {
- input_mode = ASCII; return c4;
- }
+ if((c3 = mime_getc0(f))<=' ') {
+ if(c3==EOF)
+ return (EOF);
+ if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
+ return c3;
+ }
+ if((c4 = mime_getc0(f))<=' ') {
+ if(c4==EOF)
+ return (EOF);
+ if(mimebuf_f!=FIXED_MIME) input_mode = ASCII;
+ return c4;
}
mime_mode = mode; /* still in MIME sigh... */
t3 = 0x3f & base64decode(c3);
t4 = 0x3f & base64decode(c4);
cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
- if (c2 != '=') {
- Fifo(mime_last++) = cc;
- cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
- if (c3 != '=') {
- Fifo(mime_last++) = cc;
- cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
- if (c4 != '=')
- Fifo(mime_last++) = cc;
- }
+ if(c2 != '=') {
+ Fifo(mime_last++) = cc;
+ cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
+ if(c3 != '=') {
+ Fifo(mime_last++) = cc;
+ cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
+ if(c4 != '=')
+ Fifo(mime_last++) = cc;
+ }
} else {
- return c1;
+ return c1;
}
return Fifo(mime_top++);
}
-static int
-mime_ungetc(unsigned int c)
+int
+mime_ungetc(c)
+unsigned int c;
{
Fifo(mime_last++) = c;
return c;
}
-static int
-base64decode(int c)
+#ifdef STRICT_MIME
+int
+mime_integrity(SFILE *f, unsigned char *p)
{
- int i;
- if (c > '@')
- {
- if (c < '[')
- i = c - 'A'; /* A..Z 0-25 */
- else
- i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
+ int c,d;
+ unsigned int q;
+ /* In buffered mode, read until =? or NL or buffer full
+ */
+ mime_input = mime_top;
+ mime_last = mime_top;
+ while(*p) Fifo(mime_input++) = *p++;
+ d = 0;
+ q = mime_input;
+ while((c=sgetc(f))!=EOF) {
+ if(((mime_input-mime_top)&MIME_BUF_MASK)==0) break;
+ if(c=='=' && d=='?') {
+ /* checked. skip header, start decode */
+ Fifo(mime_input++) = c;
+ mime_input = q;
+ return 1;
+ }
+ if(!( (c=='+'||c=='/'|| c=='=' || c=='?' ||
+ ('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))))
+ break;
+ /* Should we check length mod 4? */
+ Fifo(mime_input++) = c;
+ d=c;
}
- else if (c > '/')
- i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
- else if (c == '+')
- i = '>' /* 62 */ ; /* + 62 */
- else
- i = '?' /* 63 */ ; /* / 63 */
+ /* In case of Incomplete MIME, no MIME decode */
+ Fifo(mime_input++) = c;
+ mime_last = mime_input; /* point undecoded buffer */
+ mime_mode = 1; /* no decode on Fifo last in mime_getc */
+ return 1;
+}
+#endif
+int
+base64decode(int c)
+{
+ int i;
+ if(c > '@')
+ if(c < '[')
+ i = c - 'A'; /* A..Z 0-25 */
+ else
+ i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
+ else if(c > '/')
+ i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
+ else if(c == '+')
+ i = '>' /* 62 */ ; /* + 62 */
+ else
+ i = '?' /* 63 */ ; /* / 63 */
return (i);
}
+void
+reinit()
+{
+ unbuf_f = FALSE;
+ estab_f = FALSE;
+ nop_f = FALSE;
+ binmode_f = TRUE;
+ rot_f = FALSE;
+ input_f = FALSE;
+ alpha_f = FALSE;
+ mime_f = TRUE;
+ mimebuf_f = FALSE;
+ broken_f = FALSE;
+ iso8859_f = FALSE;
+ x0201_f = TRUE;
+ x0201_f = NO_X0201;
+ fold_f = FALSE;
+ kanji_intro = DEFAULT_J;
+ ascii_intro = DEFAULT_R;
+ oconv = DEFAULT_CONV;
+ output_mode = ASCII;
+ input_mode = ASCII;
+ shift_mode = FALSE;
+ mime_mode = FALSE;
+ file_out = FALSE;
+ add_cr = FALSE;
+ del_cr = FALSE;
+}
+
+#if 0
+#ifndef PERL_XS
+int
+usage()
+{
+ fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
+ fprintf(stderr,"Flags:\n");
+ fprintf(stderr,"b,u Output is bufferred (DEFAULT),Output is unbufferred\n");
+#ifdef DEFAULT_CODE_SJIS
+ fprintf(stderr,"j,s,e Outout code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC)\n");
+#endif
+#ifdef DEFAULT_CODE_JIS
+ fprintf(stderr,"j,s,e Outout code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC)\n");
+#endif
+#ifdef DEFAULT_CODE_EUC
+ fprintf(stderr,"j,s,e Outout code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT)\n");
+#endif
+ fprintf(stderr,"J,S,E Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC)\n");
+ fprintf(stderr,"t no conversion\n");
+ fprintf(stderr,"i_ Output sequence to designate JIS-kanji (DEFAULT B)\n");
+ fprintf(stderr,"o_ Output sequence to designate ASCII (DEFAULT B)\n");
+ fprintf(stderr,"r {de/en}crypt ROT13/47\n");
+ fprintf(stderr,"v Show this usage\n");
+ fprintf(stderr,"m[BQ0] MIME decode [B:base64,Q:quoted,0:no decode]\n");
+ fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
+ fprintf(stderr,"f Folding: -f60 or -f\n");
+ fprintf(stderr,"Z[0-2] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces\n");
+ fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
+ fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
+#ifdef MSDOS
+ fprintf(stderr,"T Text mode output\n");
+#endif
+ fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
+ fprintf(stderr,"d,c Delete \\r in line feed, Add \\r in line feed\n");
+ fprintf(stderr,"Network Kanji Filter Version %s (%s) "
+#if defined(MSDOS) && !defined(_Windows)
+ "for DOS"
+#endif
+#if !defined(__WIN32__) && defined(_Windows)
+ "for Win16"
+#endif
+#if defined(__WIN32__) && defined(_Windows)
+ "for Win32"
+#endif
+#ifdef __OS2__
+ "for OS/2"
+#endif
+ ,Version,Patchlevel);
+ fprintf(stderr,"\n%s\n",CopyRight);
+ return 0;
+}
+#endif
+#endif
+
/**
** ¥Ñ¥Ã¥ÁÀ©ºî¼Ô
** void@merope.pleiades.or.jp (Kusakabe Youichi)
** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
- ** kono@csl.sony.co.jp (Shinji Kono)
+ ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
+ ** GHG00637@nifty-serve.or.jp (COW)
**
** ºÇ½ª¹¹¿·Æü
- ** 1994.12.21
+ ** 1998.11.7
**/
/* end */
#endif /* JAPANESE */
+