X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=nkf.c;h=a8bd1e5daa42b352a109e6e77d716be5e84865ec;hb=b7168bf6e811efa0f1333bcfb0defae316629e50;hp=427acfae693e34db80c8f8af97f8bbff8a83abfd;hpb=f90d6b46ae254dfb0250d38228fbcd9334127ab2;p=nkf%2Fnkf.git diff --git a/nkf.c b/nkf.c index 427acfa..a8bd1e5 100644 --- a/nkf.c +++ b/nkf.c @@ -42,11 +42,11 @@ #include "config.h" static char *CopyRight = - "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002 Kono, Furukawa"; + "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2003 Kono, Furukawa"; static char *Version = "2.0"; static char *Patchlevel = - "1/0209/Shinji Kono"; + "3/0301/Shinji Kono"; /* ** @@ -95,8 +95,11 @@ static char *Patchlevel = ** **/ -#if (defined(__TURBOC__) || defined(LSI_C)) && !defined(MSDOS) +#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C)) && !defined(MSDOS) #define MSDOS +#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__) +#define __WIN32__ +#endif #endif #ifdef PERL_XS @@ -125,7 +128,7 @@ static char *Patchlevel = #define setbinmode(fp) #endif -#ifdef _IOFBF /* SysV and MSDOS */ +#ifdef _IOFBF /* SysV and MSDOS, Windows */ #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size) #else /* BSD */ #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size) @@ -134,15 +137,25 @@ static char *Patchlevel = /*Borland C++ 4.5 EasyWin*/ #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */ #define EASYWIN +#ifndef __WIN16__ +#define __WIN16__ +#endif #include #endif #ifdef OVERWRITE /* added by satoru@isoternet.org */ #include -#ifndef MSDOS +#ifndef MSDOS /* UNIX, OS/2 */ #include #include +#else +#if defined(_MSC_VER) /* VC++ */ +#include +#elif defined(__TURBOC__) /* BCC */ +#include +#elif defined(LSI_C) /* LSI C */ +#endif #endif #endif @@ -245,6 +258,17 @@ extern POINT _BufferSize; #define STATIC #endif +struct input_code{ + char *name; + int stat; + int score; + int index; + int buf[3]; + void (*status_func)PROTO((struct input_code *, int)); + int (*iconv_func)PROTO((int c2, int c1, int c0)); + int _file_stat; +}; + STATIC int noconvert PROTO((FILE *f)); STATIC int kanji_convert PROTO((FILE *f)); STATIC int h_conv PROTO((FILE *f,int c2,int c1)); @@ -265,6 +289,7 @@ STATIC void w_oconv PROTO((int c2,int c1)); STATIC void w_oconv16 PROTO((int c2,int c1)); #endif STATIC void e_oconv PROTO((int c2,int c1)); +STATIC void e2s_conv PROTO((int c2, int c1, int *p2, int *p1)); STATIC void s_oconv PROTO((int c2,int c1)); STATIC void j_oconv PROTO((int c2,int c1)); STATIC void fold_conv PROTO((int c2,int c1)); @@ -277,7 +302,7 @@ STATIC void iso2022jp_check_conv PROTO((int c2,int c1)); STATIC void no_connection PROTO((int c2,int c1)); STATIC int no_connection2 PROTO((int c2,int c1,int c0)); -STATIC int code_score PROTO((int c2,int c1,int s)); +STATIC void code_score PROTO((struct input_code *ptr)); STATIC void code_status PROTO((int c)); STATIC void std_putc PROTO((int c)); @@ -346,10 +371,11 @@ static int x0201_f = NO_X0201; /* Assume NO JISX0201 */ static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */ #ifdef UTF8_OUTPUT_ENABLE static int w_oconv16_begin_f= 0; /* utf-16 header */ +static int w_oconv16_LE = 0; /* utf-16 little endian */ #endif -#ifdef CAP_URL_OPTION +#ifdef INPUT_OPTION static int cap_f = FALSE; static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */ static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc; @@ -361,6 +387,12 @@ static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */ static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc; STATIC int url_getc PROTO((FILE *f)); STATIC int url_ungetc PROTO((int c,FILE *f)); + +static int numchar_f = FALSE; +static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */ +static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc; +STATIC int numchar_getc PROTO((FILE *f)); +STATIC int numchar_ungetc PROTO((int c,FILE *f)); #endif #ifdef CHECK_OPTION @@ -370,21 +402,30 @@ static int debug_f = FALSE; STATIC void debug PROTO((char *str)); #endif -static int e_stat = 0; -static int e_score = 0; -static int e_buf[2]; -static int s_stat = 0; -static int s_score = 0; -static int s_buf[2]; +#ifdef SHIFTJIS_CP932 +STATIC int cp932_f = FALSE; +#define CP932_TABLE_BEGIN (0xfa) +#define CP932_TABLE_END (0xfc) + +#endif /* SHIFTJIS_CP932 */ + +STATIC void e_status PROTO((struct input_code *, int)); +STATIC void s_status PROTO((struct input_code *, int)); + #ifdef UTF8_INPUT_ENABLE -static int w_stat = 0; -static int w_score = 0; -static int w_buf[2]; +STATIC void w_status PROTO((struct input_code *, int)); +STATIC void w16_status PROTO((struct input_code *, int)); static int utf16_mode = UTF16_INPUT; -#else -static int w_stat = -1; #endif +struct input_code input_code_list[] = { + {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0}, + {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0}, + {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0}, + {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0}, + {0} +}; + static int mimeout_mode = 0; static int base64_count = 0; @@ -795,14 +836,18 @@ struct { #ifdef OVERWRITE {"overwrite", ""}, #endif -#ifdef CAP_URL_OPTION +#ifdef INPUT_OPTION {"cap-input", ""}, {"url-input", ""}, + {"numchar-input", ""}, #endif #ifdef CHECK_OPTION {"no-output", ""}, {"debug", ""}, #endif +#ifdef SHIFTJIS_CP932 + {"cp932", ""}, +#endif }; static int option_mode; @@ -841,7 +886,7 @@ options(cp) continue; } #endif -#ifdef CAP_URL_OPTION +#ifdef INPUT_OPTION if (strcmp(long_option[i].name, "cap-input") == 0){ cap_f = TRUE; continue; @@ -850,6 +895,10 @@ options(cp) url_f = TRUE; continue; } + if (strcmp(long_option[i].name, "numchar-input") == 0){ + numchar_f = TRUE; + continue; + } #endif #ifdef CHECK_OPTION if (strcmp(long_option[i].name, "no-output") == 0){ @@ -861,6 +910,12 @@ options(cp) continue; } #endif +#ifdef SHIFTJIS_CP932 + if (strcmp(long_option[i].name, "cp932") == 0){ + cp932_f = TRUE; + continue; + } +#endif } continue; case 'b': /* buffered mode */ @@ -928,7 +983,16 @@ options(cp) output_conv = w_oconv16; cp+=2; if (cp[0]=='L') { w_oconv16_begin_f=2; cp++; - } + w_oconv16_LE = 1; + if (cp[0] == '0'){ + w_oconv16_begin_f=1; cp++; + } + } else if (cp[0] == 'B') { + w_oconv16_begin_f=2; cp++; + if (cp[0] == '0'){ + w_oconv16_begin_f=1; cp++; + } + } } else output_conv = w_oconv; continue; @@ -1106,9 +1170,15 @@ void set_iconv(f, iconv_func) #endif } -#define SCORE_DEPEND (1) /* 機種依存文字 */ -#define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* 存在しない文字 */ -#define SCORE_ERROR (SCORE_NO_EXIST << 1) /* エラー */ +#define SCORE_KANA (1) /* いわゆる半角カナ */ +#define SCORE_DEPEND (SCORE_KANA << 1) /* 機種依存文字 */ +#ifdef SHIFTJIS_CP932 +#define SCORE_CP932 (SCORE_DEPEND << 1) /* CP932 による読み換え */ +#define SCORE_NO_EXIST (SCORE_CP932 << 1) /* 存在しない文字 */ +#else +#define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* 存在しない文字 */ +#endif +#define SCORE_ERROR (SCORE_NO_EXIST << 1) /* エラー */ int score_table_A0[] = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -1123,163 +1193,296 @@ int score_table_F0[] = { SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR, }; -int code_score(c2, c1, s) - int c2, c1, s; +void set_code_score(ptr, score) + struct input_code *ptr; + int score; { + ptr->score |= score; +} + +void code_score(ptr) + struct input_code *ptr; +{ + int c2 = ptr->buf[0]; + int c1 = ptr->buf[1]; if (c2 < 0){ - s |= SCORE_ERROR; + set_code_score(ptr, SCORE_ERROR); }else if ((c2 & 0xf0) == 0xa0){ - s |= score_table_A0[c2 & 0x0f]; + set_code_score(ptr, score_table_A0[c2 & 0x0f]); }else if ((c2 & 0xf0) == 0xf0){ - s |= score_table_F0[c2 & 0x0f]; + set_code_score(ptr, score_table_F0[c2 & 0x0f]); + }else if (c2 == SSO){ + set_code_score(ptr, SCORE_KANA); } #ifdef UTF8_OUTPUT_ENABLE else if (!e2w_conv(c2, c1)){ - s |= SCORE_NO_EXIST; + set_code_score(ptr, SCORE_NO_EXIST); } #endif - return s; } -void -code_status(c) +void status_disable(ptr) +struct input_code *ptr; +{ + ptr->stat = -1; + ptr->buf[0] = -1; + code_score(ptr); + if (iconv == ptr->iconv_func) set_iconv(FALSE, 0); +} + +void status_push_ch(ptr, c) + struct input_code *ptr; + int c; +{ + ptr->buf[ptr->index++] = c; +} + +void status_reset(ptr) + struct input_code *ptr; +{ + ptr->stat = 0; + ptr->score = 0; + ptr->index = 0; +} + +void status_reinit(ptr) + struct input_code *ptr; +{ + status_reset(ptr); + ptr->_file_stat = 0; +} + +void status_check(ptr, c) + struct input_code *ptr; + int c; +{ + if (c <= DEL && estab_f){ + status_reset(ptr); + } +} + +void s_status(ptr, c) + struct input_code *ptr; int c; { - switch (s_stat){ + switch(ptr->stat){ case -1: - if (c <= DEL && estab_f){ - s_stat = 0; - s_score = 0; - } + status_check(ptr, c); break; case 0: - if (c <= DEL - || (0xa1 <= c && c <= 0xef && iconv == s_iconv)){ + if (c <= DEL){ break; - }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){ - s_stat = 1; - s_buf[1] = c; + }else if (0xa1 <= c && c <= 0xdf){ + status_push_ch(ptr, SSO); + status_push_ch(ptr, c); + code_score(ptr); + status_reset(ptr); + }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){ + ptr->stat = 1; + status_push_ch(ptr, c); +#ifdef SHIFTJIS_CP932 + }else if (cp932_f + && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){ + ptr->stat = 2; + status_push_ch(ptr, c); +#endif /* SHIFTJIS_CP932 */ }else{ - s_stat = -1; - s_score = code_score(-1, 0, s_score); - if (iconv == s_iconv) set_iconv(FALSE, 0); + status_disable(ptr); } break; case 1: - if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfd)){ - s_stat = 0; - s_buf[0] = c; - s2e_conv(s_buf[1], s_buf[0], &s_buf[1], &s_buf[0]); - s_score = code_score(s_buf[1], s_buf[0], s_score); + if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ + status_push_ch(ptr, c); + s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); + code_score(ptr); + status_reset(ptr); }else{ - s_stat = -1; - s_score = code_score(-1, 0, s_score); - if (iconv == s_iconv) set_iconv(FALSE, 0); + status_disable(ptr); + } + break; +#ifdef SHIFTJIS_CP932 + case 2: + if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ + status_push_ch(ptr, c); + if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){ + set_code_score(ptr, SCORE_CP932); + status_reset(ptr); + break; + } } + status_disable(ptr); break; +#endif /* SHIFTJIS_CP932 */ } - switch (e_stat){ +} + +void e_status(ptr, c) + struct input_code *ptr; + int c; +{ + switch (ptr->stat){ case -1: - if (c <= DEL && estab_f){ - e_stat = 0; - e_score = 0; - } + status_check(ptr, c); break; case 0: if (c <= DEL){ break; }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){ - e_stat = 1; - e_buf[1] = c; + ptr->stat = 1; + status_push_ch(ptr, c); }else{ - e_stat = -1; - e_score = code_score(-1, 0, e_score); - if (iconv == e_iconv) set_iconv(FALSE, 0); + status_disable(ptr); } break; case 1: if (0xa1 <= c && c <= 0xfe){ - e_stat = 0; - e_buf[0] = c; - e_score = code_score(e_buf[1], e_buf[0], e_score); + status_push_ch(ptr, c); + code_score(ptr); + status_reset(ptr); }else{ - e_stat = -1; - if (iconv == e_iconv) set_iconv(FALSE, 0); - e_score = code_score(-1, 0, e_score); + status_disable(ptr); } break; } +} + #ifdef UTF8_INPUT_ENABLE - switch (w_stat){ +void w16_status(ptr, c) + struct input_code *ptr; + int c; +{ + switch (ptr->stat){ case -1: - if (c <= DEL && estab_f){ - w_stat = 0; - w_score = 0; + break; + case 0: + if (ptr->_file_stat == 0){ + if (c == 0xfe || c == 0xff){ + ptr->stat = c; + status_push_ch(ptr, c); + ptr->_file_stat = 1; + }else{ + status_disable(ptr); + ptr->_file_stat = -1; + } + }else if (ptr->_file_stat > 0){ + ptr->stat = 1; + status_push_ch(ptr, c); + }else if (ptr->_file_stat < 0){ + status_disable(ptr); + } + break; + + case 1: + if (c == EOF){ + status_disable(ptr); + ptr->_file_stat = -1; + }else{ + status_push_ch(ptr, c); + status_reset(ptr); + } + break; + + case 0xfe: + case 0xff: + if (ptr->stat != c && (c == 0xfe || c == 0xff)){ + status_push_ch(ptr, c); + status_reset(ptr); + }else{ + status_disable(ptr); + ptr->_file_stat = -1; } break; + } +} + +void w_status(ptr, c) + struct input_code *ptr; + int c; +{ + switch (ptr->stat){ + case -1: + status_check(ptr, c); + break; case 0: if (c <= DEL){ break; }else if (0xc0 <= c && c <= 0xdf){ - w_buf[2] = 0; - w_stat = 1; - w_buf[1] = c; + ptr->stat = 1; + status_push_ch(ptr, c); }else if (0xe0 <= c && c <= 0xef){ - w_stat = 2; - w_buf[2] = c; + ptr->stat = 2; + status_push_ch(ptr, c); }else{ - w_stat = -1; - w_score = code_score(-1, 0, w_score); - if (iconv == w_iconv) set_iconv(FALSE, 0); + status_disable(ptr); } break; case 1: case 2: if (0x80 <= c && c <= 0xbf){ - --w_stat; - w_buf[w_stat] = c; - if (w_stat == 0){ - if (w_buf[2]){ - w2e_conv(w_buf[2], w_buf[1], w_buf[0], - &w_buf[1], &w_buf[0]); - }else{ - w2e_conv(w_buf[1], w_buf[0], 0, - &w_buf[1], &w_buf[0]); - } - w_score = code_score(w_buf[1], w_buf[0], w_score); + status_push_ch(ptr, c); + if (ptr->index > ptr->stat){ + w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2], + &ptr->buf[0], &ptr->buf[1]); + code_score(ptr); + status_reset(ptr); } }else{ - w_stat = -1; - w_score = code_score(-1, 0, w_score); - if (iconv == w_iconv) set_iconv(FALSE, 0); + status_disable(ptr); } break; } - - if (s_stat < 0 && e_stat < 0 && w_stat == 0){ - set_iconv(TRUE, w_iconv); - } +} #endif - if (s_stat == 0 && e_stat < 0 && w_stat < 0){ - set_iconv(TRUE, s_iconv); - } - if (s_stat < 0 && e_stat == 0 && w_stat < 0){ - set_iconv(TRUE, e_iconv); + +void +code_status(c) + int c; +{ + int action_flag = 1; + struct input_code *result = 0; + struct input_code *p = input_code_list; + while (p->name){ + (p->status_func)(p, c); + if (p->stat > 0){ + action_flag = 0; + }else if(p->stat == 0){ + if (result){ + action_flag = 0; + }else{ + result = p; + } + } + ++p; } - if (s_stat < 0 && e_stat < 0 && w_stat < 0){ - if (c <= DEL){ - s_stat = e_stat = 0; -#ifdef UTF8_INPUT_ENABLE - w_stat = 0; -#endif + + if (action_flag){ + if (result && !estab_f){ + set_iconv(TRUE, result->iconv_func); + }else if (c <= DEL){ + struct input_code *ptr = input_code_list; + while (ptr->name){ + status_reset(ptr); + ++ptr; + } } } } +#ifdef PERL_XS +#define STD_GC_BUFSIZE (256) +int std_gc_buf[STD_GC_BUFSIZE]; +int std_gc_ndx; +#endif + int std_getc(f) FILE *f; { +#ifdef PERL_XS + if (std_gc_ndx){ + return std_gc_buf[--std_gc_ndx]; + } +#endif return getc(f); } @@ -1288,6 +1491,13 @@ std_ungetc(c,f) int c; FILE *f; { +#ifdef PERL_XS + if (std_gc_ndx == STD_GC_BUFSIZE){ + return EOF; + } + std_gc_buf[std_gc_ndx++] = c; + return c; +#endif return ungetc(c,f); } @@ -1356,7 +1566,7 @@ module_connection() i_getc = std_getc; /* input redicrection */ -#ifdef CAP_URL_OPTION +#ifdef INPUT_OPTION if (cap_f){ i_cgetc = i_getc; i_getc = cap_getc; i_cungetc = i_ungetc; i_ungetc= cap_ungetc; @@ -1365,6 +1575,10 @@ module_connection() i_ugetc = i_getc; i_getc = url_getc; i_uungetc = i_ungetc; i_ungetc= url_ungetc; } + if (numchar_f){ + i_ngetc = i_getc; i_getc = numchar_getc; + i_nungetc = i_ungetc; i_ungetc= numchar_ungetc; + } #endif if (mime_f && mimebuf_f==FIXED_MIME) { i_mgetc = i_getc; i_getc = mime_getc; @@ -1388,13 +1602,12 @@ module_connection() set_iconv(FALSE, e_iconv); } - e_stat = 0; - s_stat = 0; -#ifdef UTF8_INPUT_ENABLE - w_stat = 0; -#else - w_stat = -1; -#endif + { + struct input_code *p = input_code_list; + while (p->name){ + status_reinit(p++); + } + } } /* @@ -1705,25 +1918,15 @@ h_conv(f, c2, c1) } if (!estab_f){ - if (e_score <= s_score -#ifdef UTF8_INPUT_ENABLE - && e_score <= w_score -#endif - ){ - set_iconv(FALSE, e_iconv); - } - else if (s_score <= e_score -#ifdef UTF8_INPUT_ENABLE - && s_score <= w_score -#endif - ){ - set_iconv(FALSE, s_iconv); - } -#ifdef UTF8_INPUT_ENABLE - else{ - set_iconv(FALSE, w_iconv); + struct input_code *p = input_code_list; + struct input_code *result = p; + while (p->name){ + if (p->score < result->score){ + result = p; + } + ++p; } -#endif + set_iconv(FALSE, p->iconv_func); } @@ -1786,6 +1989,15 @@ int s2e_conv(c2, c1, p2, p1) int c2, c1; int *p2, *p1; { +#ifdef SHIFTJIS_CP932 + if (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){ + extern unsigned short shiftjis_cp932[3][189]; + c1 = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40]; + if (c1 == 0) return 1; + c2 = c1 >> 8; + c1 &= 0xff; + } +#endif /* SHIFTJIS_CP932 */ c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394); if (c1 < 0x9f) c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f); @@ -1795,7 +2007,7 @@ int s2e_conv(c2, c1, p2, p1) } if (p2) *p2 = c2; if (p1) *p1 = c1; - return (c2 << 8) | c1; + return 0; } int @@ -1808,7 +2020,8 @@ s_iconv(c2, c1, c0) } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) { /* NOP */ } else { - s2e_conv(c2, c1, &c2, &c1); + int ret = s2e_conv(c2, c1, &c2, &c1); + if (ret) return ret; } (*oconv)(c2, c1); return 0; @@ -1821,6 +2034,9 @@ e_iconv(c2, c1, c0) { if (c2 == X0201) { c1 &= 0x7f; + } else if (c2 == SSO){ + c2 = X0201; + c1 &= 0x7f; } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) { /* NOP */ } else { @@ -1870,15 +2086,55 @@ w_iconv(c2, c1, c0) return ret; } +void +w16w_conv(val, p2, p1, p0) + unsigned short val; + int *p2, *p1, *p0; +{ + if (val < 0x80){ + *p2 = val; + *p1 = 0; + *p0 = 0; + }else if (val < 0x800){ + *p2 = 0xc0 | (val >> 6); + *p1 = 0x80 | (val & 0x3f); + *p0 = 0; + }else{ + *p2 = 0xe0 | (val >> 12); + *p1 = 0x80 | ((val >> 6) & 0x3f); + *p0 = 0x80 | (val & 0x3f); + } +} + int -w_iconv16(c2, c1, c0) - int c2, c1,c0; +w16e_conv(val, p2, p1) + unsigned short val; + int *p2, *p1; { extern unsigned short * utf8_to_euc_2bytes[]; extern unsigned short ** utf8_to_euc_3bytes[]; + int c2, c1, c0; unsigned short **pp; - unsigned short val; int psize; + + w16w_conv(val, &c2, &c1, &c0); + if (c1){ + if (c0){ + pp = utf8_to_euc_3bytes[c2 - 0x80]; + psize = sizeof_utf8_to_euc_C2; + }else{ + pp = utf8_to_euc_2bytes; + psize = sizeof_utf8_to_euc_2bytes; + } + return w_iconv_common(c1, c0, pp, psize, p2, p1); + } + return val; +} + +int +w_iconv16(c2, c1, c0) + int c2, c1,c0; +{ int ret; if (c2==0376 && c1==0377){ @@ -1896,25 +2152,7 @@ w_iconv16(c2, c1, c0) (*oconv)(c2, c1); return 0; } - val = ((c2<<8)&0xff00) + c1; - if (c2 < 0x8){ - c0 = (0x80 | (c1 & 0x3f)); - c1 = (0xc0 | (val >> 6)); - pp = utf8_to_euc_2bytes; - psize = sizeof_utf8_to_euc_2bytes; - }else{ - c0 = (0x80 | (c1 & 0x3f)); - c2 = (0xe0 | (val >> 12)); - c1 = (0x80 | ((val >> 6) & 0x3f)); - if (c0 == 0) return -1; - if (0<=c2-0x80 && c2-0x80 >8); - (*o_putc)(val&0xff); + c2 = (val >> 8) & 0xff; + c1 = val & 0xff; + } + if (w_oconv16_LE){ + (*o_putc)(c1); + (*o_putc)(c2); + }else{ + (*o_putc)(c2); + (*o_putc)(c1); } } @@ -2068,7 +2317,13 @@ e_oconv(c2, c1) } } - +void +e2s_conv(c2, c1, p2, p1) + int c2, c1, *p2, *p1; +{ + if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1); + if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e); +} void s_oconv(c2, c1) @@ -2094,8 +2349,9 @@ s_oconv(c2, c1) return; /* too late to rescue this char */ } output_mode = SHIFT_JIS; - (*o_putc)((((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1))); - (*o_putc)((c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e))); + e2s_conv(c2, c1, &c2, &c1); + (*o_putc)(c2); + (*o_putc)(c1); } } @@ -2277,19 +2533,27 @@ int c2,c1; int prev0; int fold_state=0; - if (c1== '\r') { - fold_state=0; /* ignroe cr */ + if (c1== '\r' && !fold_preserve_f) { + fold_state=0; /* ignore cr */ + }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) { + f_prev = '\n'; + fold_state=0; /* ignore cr */ } else if (c1== BS) { if (f_line>0) f_line--; fold_state = 1; } else if (c2==EOF && f_line != 0) { /* close open last line */ fold_state = '\n'; - } else if (c1=='\n') { + } else if ((c1=='\n' && !fold_preserve_f) + || ((c1=='\r'||(c1=='\n'&&f_prev!='\r')) + && fold_preserve_f)) { /* new line */ if (fold_preserve_f) { - f_line = 0; - fold_state = '\r'; - } else if (f_prev == c1) { /* duplicate newline */ + f_prev = c1; + f_line = 0; + fold_state = '\r'; + } else if ((f_prev == c1 && !fold_preserve_f) + || (f_prev == '\n' && fold_preserve_f) + ) { /* duplicate newline */ if (f_line) { f_line = 0; fold_state = '\n'; /* output two newline */ @@ -2608,6 +2872,7 @@ unsigned char *mime_pattern[] = { (unsigned char *)"\075?EUC-JP?B?", (unsigned char *)"\075?SHIFT_JIS?B?", (unsigned char *)"\075?ISO-8859-1?Q?", + (unsigned char *)"\075?ISO-8859-1?B?", (unsigned char *)"\075?ISO-2022-JP?B?", (unsigned char *)"\075?ISO-2022-JP?Q?", #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) @@ -2617,7 +2882,7 @@ unsigned char *mime_pattern[] = { }; int mime_encode[] = { - JAPANESE_EUC, SHIFT_JIS,ISO8859_1, X0208, X0201, + JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201, #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) UTF8, #endif @@ -2625,7 +2890,7 @@ int mime_encode[] = { }; int mime_encode_method[] = { - 'B', 'B','Q', 'B', 'Q', + 'B', 'B','Q', 'B', 'B', 'Q', #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 'B', #endif @@ -2807,7 +3072,7 @@ void debug(str) } #endif -#ifdef CAP_URL_OPTION +#ifdef INPUT_OPTION int hex2bin(x) int x; @@ -2833,12 +3098,12 @@ hex_getc(ch, f, g, u) return c1; } c2 = (*g)(f); - if (!nkf_isxdigit(c2) == EOF){ + if (!nkf_isxdigit(c2)){ (*u)(c2, f); return c1; } c3 = (*g)(f); - if (!nkf_isxdigit(c3) == EOF){ + if (!nkf_isxdigit(c3)){ (*u)(c2, f); (*u)(c3, f); return c1; @@ -2875,6 +3140,91 @@ url_ungetc(c, f) { return (*i_uungetc)(c, f); } + +int +numchar_getc(f) + FILE *f; +{ + int (*g)() = i_ngetc; + int (*u)() = i_nungetc; + int i = 0, j; + int buf[8]; + long c = -1; + + buf[i] = (*g)(f); + if (buf[i] == '&'){ + buf[++i] = (*g)(f); + if (buf[i] == '#'){ + c = 0; + buf[++i] = (*g)(f); + if (buf[i] == 'x' || buf[i] == 'X'){ + for (j = 0; j < 5; j++){ + buf[++i] = (*g)(f); + if (!nkf_isxdigit(buf[i])){ + if (buf[i] != ';'){ + c = -1; + } + break; + } + c <<= 4; + c |= hex2bin(buf[i]); + } + }else{ + for (j = 0; j < 6; j++){ + if (j){ + buf[++i] = (*g)(f); + } + if (!nkf_isdigit(buf[i])){ + if (buf[i] != ';'){ + c = -1; + } + break; + } + c *= 10; + c += hex2bin(buf[i]); + } + } + } + } + if (c != -1){ + int c2, c1, c0; + if (c < 0x80){ + return c; + } + if (0x100 <= c){ + w16w_conv(c, &c2, &c1, &c0); + if (iconv == w_iconv){ + if (c0){ + (*u)(c0, f); + } + (*u)(c1, f); + return c2; + } + if (w2e_conv(c2, c1, c0, &c2, &c1) == 0){ + c2 |= 0x80; + c1 |= 0x80; + if (iconv == s_iconv){ + e2s_conv(c2, c1, &c2, &c1); + } + (*u)(c1, f); + return c2; + } + } + } + while (i > 0){ + (*u)(buf[i], f); + --i; + } + return buf[0]; +} + +int +numchar_ungetc(c, f) + int c; + FILE *f; +{ + return (*i_nungetc)(c, f); +} #endif @@ -3277,13 +3627,12 @@ reinit() option_mode = 0; crmode_f = 0; - e_stat = 0; - s_stat = 0; -#ifdef UTF8_INPUT_ENABLE - w_stat = 0; -#else - w_stat = -1; -#endif + { + struct input_code *p = input_code_list; + while (p->name){ + status_reinit(p++); + } + } #ifdef UTF8_OUTPUT_ENABLE if (w_oconv16_begin_f) { w_oconv16_begin_f = 2; @@ -3368,13 +3717,13 @@ void version() { fprintf(stderr,"Network Kanji Filter Version %s (%s) " -#if defined(MSDOS) && !defined(_Windows) +#if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__) "for DOS" #endif -#if !defined(__WIN32__) && defined(_Windows) +#if defined(MSDOS) && defined(__WIN16__) "for Win16" #endif -#if defined(__WIN32__) && defined(_Windows) +#if defined(MSDOS) && defined(__WIN32__) "for Win32" #endif #ifdef __OS2__ @@ -3397,8 +3746,6 @@ version() ** kono@ie.u-ryukyu.ac.jp (Shinji Kono) ** GHG00637@nifty-serve.or.jp (COW) ** - ** 最終更新日 - ** 2002.9.24 **/ /* end */