** E-Mail: furukawa@tcp-ip.or.jp
** \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
***********************************************************************/
-/* $Id: nkf.c,v 1.123 2007/05/27 20:31:01 naruse Exp $ */
+/* $Id: nkf.c,v 1.132 2007/09/19 11:51:55 naruse Exp $ */
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2007-05-28"
+#define NKF_RELEASE_DATE "2007-09-12"
#include "config.h"
#include "utf8tbl.h"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
- "Copyright (C) 2002-2006 Kono, Furukawa, Naruse, mastodon"
+ "Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
/*
#define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \
('A'<=c&&c<='F') ? (c-'A'+10) : \
('a'<=c&&c<='f') ? (c-'a'+10) : 0 )
+#define bin2hex(c) ("0123456789ABCDEF"[(c)&15]) /* low 4 bits of c as an uppercase hex digit */
#define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3)
#define CP932_TABLE_BEGIN 0xFA
* 0: Shift_JIS, eucJP-ascii
* 1: eucJP-ms
* 2: CP932, CP51932
+ * 3: CP10001
*/
-#define UCS_MAP_ASCII 0
-#define UCS_MAP_MS 1
-#define UCS_MAP_CP932 2
+#define UCS_MAP_ASCII 0
+#define UCS_MAP_MS 1
+#define UCS_MAP_CP932 2
+#define UCS_MAP_CP10001 3
static int ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_INPUT_ENABLE
/* X0201 kana conversion table */
/* 90-9F A0-DF */
-static const
-unsigned char cv[]= {
+static const unsigned char cv[]= {
0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
/* X0201 kana conversion table for daguten */
/* 90-9F A0-DF */
-static const
-unsigned char dv[]= {
+static const unsigned char dv[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* X0201 kana conversion table for han-daguten */
/* 90-9F A0-DF */
-static const
-unsigned char ev[]= {
+static const unsigned char ev[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* X0208 kigou conversion table */
/* 0x8140 - 0x819e */
-static const
-unsigned char fv[] = {
+static const unsigned char fv[] = {
0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
#endif
static int crmode_f = 0; /* CR, NL, CRLF */
+static nkf_char prev_cr = 0;
#ifdef EASYWIN /*Easy Win */
static int end_check;
#endif /*Easy Win */
}
#endif
-static const
-struct {
+static const struct {
const char *name;
const char *alias;
} long_option[] = {
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
+ }else if(strcmp(codeset, "CP10001") == 0){
+ input_f = SJIS_INPUT;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP10001;
+#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
input_f = EUC_INPUT;
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
+ }else if(strcmp(codeset, "CP10001") == 0){
+ output_conv = s_oconv;
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP10001;
+#endif
}else if(strcmp(codeset, "EUCJP") == 0 ||
strcmp(codeset, "EUC-JP") == 0){
output_conv = e_oconv;
if (x0201_f==NO_X0201) x0201_f=TRUE;
continue;
case 'Z': /* Convert X0208 alphabet to asii */
- /* bit:0 Convert X0208
- bit:1 Convert Kankaku to one space
- bit:2 Convert Kankaku to two spaces
- bit:3 Convert HTML Entity
+ /* alpha_f
+ bit:0 Convert JIS X 0208 Alphabet to ASCII
+ bit:1 Convert Kankaku to one space
+ bit:2 Convert Kankaku to two spaces
+ bit:3 Convert HTML Entity
+ bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
*/
- if ('9'>= *cp && *cp>='0')
- alpha_f |= 1<<(*cp++ -'0');
- else
- alpha_f |= TRUE;
+                /* Each digit after -Z selects one alpha_f bit; several digits may follow. */
+                while ('0'<= *cp && *cp <='9') {
+                    alpha_f |= 1 << (*cp++ - '0');
+                }
+                /* Bit 0 is always set: -Z, with or without digits, has always implied
+                   alphabet conversion, and the converter now tests alpha_f&1 instead
+                   of alpha_f != 0, so -Z1..-Z4 would otherwise lose it. */
+                alpha_f |= 1;
continue;
case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
x0201_f = FALSE; /* No X0201->X0208 conversion */
#define SCORE_INIT (SCORE_iMIME)
-const nkf_char score_table_A0[] = {
+static const char score_table_A0[] = {
0, 0, 0, 0,
0, 0, 0, 0,
0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};
-const nkf_char score_table_F0[] = {
+static const char score_table_F0[] = {
SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
} else { /* bogus code, skip SSO and one byte */
NEXT;
}
+ } else if (ms_ucs_map_f == UCS_MAP_CP10001 &&
+ (c1 == 0xFD || c1 == 0xFE)) {
+ /* CP10001 */
+ c2 = X0201;
+ c1 &= 0x7f;
+ SEND;
} else {
/* already established */
c2 = c1;
C%7 : 0 1 2 3 4 5 6
NUM : 2 0 3 4 5 X 1
*/
- static const int jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
+ static const char jphone_emoji_first_table[7] = {2, 0, 3, 4, 5, 0, 1};
c0 = (jphone_emoji_first_table[c1 % 7] << 8) - SPACE + 0xE000 + CLASS_UNICODE;
while ((c1 = (*i_getc)(f)) != EOF) {
if (SPACE <= c1 && c1 <= 'z') {
SEND;
}
}
- if (crmode_f == CR && c1 == NL) crmode_f = CRLF;
- else crmode_f = c1;
+ if (!crmode_f) {
+ if (prev_cr && c1 == NL) crmode_f = CRLF;
+ else crmode_f = c1;
+ }
} else if (c1 == DEL && input_mode == X0208 ) {
/* CP5022x */
c2 = c1;
code_status(c1);
}
while (p->name){
- if (p->score < result->score){
+ if (p->status_func && p->score < result->score){
result = p;
}
++p;
}
- set_iconv(FALSE, result->iconv_func);
+ set_iconv(TRUE, result->iconv_func);
}
#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
nkf_char val;
#endif
- static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
+ static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
#ifdef SHIFTJIS_CP932
if (!cp932inv_f && is_ibmext_in_sjis(c2)){
-#if 0
- extern const unsigned short shiftjis_cp932[3][189];
-#endif
val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
if (val){
c2 = val >> 8;
}
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
-#if 0
- extern const unsigned short cp932inv[2][189];
-#endif
nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
if (!x0213_f && is_ibmext_in_sjis(c2)){
-#if 0
- extern const unsigned short shiftjis_x0212[3][189];
-#endif
val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
if (val){
if (val > 0x7FFF){
nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
{
nkf_char ret = 0;
- static const int w_iconv_utf8_1st_byte[] =
+ static const char w_iconv_utf8_1st_byte[] =
{ /* 0xC0 - 0xFF */
20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
{
-#if 0
- extern const unsigned short *const utf8_to_euc_2bytes[];
- extern const unsigned short *const utf8_to_euc_2bytes_ms[];
- extern const unsigned short *const utf8_to_euc_2bytes_932[];
- extern const unsigned short *const *const utf8_to_euc_3bytes[];
- extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
- extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
-#endif
const unsigned short *const *pp;
const unsigned short *const *const *ppp;
- static const int no_best_fit_chars_table_C2[] =
+ static const char no_best_fit_chars_table_C2[] =
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
- static const int no_best_fit_chars_table_C2_ms[] =
+ static const char no_best_fit_chars_table_C2_ms[] =
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
- static const int no_best_fit_chars_table_932_C2[] =
+ static const char no_best_fit_chars_table_932_C2[] =
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
- static const int no_best_fit_chars_table_932_C3[] =
+ static const char no_best_fit_chars_table_932_C3[] =
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
}
}else if(ms_ucs_map_f == UCS_MAP_MS){
if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
+ switch(c2){
+ case 0xC2:
+ switch(c1){
+ case 0xA2:
+ case 0xA3:
+ case 0xA5:
+ case 0xA6:
+ case 0xAC:
+ case 0xAF:
+ case 0xB8:
+ return 1;
+ }
+ break;
+ }
}
}
pp =
ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
utf8_to_euc_2bytes;
ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
}else if(c0 < 0xF0){
if(c1 == 0x80 || c0 == 0x9C) return 1;
break;
}
+ }else if(ms_ucs_map_f == UCS_MAP_CP10001){
+ switch(c2){
+ case 0xE3:
+ switch(c1){
+ case 0x82:
+ if(c0 == 0x94) return 1;
+ break;
+ case 0x83:
+ if(c0 == 0xBB) return 1;
+ break;
+ }
+ break;
+ }
}else{
switch(c2){
case 0xE2:
ppp =
ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
utf8_to_euc_3bytes;
ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
}else return -1;
void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
{
- const char *hex = "0123456789ABCDEF";
int shift = 20;
c &= VALUE_MASK;
while(shift >= 0){
if(c >= 1<<shift){
while(shift >= 0){
- (*f)(0, hex[(c>>shift)&0xF]);
+ (*f)(0, bin2hex(c>>shift));
shift -= 4;
}
}else{
void encode_fallback_java(nkf_char c)
{
- const char *hex = "0123456789ABCDEF";
(*oconv)(0, '\\');
c &= VALUE_MASK;
if(!is_unicode_bmp(c)){
(*oconv)(0, 'U');
(*oconv)(0, '0');
(*oconv)(0, '0');
- (*oconv)(0, hex[(c>>20)&0xF]);
- (*oconv)(0, hex[(c>>16)&0xF]);
+ (*oconv)(0, bin2hex(c>>20));
+ (*oconv)(0, bin2hex(c>>16));
}else{
(*oconv)(0, 'u');
}
- (*oconv)(0, hex[(c>>12)&0xF]);
- (*oconv)(0, hex[(c>> 8)&0xF]);
- (*oconv)(0, hex[(c>> 4)&0xF]);
- (*oconv)(0, hex[ c &0xF]);
+ (*oconv)(0, bin2hex(c>>12));
+ (*oconv)(0, bin2hex(c>> 8));
+ (*oconv)(0, bin2hex(c>> 4));
+ (*oconv)(0, bin2hex(c ));
return;
}
#ifdef UTF8_OUTPUT_ENABLE
nkf_char e2w_conv(nkf_char c2, nkf_char c1)
{
-#if 0
- extern const unsigned short euc_to_utf8_1byte[];
- extern const unsigned short *const euc_to_utf8_2bytes[];
- extern const unsigned short *const euc_to_utf8_2bytes_ms[];
- extern const unsigned short *const x0212_to_utf8_2bytes[];
-#endif
const unsigned short *p;
if (c2 == X0201) {
+ if (ms_ucs_map_f == UCS_MAP_CP10001) {
+ switch (c1) {
+ case 0x20:
+ return 0xA0;
+ case 0x7D:
+ return 0xA9;
+ }
+ }
p = euc_to_utf8_1byte;
#ifdef X0212_ENABLE
} else if (is_eucg3(c2)){
c2 &= 0x7f;
c2 = (c2&0x7f) - 0x21;
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
- p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
+ p =
+ ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
+ ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
+ euc_to_utf8_2bytes_ms[c2];
else
return 0;
}
else if(nkf_isgraph(ndx)){
nkf_char val = 0;
const unsigned short *ptr;
-#if 0
- extern const unsigned short *const x0212_shiftjis[];
-#endif
ptr = x0212_shiftjis[ndx - 0x21];
if (ptr){
val = ptr[(c1 & 0x7f) - 0x21];
#ifdef SHIFTJIS_CP932
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
-#if 0
- extern const unsigned short cp932inv[2][189];
-#endif
nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
return c;
}
-static nkf_char prev_cr = 0;
-
void cr_conv(nkf_char c2, nkf_char c1)
{
if (prev_cr) {
/* if (c2) c1 &= 0x7f; assertion */
+ if (c2 == X0201 && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
+ (*o_zconv)(c2,c1);
+ return;
+ }
+
if (x0201_f && z_prev2==X0201) { /* X0201 */
if (c1==(0xde&0x7f)) { /* \e$BByE@\e(B */
z_prev2=0;
}
}
- /* JISX0208 Alphabet */
- if (alpha_f && c2 == 0x23 ) {
+ if (alpha_f&1 && c2 == 0x23 ) {
+ /* JISX0208 Alphabet */
c2 = 0;
- } else if (alpha_f && c2 == 0x21 ) {
- /* JISX0208 Kigou */
+ } else if (c2 == 0x21) {
+ /* JISX0208 Kigou */
if (0x21==c1) {
- if (alpha_f&0x2) {
- c1 = ' ';
+ if (alpha_f&2) {
c2 = 0;
- } else if (alpha_f&0x4) {
- (*o_zconv)(0,' ');
- (*o_zconv)(0,' ');
+ c1 = ' ';
+ } else if (alpha_f&4) {
+ (*o_zconv)(0, ' ');
+ (*o_zconv)(0, ' ');
return;
}
- } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
- c1 = fv[c1-0x20];
+ } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
c2 = 0;
- if (alpha_f&0x8) {
- char *entity = 0;
- switch (c1){
- case '>': entity = "&gt;"; break;
- case '<': entity = "&lt;"; break;
- case '\"': entity = "&quot;"; break;
- case '&': entity = "&amp;"; break;
- }
- if (entity){
- while (*entity) (*o_zconv)(0, *entity++);
- return;
- }
- }
+ c1 = fv[c1-0x20];
}
}
+
+    if (alpha_f&8 && c2 == 0) {
+        /* HTML Entity (alpha_f bit 3): a single-byte character (c2 == 0) that is
+           special in HTML is emitted as its named entity, one byte at a time. */
+        const char *entity = 0;
+        switch (c1){
+        case '>': entity = "&gt;"; break;
+        case '<': entity = "&lt;"; break;
+        case '\"': entity = "&quot;"; break;
+        case '&': entity = "&amp;"; break;
+        }
+        if (entity){
+            while (*entity) (*o_zconv)(0, *entity++);
+            return;
+        }
+    }
+
+    if (alpha_f & 16) {
+        /* JIS X 0208 Katakana to JIS X 0201 Katakana (alpha_f bit 4).
+           Codes passed to o_zconv together with X0201 are in 7-bit form
+           (8-bit halfwidth code & 0x7f), the same form used by the table below. */
+        if (c2 == 0x21) {
+            /* nkf_char rather than char: plain char may be signed, and the
+               halfwidth codes below are documented by their 8-bit values. */
+            nkf_char c = 0;
+            switch (c1) {
+            case 0x23:
+                /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
+                c = 0xA1 & 0x7f;
+                break;
+            case 0x56:
+                /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
+                c = 0xA2 & 0x7f;
+                break;
+            case 0x57:
+                /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
+                c = 0xA3 & 0x7f;
+                break;
+            case 0x22:
+                /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
+                c = 0xA4 & 0x7f;
+                break;
+            case 0x26:
+                /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
+                c = 0xA5 & 0x7f;
+                break;
+            case 0x3C:
+                /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
+                c = 0xB0 & 0x7f;
+                break;
+            case 0x2B:
+                /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
+                c = 0xDE & 0x7f;
+                break;
+            case 0x2C:
+                /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
+                c = 0xDF & 0x7f;
+                break;
+            default:
+                break;
+            }
+            if (c) {
+                (*o_zconv)(X0201, c);
+                return;
+            }
+        } else if (c2 == 0x25) {
+            /* JISX0208 Katakana.
+               Table index is c1-0x20.  High byte: 7-bit halfwidth katakana code.
+               Low byte: 0x5E (voiced mark) or 0x5F (semi-voiced mark) to emit
+               after it, or 0 for none.  An all-zero entry means "no halfwidth form". */
+            static const int fullwidth_to_halfwidth[] =
+            {
+                0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
+                0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
+                0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
+                0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
+                0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
+                0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
+                0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
+                0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
+                0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
+                0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
+                0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
+                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+            };
+            /* The table has 96 entries (0x20..0x7F); never index outside it. */
+            if (0x20 <= c1 && c1 <= 0x7F && fullwidth_to_halfwidth[c1-0x20]){
+                nkf_char half = fullwidth_to_halfwidth[c1-0x20];
+                (*o_zconv)(X0201, half>>8);
+                if (half & 0xFF) {
+                    (*o_zconv)(X0201, half&0xFF);
+                }
+                return;
+            }
+        }
+    }
(*o_zconv)(c2,c1);
}
/* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
-const unsigned char *mime_pattern[] = {
+static const unsigned char *mime_pattern[] = {
(const unsigned char *)"\075?EUC-JP?B?",
(const unsigned char *)"\075?SHIFT_JIS?B?",
(const unsigned char *)"\075?ISO-8859-1?Q?",
0,
};
-const nkf_char mime_encode[] = {
+static const nkf_char mime_encode[] = {
JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
#if defined(UTF8_INPUT_ENABLE)
UTF8, UTF8,
0
};
-const nkf_char mime_encode_method[] = {
+static const nkf_char mime_encode_method[] = {
'B', 'B','Q', 'B', 'B', 'Q',
#if defined(UTF8_INPUT_ENABLE)
'B', 'Q',
int i=0, j, k=1, lower, upper;
nkf_char buf[9];
const nkf_nfchar *array;
-#if 0
- extern const struct normalization_pair normalization_table[];
-#endif
buf[i] = (*g)(f);
while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
}
}
-nkf_char mime_lastchar2, mime_lastchar1;
+/*nkf_char mime_lastchar2, mime_lastchar1;*/
void mime_prechar(nkf_char c2, nkf_char c1)
{
if (mimeout_mode){
- if (c2){
+ if (c2 == EOF){
+ if (base64_count + mimeout_buf_count/3*4> 73){
+ (*o_base64conv)(EOF,0);
+ (*o_base64conv)(0,NL);
+ (*o_base64conv)(0,SPACE);
+ }
+ } else if (c2){
if (base64_count + mimeout_buf_count/3*4> 66){
(*o_base64conv)(EOF,0);
(*o_base64conv)(0,NL);
(*o_base64conv)(0,SPACE);
}
}*/
- mime_lastchar2 = c2;
- mime_lastchar1 = c1;
+ /*mime_lastchar2 = c2;
+ mime_lastchar1 = c1;*/
}
void mime_putc(nkf_char c)
mimeout_buf_count = 0;
i = 0;
if (mimeout_mode) {
- for (;i<j;i++) {
- if (nkf_isspace(mimeout_buf[i]) && base64_count < 71){
- break;
+ if (!nkf_isblank(mimeout_buf[j-1])) {
+ for (;i<j;i++) {
+ if (nkf_isspace(mimeout_buf[i]) && base64_count < 71){
+ break;
+ }
+ mimeout_addchar(mimeout_buf[i]);
}
- mimeout_addchar(mimeout_buf[i]);
- }
- eof_mime();
- for (;i<j;i++) {
- mimeout_addchar(mimeout_buf[i]);
+ eof_mime();
+ for (;i<j;i++) {
+ mimeout_addchar(mimeout_buf[i]);
+ }
+ } else {
+ for (;i<j;i++) {
+ mimeout_addchar(mimeout_buf[i]);
+ }
+ eof_mime();
}
} else {
for (;i<j;i++) {
fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
- fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII\n");
- fprintf(stderr," 1: Kankaku to 1 space 2: to 2 spaces 3: Convert to HTML Entity\n");
+ fprintf(stderr,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n");
+ fprintf(stderr," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n");
+ fprintf(stderr," 4: JISX0208 Katakana to JISX0201 Katakana\n");
fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
#ifdef MSDOS