** E-Mail: furukawa@tcp-ip.or.jp
** \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
***********************************************************************/
-/* $Id: nkf.c,v 1.118 2006/11/10 09:45:39 naruse Exp $ */
+/* $Id: nkf.c,v 1.119 2006/11/11 20:33:56 naruse Exp $ */
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2006-11-04"
+#define NKF_RELEASE_DATE "2006-11-12"
#include "config.h"
#include "utf8tbl.h"
#ifdef SHIFTJIS_CP932
/* invert IBM extended characters to others */
-static int cp51932_f = TRUE;
+static int cp51932_f = FALSE;
/* invert NEC-selected IBM extended characters to IBM extended characters */
static int cp932inv_f = TRUE;
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
cp51932_f = FALSE;
- cp932inv_f = FALSE;
#endif
}else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
strcmp(codeset, "EUC-JIS-2004") == 0){
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
cp51932_f = FALSE;
- cp932inv_f = FALSE;
#endif
#ifdef UTF8_INPUT_ENABLE
}else if(strcmp(codeset, "UTF-8") == 0 ||
output_conv = j_oconv;
no_cp932ext_f = TRUE;
#ifdef SHIFTJIS_CP932
- cp51932_f = TRUE;
+ cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
output_conv = j_oconv;
x0201_f = TRUE;
#ifdef SHIFTJIS_CP932
- cp51932_f = TRUE;
+ cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
}else if(strcmp(codeset, "CP50221") == 0){
output_conv = j_oconv;
#ifdef SHIFTJIS_CP932
- cp51932_f = TRUE;
+ cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
x0212_f = TRUE;
#endif
#ifdef SHIFTJIS_CP932
- cp51932_f = FALSE;
+ cp932inv_f = FALSE;
#endif
}else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
output_conv = j_oconv;
#endif
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
- cp51932_f = FALSE;
+ cp932inv_f = FALSE;
#endif
}else if(strcmp(codeset, "SHIFT_JIS") == 0){
output_conv = s_oconv;
strcmp(codeset, "CP932") == 0 ||
strcmp(codeset, "MS932") == 0){
output_conv = s_oconv;
-#ifdef SHIFTJIS_CP932
- cp51932_f = TRUE;
- cp932inv_f = TRUE;
-#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#endif
}else if(strcmp(codeset, "CP51932") == 0){
output_conv = e_oconv;
#ifdef SHIFTJIS_CP932
- cp51932_f = TRUE;
+ cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
#ifdef X0212_ENABLE
x0212_f = TRUE;
#endif
-#ifdef SHIFTJIS_CP932
- cp51932_f = FALSE;
-#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_MS;
#endif
#ifdef X0212_ENABLE
x0212_f = TRUE;
#endif
-#ifdef SHIFTJIS_CP932
- cp51932_f = FALSE;
-#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#endif
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
- cp51932_f = FALSE;
+ cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
}else if(strcmp(codeset, "UTF-8") == 0){
continue;
case 'e': /* AT&T EUC output */
output_conv = e_oconv;
+ cp932inv_f = FALSE;
continue;
case 's': /* SJIS output */
output_conv = s_oconv;
code_status(c1);
if (c2) {
/* second byte */
- if (c2 > DEL) {
+ if (c2 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
/* in case of 8th bit is on */
if (!estab_f&&!mime_decode_mode) {
/* in case of not established yet */
NEXT;
} else {
/* in case of already established */
- if (c1 < AT && !(X0208 && 0x80 <= c2 && c2 <= 0x92)) {
+ if (c1 < AT) {
/* ignore bogus code and not CP5022x UCD */
c2 = 0;
NEXT;
SEND;
} else
#endif
- if (c1 > DEL) {
+ if (c1 > ((input_f == JIS_INPUT && ms_ucs_map_f) ? 0x92 : DEL)) {
/* 8 bit code */
if (!estab_f && !iso8859_f) {
/* not established yet */
break;
case X0208:
case X0213_1:
- if (ms_ucs_map_f == UCS_MAP_CP932 &&
+ if (ms_ucs_map_f &&
0x7F <= c2 && c2 <= 0x92 &&
0x21 <= c1 && c1 <= 0x7E) {
/* CP932 UDC */
#endif
static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
#ifdef SHIFTJIS_CP932
- if (cp51932_f && is_ibmext_in_sjis(c2)){
+ if (!cp932inv_f && is_ibmext_in_sjis(c2)){
#if 0
extern const unsigned short shiftjis_cp932[3][189];
#endif
c1 = val & 0xff;
}
}
+ if (cp932inv_f
+ && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
+#if 0
+ extern const unsigned short cp932inv[2][189];
+#endif
+ nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
+ if (c){
+ c2 = c >> 8;
+ c1 = c & 0xff;
+ }
+ }
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
if (!x0213_f && is_ibmext_in_sjis(c2)){
val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
if (val){
if (val > 0x7FFF){
- c2 = PREFIX_EUCG3 | (val >> 8);
+ c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
c1 = val & 0xff;
}else{
c2 = val >> 8;
c1 &= 0x7f;
} else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
/* NOP */
- } else if (ms_ucs_map_f == UCS_MAP_CP932 &&
- 0xF0 <= c2 && c2 <= 0xF9 &&
- 0x40 <= c1 && c1 <= 0xFC) {
+ } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
/* CP932 UDC */
if(c1 == 0x7F) return 0;
c1 = (c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000 + CLASS_UNICODE;
if (c0 == 0){
return -1;
}
- if (ms_ucs_map_f == UCS_MAP_MS && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
+ if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
/* encoding is eucJP-ms, so map to the Unicode Private Use Area */
c1 = (c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC + CLASS_UNICODE;
c2 = 0;
} else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
/* NOP */
} else {
- if (ms_ucs_map_f == UCS_MAP_MS && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
+ if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
/* encoding is eucJP-ms, so map to the Unicode Private Use Area */
c1 = (c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000 + CLASS_UNICODE;
c2 = 0;
} else {
c1 &= 0x7f;
c2 &= 0x7f;
+#ifdef SHIFTJIS_CP932
+ if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
+ nkf_char s2, s1;
+ if (e2s_conv(c2, c1, &s2, &s1) == 0){
+ s2e_conv(s2, s1, &c2, &c1);
+ if (c2 < 0x100){
+ c1 &= 0x7f;
+ c2 &= 0x7f;
+ }
+ }
+ }
+#endif /* SHIFTJIS_CP932 */
}
}
(*oconv)(c2, c1);
if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
break;
}
- }else if(cp51932_f){
+ }else if(!cp932inv_f){
switch(c2){
case 0xC2:
if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
if(c0 == 0x8D) return 1;
break;
case 0xBD:
- if(c0 == 0x9E && cp51932_f) return 1;
+ if(c0 == 0x9E && !cp932inv_f) return 1;
break;
case 0xBF:
if(0xA0 <= c0 && c0 <= 0xA5) return 1;
ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
}else return -1;
#ifdef SHIFTJIS_CP932
- if (!ret && cp51932_f && is_eucg3(*p2)) {
+ if (!ret && !cp932inv_f && is_eucg3(*p2)) {
nkf_char s2, s1;
if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
s2e_conv(s2, s1, p2, p1);
w16e_conv(c1, &c2, &c1);
if (c2 == 0 && is_unicode_capsule(c1)){
c2 = c1 & VALUE_MASK;
- if (ms_ucs_map_f == UCS_MAP_MS &&
- 0xE000 <= c2 && c2 <= 0xE757) {
+ if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* eucJP-ms UDC */
c1 &= 0xFFF;
c2 = c1 / 94;
c2 += c2 < 10 ? 0x75 : 0x8FEB;
c1 = 0x21 + c1 % 94;
+ if (is_eucg3(c2)){
+ (*o_putc)(0x8f);
+ (*o_putc)((c2 & 0x7f) | 0x080);
+ (*o_putc)(c1 | 0x080);
+ }else{
+ (*o_putc)((c2 & 0x7f) | 0x080);
+ (*o_putc)(c1 | 0x080);
+ }
+ return;
} else {
if (encode_fallback) (*encode_fallback)(c1);
return;
} else if (is_eucg3(c2)){
output_mode = JAPANESE_EUC;
#ifdef SHIFTJIS_CP932
- if (cp51932_f){
+ if (!cp932inv_f){
nkf_char s2, s1;
if (e2s_conv(c2, c1, &s2, &s1) == 0){
s2e_conv(s2, s1, &c2, &c1);
{
nkf_char ndx;
if (is_eucg3(c2)){
- ndx = c2 & 0xff;
+ ndx = c2 & 0x7f;
if (x0213_f){
if((0x21 <= ndx && ndx <= 0x2F)){
if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
w16e_conv(c1, &c2, &c1);
if (c2 == 0 && is_unicode_capsule(c1)){
c2 = c1 & VALUE_MASK;
- if (ms_ucs_map_f == UCS_MAP_CP932 &&
- 0xE000 <= c2 && c2 <= 0xE757) {
+ if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* CP932 UDC */
c1 &= 0xFFF;
c2 = c1 / 188 + 0xF0;
w16e_conv(c1, &c2, &c1);
if (c2 == 0 && is_unicode_capsule(c1)){
c2 = c1 & VALUE_MASK;
- if (ms_ucs_map_f == UCS_MAP_CP932 &&
- 0xE000 <= c2 && c2 <= 0xE757) {
+ if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* CP5022x UDC */
c1 &= 0xFFF;
c2 = 0x7F + c1 / 94;
}
(*o_putc)(c1);
} else {
- if(ms_ucs_map_f == UCS_MAP_CP932
+ if(ms_ucs_map_f
? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
: c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
if(x0213_f){