* \e$B8=:_!"\e(Bnkf \e$B$O\e(B SorceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
* http://sourceforge.jp/projects/nkf/
***********************************************************************/
-#define NKF_IDENT "$Id: nkf.c,v 1.184 2008/10/22 18:27:57 naruse Exp $"
+#define NKF_IDENT "$Id: nkf.c,v 1.190 2008/11/09 20:34:04 naruse Exp $"
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2008-10-23"
+#define NKF_RELEASE_DATE "2008-11-10"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
#include <windows.h>
#include <locale.h>
#endif
+#if defined(__OS2__)
+# define INCL_DOS
+# define INCL_DOSERRORS
+# include <os2.h>
+#endif
+#include <assert.h>
/* state of output_mode and input_mode
#define DEFAULT_ENCIDX ISO_2022_JP
#elif defined(DEFAULT_CODE_SJIS)
#define DEFAULT_ENCIDX SHIFT_JIS
+#elif defined(DEFAULT_CODE_WINDOWS_31J)
+#define DEFAULT_ENCIDX WINDOWS_31J
#elif defined(DEFAULT_CODE_EUC)
#define DEFAULT_ENCIDX EUC_JP
#elif defined(DEFAULT_CODE_UTF8)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
#define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE)
-#define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= NKF_INT32_C(UNICODE_BMP_MAX))
-#define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= NKF_INT32_C(UNICODE_MAX))
+/* True when the value bits of `c` (c masked with VALUE_MASK) do not exceed UNICODE_BMP_MAX.
+ * The argument is parenthesised so that expression arguments are masked as a whole. */
+#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
+/* True when the value bits of `c` (c masked with VALUE_MASK) do not exceed UNICODE_MAX. */
+#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
#ifdef NUMCHAR_OPTION
static int numchar_f = FALSE;
return str;
}
else return NULL;
+#elif defined(__OS2__)
+# if defined(INT_IS_SHORT)
+ /* OS/2 1.x */
+ return NULL;
+# else
+ /* OS/2 32bit */
+ static char buf[16];
+ ULONG ulCP[1], ulncp;
+ DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
+ if (ulCP[0] == 932 || ulCP[0] == 943)
+ strcpy(buf, "Shift_JIS");
+ else
+ sprintf(buf, "CP%lu", ulCP[0]);
+ return buf;
+# endif
#else
return NULL;
#endif
char *encname = nkf_locale_charmap();
if (encname)
enc = nkf_enc_find(encname);
- if (enc < 0) enc = 0;
return enc;
}
#endif /* DEFAULT_CODE_LOCALE */
+/*
+ * Return a pointer to the UTF_8 slot of nkf_encoding_table.
+ */
static nkf_encoding*
+nkf_utf8_encoding()
+{
+    nkf_encoding *utf8 = nkf_encoding_table + UTF_8;
+    return utf8;
+}
+
+/*
+ * Select the encoding used when none was requested explicitly.
+ *
+ * With DEFAULT_CODE_LOCALE the result of nkf_locale_encoding() is used;
+ * otherwise, when DEFAULT_ENCIDX is defined, the entry returned by
+ * nkf_enc_from_index(DEFAULT_ENCIDX).  If neither yields an encoding
+ * (enc is still null), nkf_utf8_encoding() is used, so the return value
+ * is whatever that fallback provides rather than a null left over here.
+ */
+static nkf_encoding*
nkf_default_encoding()
{
nkf_encoding *enc = 0;
#ifdef DEFAULT_CODE_LOCALE
enc = nkf_locale_encoding();
-#elif DEFAULT_ENCIDX
+#elif defined(DEFAULT_ENCIDX)
enc = nkf_enc_from_index(DEFAULT_ENCIDX);
#endif
+ if (!enc) enc = nkf_utf8_encoding();
return enc;
}
" Default output encoding: "
#ifdef DEFAULT_CODE_LOCALE
"LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
-#elif DEFAULT_ENCIDX
+#elif defined(DEFAULT_ENCIDX)
"CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
#else
"NONE\n"
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
static void
-nkf_unicode_to_utf8(nkf_char val, int *p1, int *p2, int *p3, int *p4)
+nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
{
val &= VALUE_MASK;
if (val < 0x80){
}
static nkf_char
-nkf_utf8_to_unicode(int c1, int c2, int c3, int c4)
+nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
{
nkf_char wc;
if (c1 <= 0x7F) {
static nkf_char
w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
{
- int c1, c2, c3, c4;
+ nkf_char c1, c2, c3, c4;
nkf_char ret = 0;
val &= VALUE_MASK;
if (val < 0x80) {
#define NKF_ICONV_NEED_TWO_MORE_BYTES -2
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
static size_t
-nkf_iconv_utf_16(int c1, int c2, int c3, int c4)
+nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
{
nkf_char wc;
}
static size_t
-nkf_iconv_utf_32(int c1, int c2, int c3, int c4)
+nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
{
nkf_char wc;
static void
w_oconv(nkf_char c2, nkf_char c1)
{
- int c3, c4;
+ nkf_char c3, c4;
nkf_char val;
if (output_bom_f) {
nkf_char (*g)(FILE *) = i_ngetc;
nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
int i = 0, j;
- nkf_char buf[8];
+ nkf_char buf[12];
long c = -1;
buf[i] = (*g)(f);
#ifdef UNICODE_NORMALIZATION
+/*
+ * Report `mes` on stderr, tagged with the file and line of the call site.
+ * The expansion deliberately has no trailing semicolon so that
+ * `if (x) nkf_error("..."); else ...` parses as written.
+ */
+#define nkf_error(mes) nkf_error0(__FILE__, __LINE__, (mes))
+
+/*
+ * Write "<file>:<line>: <mes>" followed by a newline to stderr.
+ * Normally reached through the nkf_error() macro, which supplies
+ * `file` and `line`.
+ */
+static void
+nkf_error0(const char *file, int line, const char *mes)
+{
+    fprintf(stderr, "%s:%d: %s\n", file, line, mes);
+}
+
+/*
+ * malloc() wrapper that does not hand NULL back to its caller.
+ *
+ * A request for 0 bytes is rounded up to 1, because malloc(0) may
+ * legitimately return NULL and must not be mistaken for exhaustion.
+ * On a real allocation failure the error is reported through
+ * nkf_error() and the process exits with EXIT_FAILURE: callers such as
+ * nkf_ary_new() use the result without checking it, so returning NULL
+ * would only defer the failure to a null dereference.
+ */
+static void *
+nkf_malloc(size_t n)
+{
+    void *ptr = malloc(n != 0 ? n : 1);
+    if (ptr == NULL) {
+        nkf_error("can't allocate memory");
+        exit(EXIT_FAILURE);
+    }
+    return ptr;
+}
+
+/* Counterpart of nkf_malloc(); expands directly to free(ptr). */
+#define nkf_free(ptr) free(ptr)
+
+/*
+ * Fixed-capacity stack of bytes, created by nkf_ary_new() and released
+ * by nkf_ary_dispose().
+ */
+typedef struct {
+ unsigned char *ary; /* storage, allocated with max_length bytes */
+ int max_length; /* capacity requested at creation */
+ int count; /* number of bytes currently stored */
+} nkf_ary;
+
+/*
+ * Create an empty nkf_ary with room for `length` bytes.
+ *
+ * If nkf_malloc() hands back NULL for either the header or the storage,
+ * NULL is returned and nothing is leaked (a header that was already
+ * allocated is freed again).  Release a non-NULL result with
+ * nkf_ary_dispose().
+ */
+static nkf_ary *
+nkf_ary_new(int length)
+{
+    nkf_ary *ary = nkf_malloc(sizeof *ary);
+    if (ary == NULL) return NULL;
+
+    ary->ary = nkf_malloc(length);
+    if (ary->ary == NULL) {
+        nkf_free(ary);
+        return NULL;
+    }
+    ary->max_length = length;
+    ary->count = 0;
+    return ary;
+}
+
+/*
+ * Free an nkf_ary: both its byte storage and the header itself.
+ * `ary` must not be used afterwards.
+ */
+static void
+nkf_ary_dispose(nkf_ary *ary)
+{
+    unsigned char *storage = ary->ary;
+
+    nkf_free(storage);
+    nkf_free(ary);
+}
+
+/* Number of bytes currently stored in `ary` (its count field). */
+#define nkf_ary_length(ary) ((ary)->count)
+/* Non-zero when `ary` currently stores no bytes. */
+#define nkf_ary_empty_p(ary) ((ary)->count == 0)
+
+/*
+ * Return the byte stored at position `index` of `ary`.
+ *
+ * `index` must address a stored element, i.e. 0 <= index < count.
+ * The upper bound is strict: slot `count` is one past the last pushed
+ * byte and holds no valid data.
+ */
+static unsigned char
+nkf_ary_at(nkf_ary *ary, int index)
+{
+    assert(0 <= index && index < ary->count);
+    return ary->ary[index];
+}
+
+/*
+ * Empty `ary` by resetting its element count to zero.  The storage is
+ * neither freed nor overwritten, so the capacity stays available.
+ */
+static void
+nkf_ary_clear(nkf_ary *ary)
+{
+ ary->count = 0;
+}
+
+/*
+ * Append `c` to `ary` and return the new element count.
+ *
+ * The array must not be full (asserted).  `c` is stored into an
+ * unsigned char slot and the count is returned as unsigned char, so
+ * both are converted to that type.
+ */
+static unsigned char
+nkf_ary_push(nkf_ary *ary, nkf_char c)
+{
+    int slot;
+
+    assert(ary->count < ary->max_length);
+    slot = ary->count;
+    ary->ary[slot] = c;
+    ary->count = slot + 1;
+    return ary->count;
+}
+
+/*
+ * Remove the most recently pushed byte from `ary` and return it.
+ * The array must not be empty (asserted).
+ */
+static unsigned char
+nkf_ary_pop(nkf_ary *ary)
+{
+    assert(ary->count > 0);
+    ary->count -= 1;
+    return ary->ary[ary->count];
+}
+
/* Normalization Form C */
+/*
+ * Read one byte through i_nfc_getc, replacing a byte sequence found in
+ * normalization_table[].nfd by the matching .nfc sequence.
+ *
+ * The first byte is returned unchanged when it is EOF, larger than 0xFF,
+ * or has the bit pattern 10xxxxxx.  Otherwise the table is binary-searched
+ * on the nfd bytes, pulling further input bytes only as the comparison
+ * needs them.  On return, every buffered byte except the first has been
+ * handed back through i_nfc_ungetc (last byte first) and the first
+ * buffered byte is the return value.
+ *
+ * The work buffer is allocated only after the early-return check so that
+ * path cannot leak it; it is disposed before the normal return.
+ */
static nkf_char
nfc_getc(FILE *f)
{
nkf_char (*g)(FILE *f) = i_nfc_getc;
nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
- int i=0, j, k=1, lower, upper;
- nkf_char buf[9];
+ nkf_ary *buf;
const unsigned char *array;
+ int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
+ nkf_char c = (*g)(f);
- buf[i] = (*g)(f);
- while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
- lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
- while (upper >= lower) {
- j = (lower+upper) / 2;
- array = normalization_table[j].nfd;
- for (k=0; k < NORMALIZATION_TABLE_NFD_LENGTH && array[k]; k++){
- if (array[k] != buf[k]){
- array[k] < buf[k] ? (lower = j + 1) : (upper = j - 1);
- k = 0;
+ if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
+
+ /* Allocate only now: the early return above must not own a buffer. */
+ buf = nkf_ary_new(9);
+ nkf_ary_push(buf, (unsigned char)c);
+ do {
+ while (lower <= upper) {
+ int mid = (lower+upper) / 2;
+ int len;
+ array = normalization_table[mid].nfd;
+ for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
+ if (len >= nkf_ary_length(buf)) {
+ c = (*g)(f);
+ if (c == EOF) {
+ /* Input ended mid-sequence: report no match and make both loops stop. */
+ len = 0;
+ lower = 1, upper = 0;
+ break;
+ }
+ nkf_ary_push(buf, c);
+ }
+ if (array[len] != nkf_ary_at(buf, len)) {
+ if (array[len] < nkf_ary_at(buf, len)) lower = mid + 1;
+ else upper = mid - 1;
+ len = 0;
break;
- } else if (k >= i)
- buf[++i] = (*g)(f);
+ }
}
- if (k > 0){
- array = normalization_table[j].nfc;
+ if (len > 0) {
+ /* Whole nfd entry matched: swap the buffered bytes for its nfc form. */
+ int i;
+ array = normalization_table[mid].nfc;
+ nkf_ary_clear(buf);
for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
- buf[i] = (nkf_char)(array[i]);
- i--;
+ nkf_ary_push(buf, array[i]);
break;
}
}
- while (i > 0)
- (*u)(buf[i--], f);
- }
- return buf[0];
+ } while (lower <= upper);
+
+ /* Push back everything but the first byte, last byte first. */
+ while (nkf_ary_length(buf) > 1) (*u)(nkf_ary_pop(buf), f);
+ c = nkf_ary_pop(buf);
+ nkf_ary_dispose(buf);
+
+ return c;
}
static nkf_char
SEND;
}
}
+ else if (nkf_char_unicode_p(c1)) {
+ (*oconv)(0, c1);
+ NEXT;
+ }
else {
/* first byte */
if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
C%7 : 0 1 2 3 4 5 6
NUM : 2 0 3 4 5 X 1
*/
- static const int jphone_emoji_first_table[7] =
+ static const nkf_char jphone_emoji_first_table[7] =
{0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
if ((c1 = (*i_getc)(f)) == EOF) LAST;
}
if (strcmp(long_option[i].name, "oc=") == 0){
enc = nkf_enc_find((char *)p);
- if (enc <= 0) continue;
+ /* if (enc <= 0) continue; */
+ if (!enc) continue;
output_encoding = enc;
continue;
}