** E-Mail: furukawa@tcp-ip.or.jp
** \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
***********************************************************************/
-/* $Id: nkf.c,v 1.74 2005/07/18 16:24:35 naruse Exp $ */
+/* $Id: nkf.c,v 1.82 2005/11/07 22:38:08 naruse Exp $ */
#define NKF_VERSION "2.0.5"
-#define NKF_RELEASE_DATE "2005-07-19"
+#define NKF_RELEASE_DATE "2005-11-07"
#include "config.h"
-static char *CopyRight =
- "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse";
+#define COPY_RIGHT \
+ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse"
/*
#ifdef PERL_XS
#undef OVERWRITE
#endif
-#if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE )
-#define UNICODE_ENABLE
-#else
-#undef UNICODE_NORMALIZATION
-#endif
#ifndef PERL_XS
#include <stdio.h>
#endif
#include <stdlib.h>
+#include <string.h>
#if defined(MSDOS) || defined(__OS2__)
#include <fcntl.h>
#ifdef OVERWRITE
/* added by satoru@isoternet.org */
-#include <string.h>
#include <sys/stat.h>
#ifndef MSDOS /* UNIX, OS/2 */
#include <unistd.h>
#define is_alnum(c) \
(('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
+/* I don't trust portability of toupper */
+/* NOTE: every macro below evaluates its argument more than once, so do
+   not pass expressions with side effects.  Arguments are parenthesized
+   so that operators inside an argument cannot rebind in the expansion. */
+#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
+#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
+#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
+#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
+#define nkf_isblank(c)  ((c) == SPACE || (c) == TAB)
+#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == NL)
+#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
+#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
+/* hex2bin does not validate x; the result is only meaningful when x
+   is already a decimal or hexadecimal digit. */
+#define hex2bin(x)      ( nkf_isdigit(x) ? (x) - '0' : nkf_toupper(x) - 'A' + 10)
+
#define HOLD_SIZE 1024
#define IOBUF_SIZE 16384
#define GETA2 0x2e
-#ifdef UNICODE_ENABLE
+#if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
#define sizeof_euc_utf8 94
#define sizeof_euc_to_utf8_1byte 94
#define sizeof_euc_to_utf8_2bytes 94
/* MIME preprocessor */
-
#ifdef EASYWIN /*Easy Win */
extern POINT _BufferSize;
#endif
#ifdef ANSI_C_PROTOTYPE
#define PROTO(x) x
#define STATIC static
+#define CONST const
#else
#define PROTO(x) ()
#define STATIC
+#define CONST
#endif
struct input_code{
STATIC char *input_codename = "";
+#ifndef PERL_XS
+STATIC const char *CopyRight = COPY_RIGHT;
+#endif
+#if !defined(PERL_XS) && !defined(WIN32DLL)
STATIC int noconvert PROTO((FILE *f));
+#endif
STATIC int kanji_convert PROTO((FILE *f));
STATIC int h_conv PROTO((FILE *f,int c2,int c1));
STATIC int push_hold_buf PROTO((int c2));
STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
STATIC int e_iconv PROTO((int c2,int c1,int c0));
#ifdef UTF8_INPUT_ENABLE
+STATIC void encode_fallback_html PROTO((int c));
+STATIC void encode_fallback_xml PROTO((int c));
+STATIC void encode_fallback_java PROTO((int c));
+STATIC void encode_fallback_perl PROTO((int c));
+STATIC void encode_fallback_subchar PROTO((int c));
+STATIC void (*encode_fallback)PROTO((int c)) = NULL;
STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1));
STATIC int w_iconv PROTO((int c2,int c1,int c0));
STATIC int w_iconv16 PROTO((int c2,int c1,int c0));
-STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1));
+STATIC int w_iconv_common PROTO((int c1,int c0,const unsigned short *const *pp,int psize,int *p2,int *p1));
STATIC int ww16_conv PROTO((int c2, int c1, int c0));
+STATIC int w16e_conv PROTO((unsigned short val,int *p2,int *p1));
#endif
#ifdef UTF8_OUTPUT_ENABLE
STATIC int e2w_conv PROTO((int c2,int c1));
STATIC int mime_begin_strict PROTO((FILE *f));
STATIC int mime_getc_buf PROTO((FILE *f));
STATIC int mime_ungetc_buf PROTO((int c,FILE *f));
-STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
+STATIC int mime_integrity PROTO((FILE *f,const unsigned char *p));
STATIC int base64decode PROTO((int c));
STATIC void mime_prechar PROTO((int c2, int c1));
STATIC void mime_putc PROTO((int c));
STATIC void open_mime PROTO((int c));
STATIC void close_mime PROTO(());
+#ifndef PERL_XS
STATIC void usage PROTO(());
STATIC void version PROTO(());
+#endif
STATIC void options PROTO((unsigned char *c));
#if defined(PERL_XS) || defined(WIN32DLL)
STATIC void reinit PROTO(());
/* buffers */
-static unsigned char stdibuf[IOBUF_SIZE];
-static unsigned char stdobuf[IOBUF_SIZE];
-static unsigned char hold_buf[HOLD_SIZE*2];
-static int hold_count;
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+STATIC unsigned char stdibuf[IOBUF_SIZE];
+STATIC unsigned char stdobuf[IOBUF_SIZE];
+#endif
+STATIC unsigned char hold_buf[HOLD_SIZE*2];
+STATIC int hold_count;
/* MIME preprocessor fifo */
#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
#define MIME_BUF_MASK (MIME_BUF_SIZE-1)
#define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
-static unsigned char mime_buf[MIME_BUF_SIZE];
-static unsigned int mime_top = 0;
-static unsigned int mime_last = 0; /* decoded */
-static unsigned int mime_input = 0; /* undecoded */
-static int (*mime_iconv_back)PROTO((int c2,int c1,int c0)) = NULL;
+STATIC unsigned char mime_buf[MIME_BUF_SIZE];
+STATIC unsigned int mime_top = 0;
+STATIC unsigned int mime_last = 0; /* decoded */
+STATIC unsigned int mime_input = 0; /* undecoded */
+STATIC int (*mime_iconv_back)PROTO((int c2,int c1,int c0)) = NULL;
/* flags */
-static int unbuf_f = FALSE;
-static int estab_f = FALSE;
-static int nop_f = FALSE;
-static int binmode_f = TRUE; /* binary mode */
-static int rot_f = FALSE; /* rot14/43 mode */
-static int hira_f = FALSE; /* hira/kata henkan */
-static int input_f = FALSE; /* non fixed input code */
-static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
-static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
-static int mime_decode_f = FALSE; /* mime decode is explicitly on */
-static int mimebuf_f = FALSE; /* MIME buffered input */
-static int broken_f = FALSE; /* convert ESC-less broken JIS */
-static int iso8859_f = FALSE; /* ISO8859 through */
-static int mimeout_f = FALSE; /* base64 mode */
+STATIC int unbuf_f = FALSE;
+STATIC int estab_f = FALSE;
+STATIC int nop_f = FALSE;
+STATIC int binmode_f = TRUE; /* binary mode */
+STATIC int rot_f = FALSE; /* rot14/43 mode */
+STATIC int hira_f = FALSE; /* hira/kata henkan */
+STATIC int input_f = FALSE; /* non fixed input code */
+STATIC int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
+STATIC int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
+STATIC int mime_decode_f = FALSE; /* mime decode is explicitly on */
+STATIC int mimebuf_f = FALSE; /* MIME buffered input */
+STATIC int broken_f = FALSE; /* convert ESC-less broken JIS */
+STATIC int iso8859_f = FALSE; /* ISO8859 through */
+STATIC int mimeout_f = FALSE; /* base64 mode */
#if defined(MSDOS) || defined(__OS2__)
-static int x0201_f = TRUE; /* Assume JISX0201 kana */
+STATIC int x0201_f = TRUE; /* Assume JISX0201 kana */
#else
-static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
+STATIC int x0201_f = NO_X0201; /* Assume NO JISX0201 */
#endif
-static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
-#ifdef UNICODE_ENABLE
-static int internal_unicode_f = FALSE; /* Internal Unicode Processing */
+STATIC int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
+STATIC int internal_unicode_f = FALSE; /* Internal Unicode Processing */
#endif
#ifdef UTF8_OUTPUT_ENABLE
-static int unicode_bom_f= 0; /* Output Unicode BOM */
-static int w_oconv16_LE = 0; /* utf-16 little endian */
-static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
+STATIC int unicode_bom_f= 0; /* Output Unicode BOM */
+STATIC int w_oconv16_LE = 0; /* utf-16 little endian */
+STATIC int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */
+STATIC int unicode_subchar = '?'; /* the regular substitution character */
#endif
#ifdef UNICODE_NORMALIZATION
-static int nfc_f = FALSE;
-static int (*i_nfc_getc)PROTO((FILE *)) = std_getc; /* input of ugetc */
-static int (*i_nfc_ungetc)PROTO((int c ,FILE *f)) = std_ungetc;
+STATIC int nfc_f = FALSE;
+STATIC int (*i_nfc_getc)PROTO((FILE *)) = std_getc; /* input of ugetc */
+STATIC int (*i_nfc_ungetc)PROTO((int c ,FILE *f)) = std_ungetc;
STATIC int nfc_getc PROTO((FILE *f));
STATIC int nfc_ungetc PROTO((int c,FILE *f));
#endif
#ifdef INPUT_OPTION
-static int cap_f = FALSE;
-static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
-static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
+STATIC int cap_f = FALSE;
+STATIC int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */
+STATIC int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc;
STATIC int cap_getc PROTO((FILE *f));
STATIC int cap_ungetc PROTO((int c,FILE *f));
-static int url_f = FALSE;
-static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
-static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
+STATIC int url_f = FALSE;
+STATIC int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
+STATIC int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc;
STATIC int url_getc PROTO((FILE *f));
STATIC int url_ungetc PROTO((int c,FILE *f));
#endif
#ifdef NUMCHAR_OPTION
#define CLASS_MASK 0x0f000000
#define CLASS_UTF16 0x01000000
-static int numchar_f = FALSE;
-static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
-static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
+STATIC int numchar_f = FALSE;
+STATIC int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ugetc */
+STATIC int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc;
STATIC int numchar_getc PROTO((FILE *f));
STATIC int numchar_ungetc PROTO((int c,FILE *f));
#endif
#ifdef CHECK_OPTION
-static int noout_f = FALSE;
+STATIC int noout_f = FALSE;
STATIC void no_putc PROTO((int c));
-static int debug_f = FALSE;
-STATIC void debug PROTO((char *str));
-static int (*iconv_for_check)() = 0;
+STATIC int debug_f = FALSE;
+STATIC void debug PROTO((const char *str));
+STATIC int (*iconv_for_check)() = 0;
#endif
-static int guess_f = FALSE;
+STATIC int guess_f = FALSE;
+#if !defined PERL_XS
STATIC void print_guessed_code PROTO((char *filename));
+#endif
STATIC void set_input_codename PROTO((char *codename));
-static int is_inputcode_mixed = FALSE;
-static int is_inputcode_set = FALSE;
+STATIC int is_inputcode_mixed = FALSE;
+STATIC int is_inputcode_set = FALSE;
#ifdef EXEC_IO
-static int exec_f = 0;
+STATIC int exec_f = 0;
#endif
#ifdef SHIFTJIS_CP932
-STATIC int cp932_f = TRUE;
+/* invert IBM extended characters to others and controls some UCS mapping */
+STATIC int cp51932_f = TRUE;
#define CP932_TABLE_BEGIN (0xfa)
#define CP932_TABLE_END (0xfc)
+/* invert NEC-selected IBM extended characters to IBM extended characters */
STATIC int cp932inv_f = TRUE;
#define CP932INV_TABLE_BEGIN (0xed)
#define CP932INV_TABLE_END (0xee)
#ifdef X0212_ENABLE
STATIC int x0212_f = FALSE;
-static int x0212_shift PROTO((int c));
-static int x0212_unshift PROTO((int c));
+STATIC int x0212_shift PROTO((int c));
+STATIC int x0212_unshift PROTO((int c));
#endif
STATIC unsigned char prefix_table[256];
#ifdef UTF8_INPUT_ENABLE
STATIC void w_status PROTO((struct input_code *, int));
STATIC void w16_status PROTO((struct input_code *, int));
-static int utf16_mode = UTF16BE_INPUT;
+STATIC int utf16_mode = UTF16BE_INPUT;
#endif
struct input_code input_code_list[] = {
{"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
{"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
+#ifdef UTF8_INPUT_ENABLE
{"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
{"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
+#endif
{0}
};
-static int mimeout_mode = 0;
-static int base64_count = 0;
+STATIC int mimeout_mode = 0;
+STATIC int base64_count = 0;
/* X0208 -> ASCII converter */
/* fold parameter */
-static int f_line = 0; /* chars in line */
-static int f_prev = 0;
-static int fold_preserve_f = FALSE; /* preserve new lines */
-static int fold_f = FALSE;
-static int fold_len = 0;
+STATIC int f_line = 0; /* chars in line */
+STATIC int f_prev = 0;
+STATIC int fold_preserve_f = FALSE; /* preserve new lines */
+STATIC int fold_f = FALSE;
+STATIC int fold_len = 0;
/* options */
-static unsigned char kanji_intro = DEFAULT_J;
-static unsigned char ascii_intro = DEFAULT_R;
+STATIC unsigned char kanji_intro = DEFAULT_J;
+STATIC unsigned char ascii_intro = DEFAULT_R;
/* Folding */
#define FOLD_MARGIN 10
#define DEFAULT_FOLD 60
-static int fold_margin = FOLD_MARGIN;
+STATIC int fold_margin = FOLD_MARGIN;
/* converters */
#endif
/* process default */
-static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
+STATIC void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV;
-static void (*oconv)PROTO((int c2,int c1)) = no_connection;
+STATIC void (*oconv)PROTO((int c2,int c1)) = no_connection;
/* s_iconv or oconv */
-static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
+STATIC int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2;
-static void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
-static void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
-static void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
-static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
-static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
-static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
-static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
+STATIC void (*o_zconv)PROTO((int c2,int c1)) = no_connection;
+STATIC void (*o_fconv)PROTO((int c2,int c1)) = no_connection;
+STATIC void (*o_crconv)PROTO((int c2,int c1)) = no_connection;
+STATIC void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection;
+STATIC void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection;
+STATIC void (*o_base64conv)PROTO((int c2,int c1)) = no_connection;
+STATIC void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection;
-/* static redirections */
+/* STATIC redirections */
-static void (*o_putc)PROTO((int c)) = std_putc;
+STATIC void (*o_putc)PROTO((int c)) = std_putc;
-static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
-static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
+STATIC int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */
+STATIC int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc;
-static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
-static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
+STATIC int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
+STATIC int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc;
-static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
+STATIC void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */
-static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
-static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
+STATIC int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */
+STATIC int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc;
/* for strict mime */
-static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
-static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
+STATIC int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */
+STATIC int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc;
/* Global states */
-static int output_mode = ASCII, /* output kanji mode */
+STATIC int output_mode = ASCII, /* output kanji mode */
input_mode = ASCII, /* input kanji mode */
shift_mode = FALSE; /* TRUE shift out, or X0201 */
-static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
+STATIC int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
/* X0201 / X0208 conversion tables */
/* X0201 kana conversion table */
/* 90-9F A0-DF */
-static
+STATIC const
unsigned char cv[]= {
0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
/* X0201 kana conversion table for daguten */
/* 90-9F A0-DF */
-static
+STATIC const
unsigned char dv[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* X0201 kana conversion table for han-daguten */
/* 90-9F A0-DF */
-static
+STATIC const
unsigned char ev[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* X0208 kigou conversion table */
/* 0x8140 - 0x819e */
-static
+STATIC const
unsigned char fv[] = {
0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
#define CRLF 1
-static int file_out = FALSE;
+STATIC int file_out = FALSE;
#ifdef OVERWRITE
-static int overwrite = FALSE;
+STATIC int overwrite = FALSE;
#endif
-static int crmode_f = 0; /* CR, NL, CRLF */
+STATIC int crmode_f = 0; /* CR, NL, CRLF */
#ifdef EASYWIN /*Easy Win */
-static int end_check;
+STATIC int end_check;
#endif /*Easy Win */
#define STD_GC_BUFSIZE (256)
}
#endif /* WIN32DLL */
-static
+STATIC const
struct {
- char *name;
- char *alias;
+ const char *name;
+ const char *alias;
} long_option[] = {
+ {"ic=", ""},
+ {"oc=", ""},
{"base64","jMB"},
{"euc","e"},
{"euc-input","E"},
#ifdef X0212_ENABLE
{"x0212", ""},
#endif
-#ifdef UNICODE_ENABLE
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
{"internal-unicode", ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
{"utf8", "w"},
{"utf16", "w16"},
{"ms-ucs-map", ""},
+ {"fb-skip", ""},
+ {"fb-html", ""},
+ {"fb-xml", ""},
+ {"fb-perl", ""},
+ {"fb-java", ""},
+ {"fb-subchar", ""},
+ {"fb-subchar=", ""},
#endif
#ifdef UTF8_INPUT_ENABLE
{"utf8-input", "W"},
{"prefix=", ""},
};
-static int option_mode = 0;
+STATIC int option_mode = 0;
void
options(cp)
unsigned char *cp;
{
- int i;
+ int i, j;
unsigned char *p = NULL;
+ unsigned char *cp_back = NULL;
+ unsigned char codeset[32];
if (option_mode==1)
return;
while(*cp && *cp++!='-');
- while (*cp) {
+ while (*cp || cp_back) {
+ if(!*cp){
+ cp = cp_back;
+ cp_back = NULL;
+ continue;
+ }
p = 0;
switch (*cp++) {
case '-': /* literal options */
- if (!*cp) { /* ignore the rest of arguments */
+ if (!*cp || *cp == SPACE) { /* ignore the rest of arguments */
option_mode = 1;
return;
}
for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
- int j;
p = (unsigned char *)long_option[i].name;
for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
if (*p == cp[j] || cp[j] == ' '){
- p = &cp[j];
+ p = &cp[j] + 1;
break;
}
p = 0;
}
if (p == 0) return;
- cp = (unsigned char *)long_option[i].alias;
- if (!*cp){
- cp = p;
+ while(*cp && *cp != SPACE && cp++);
+ if (long_option[i].alias[0]){
+ cp_back = cp;
+ cp = (unsigned char *)long_option[i].alias;
+ }else{
+ if (strcmp(long_option[i].name, "ic=") == 0){
+ for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
+ codeset[i] = nkf_toupper(p[i]);
+ }
+ codeset[i] = 0;
+ if(strcmp(codeset, "ISO-2022-JP") == 0){
+ input_f = JIS_INPUT;
+ }else if(strcmp(codeset, "SHIFT_JIS") == 0){
+ input_f = SJIS_INPUT;
+ if (x0201_f==NO_X0201) x0201_f=TRUE;
+ }else if(strcmp(codeset, "CP932") == 0){
+ input_f = SJIS_INPUT;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+ cp932inv_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = TRUE;
+#endif
+ }else if(strcmp(codeset, "EUCJP") == 0 ||
+ strcmp(codeset, "EUC-JP") == 0 ||
+ strcmp(codeset, "CP51932") == 0){
+ input_f = JIS_INPUT;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+ cp932inv_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = TRUE;
+#endif
+ }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
+ strcmp(codeset, "EUCJP-MS") == 0){
+ input_f = JIS_INPUT;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+ cp932inv_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = TRUE;
+#endif
+#ifdef UTF8_INPUT_ENABLE
+ }else if(strcmp(codeset, "UTF-8") == 0 ||
+ strcmp(codeset, "UTF-8N") == 0 ||
+ strcmp(codeset, "UTF-8-BOM") == 0){
+ input_f = UTF8_INPUT;
+#ifdef UNICODE_NORMALIZATION
+ }else if(strcmp(codeset, "UTF8-MAC") == 0){
+ input_f = UTF8_INPUT;
+ nfc_f = TRUE;
+#endif
+ }else if(strcmp(codeset, "UTF-16") == 0){
+ input_f = UTF16BE_INPUT;
+ utf16_mode = UTF16BE_INPUT;
+ }else if(strcmp(codeset, "UTF-16BE") == 0 ||
+ strcmp(codeset, "UTF-16BE-BOM") == 0){
+ input_f = UTF16BE_INPUT;
+ utf16_mode = UTF16BE_INPUT;
+ }else if(strcmp(codeset, "UTF-16LE") == 0 ||
+ strcmp(codeset, "UTF-16LE-BOM") == 0){
+ input_f = UTF16LE_INPUT;
+ utf16_mode = UTF16LE_INPUT;
+#endif
+ }
+ continue;
+ }
+ if (strcmp(long_option[i].name, "oc=") == 0){
+ for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
+ codeset[i] = nkf_toupper(p[i]);
+ }
+ codeset[i] = 0;
+ if(strcmp(codeset, "ISO-2022-JP") == 0){
+ output_conv = j_oconv;
+ }else if(strcmp(codeset, "SHIFT_JIS") == 0){
+ output_conv = s_oconv;
+ }else if(strcmp(codeset, "CP932") == 0){
+ output_conv = s_oconv;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+ cp932inv_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = TRUE;
+#endif
+ }else if(strcmp(codeset, "EUCJP") == 0 ||
+ strcmp(codeset, "EUC-JP") == 0 ||
+ strcmp(codeset, "CP51932") == 0){
+ output_conv = e_oconv;
+ x0201_f = FALSE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+ cp932inv_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = TRUE;
+#endif
+ }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
+ strcmp(codeset, "EUCJP-MS") == 0){
+ output_conv = e_oconv;
+ x0201_f = FALSE;
+ x0212_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ }else if(strcmp(codeset, "UTF-8") == 0){
+ output_conv = w_oconv;
+ }else if(strcmp(codeset, "UTF-8N") == 0){
+ output_conv = w_oconv;
+ unicode_bom_f=1;
+ }else if(strcmp(codeset, "UTF-8-BOM") == 0){
+ output_conv = w_oconv;
+ unicode_bom_f=2;
+ }else if(strcmp(codeset, "UTF-16") == 0){
+ output_conv = w_oconv16;
+ }else if(strcmp(codeset, "UTF-16BE") == 0){
+ output_conv = w_oconv16;
+ unicode_bom_f=1;
+ }else if(strcmp(codeset, "UTF-16BE-BOM") == 0){
+ output_conv = w_oconv16;
+ unicode_bom_f=2;
+ }else if(strcmp(codeset, "UTF-16LE") == 0){
+ output_conv = w_oconv16;
+ w_oconv16_LE = 1;
+ unicode_bom_f=1;
+ }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
+ output_conv = w_oconv16;
+ w_oconv16_LE = 1;
+ unicode_bom_f=2;
+#endif
+ }
+ continue;
+ }
#ifdef OVERWRITE
if (strcmp(long_option[i].name, "overwrite") == 0){
file_out = TRUE;
#endif
if (strcmp(long_option[i].name, "cp932") == 0){
#ifdef SHIFTJIS_CP932
- cp932_f = TRUE;
+ cp51932_f = TRUE;
cp932inv_f = TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
}
if (strcmp(long_option[i].name, "no-cp932") == 0){
#ifdef SHIFTJIS_CP932
- cp932_f = FALSE;
+ cp51932_f = FALSE;
cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
return;
}
#endif
-#ifdef UNICODE_ENABLE
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
if (strcmp(long_option[i].name, "internal-unicode") == 0){
internal_unicode_f = TRUE;
continue;
}
+ if (strcmp(long_option[i].name, "fb-skip") == 0){
+ encode_fallback = NULL;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-html") == 0){
+ encode_fallback = encode_fallback_html;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-xml" ) == 0){
+ encode_fallback = encode_fallback_xml;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-java") == 0){
+ encode_fallback = encode_fallback_java;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-perl") == 0){
+ encode_fallback = encode_fallback_perl;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-subchar") == 0){
+ encode_fallback = encode_fallback_subchar;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-subchar=") == 0){
+ encode_fallback = encode_fallback_subchar;
+ unicode_subchar = 0;
+ if (p[0] != '0'){
+ /* decimal number */
+ for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
+ unicode_subchar *= 10;
+ unicode_subchar += hex2bin(p[i]);
+ }
+ }else if(p[1] == 'x' || p[1] == 'X'){
+ /* hexadecimal number */
+ for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
+ unicode_subchar <<= 4;
+ unicode_subchar |= hex2bin(p[i]);
+ }
+ }else{
+ /* octal number */
+ for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
+ unicode_subchar *= 8;
+ unicode_subchar += hex2bin(p[i]);
+ }
+ }
+ w16e_conv(unicode_subchar, &i, &j);
+ unicode_subchar = i<<8 | j;
+ continue;
+ }
#endif
#ifdef UTF8_OUTPUT_ENABLE
if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
}
#endif
if (strcmp(long_option[i].name, "prefix=") == 0){
- if (*p == '=' && ' ' < p[1] && p[1] < 128){
- for (i = 2; ' ' < p[i] && p[i] < 128; i++){
- prefix_table[p[i]] = p[1];
+ if (' ' < p[0] && p[0] < 128){
+ for (i = 1; ' ' < p[i] && p[i] < 128; i++){
+ prefix_table[p[i]] = p[0];
}
}
continue;
#define SCORE_INIT (SCORE_iMIME)
-int score_table_A0[] = {
+const int score_table_A0[] = {
0, 0, 0, 0,
0, 0, 0, 0,
0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
};
-int score_table_F0[] = {
+const int score_table_F0[] = {
SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
struct input_code *ptr;
{
int c2 = ptr->buf[0];
+#ifdef UTF8_OUTPUT_ENABLE
int c1 = ptr->buf[1];
+#endif
if (c2 < 0){
set_code_score(ptr, SCORE_ERROR);
}else if (c2 == SSO){
ptr->stat = 1;
status_push_ch(ptr, c);
#ifdef SHIFTJIS_CP932
- }else if (cp932_f
+ }else if (cp51932_f
&& CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){
ptr->stat = 2;
status_push_ch(ptr, c);
}
#endif /*WIN32DLL*/
+#if !defined(PERL_XS) && !defined(WIN32DLL)
int
noconvert(f)
FILE *f;
(*o_putc)(c);
return 1;
}
-
+#endif
void
module_connection()
/* normal ASCII code */
SEND;
}
- } else if (c1 == SI) {
+ } else if (!is_8bit && c1 == SI) {
shift_mode = FALSE;
NEXT;
- } else if (c1 == SO) {
+ } else if (!is_8bit && c1 == SO) {
shift_mode = TRUE;
NEXT;
- } else if (c1 == ESC ) {
+ } else if (!is_8bit && c1 == ESC ) {
if ((c1 = (*i_getc)(f)) == EOF) {
/* (*oconv)(0, ESC); don't send bogus code */
LAST;
input_mode = X0208;
shift_mode = FALSE;
set_input_codename("ISO-2022-JP");
+#ifdef CHECK_OPTION
debug(input_codename);
+#endif
NEXT;
} else if (c1 == '(') {
if ((c1 = (*i_getc)(f)) == EOF) {
int c2, c1;
int *p2, *p1;
{
+#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
int val;
+#endif
#ifdef SHIFTJIS_CP932
- if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
- extern unsigned short shiftjis_cp932[3][189];
+ if (cp51932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){
+ extern const unsigned short shiftjis_cp932[3][189];
val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
if (val){
c2 = val >> 8;
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){
- extern unsigned short shiftjis_x0212[3][189];
+ extern const unsigned short shiftjis_x0212[3][189];
val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
if (val){
if (val & 0x8000){
}
}
#endif
- c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
- if (c1 < 0x9f)
- c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
- else {
- c1 = c1 - 0x7e;
- c2++;
+ if(c2 >= 0x80){
+ c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394);
+ if (c1 < 0x9f)
+ c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f);
+ else {
+ c1 = c1 - 0x7e;
+ c2++;
+ }
}
#ifdef X0212_ENABLE
c2 = (c2 << 8) | (c1 & 0x7f);
c1 = c0 & 0x7f;
#ifdef SHIFTJIS_CP932
- if (cp932_f){
+ if (cp51932_f){
int s2, s1;
if (e2s_conv(c2, c1, &s2, &s1) == 0){
s2e_conv(s2, s1, &c2, &c1);
int c2, c1, c0;
int *p2, *p1;
{
- extern unsigned short * utf8_to_euc_2bytes[];
- extern unsigned short ** utf8_to_euc_3bytes[];
+ extern const unsigned short *const utf8_to_euc_2bytes[];
+ extern const unsigned short *const *const utf8_to_euc_3bytes[];
int ret = 0;
if (0xc0 <= c2 && c2 <= 0xef) {
- unsigned short **pp;
+ const unsigned short *const *pp;
if (0xe0 <= c2) {
if (c0 == 0) return -1;
c1, c0;
{
int ret = 0;
- unsigned short val = 0;
if (c0 == 0){
if (c2 == 0) /* 0x00-0x7f */
else return 0;
}
if (c2 == EOF);
- else if (c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
+ else if (c2 == 0xef && c1 == 0xbb && c0 == 0xbf) {
return 0; /* throw BOM */
- else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
+ } else if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16)){
+ unsigned short val = 0;
if(c2 == 0){
c2 = c1;
c1 = 0;
val = ww16_conv(c2, c1, c0);
c2 = (val >> 8) & 0xff;
c1 = val & 0xff;
+#endif
} else {
ret = w2e_conv(c2, c1, c0, &c2, &c1);
}
unsigned short val;
int *p2, *p1;
{
- extern unsigned short * utf8_to_euc_2bytes[];
- extern unsigned short ** utf8_to_euc_3bytes[];
+ extern const unsigned short *const utf8_to_euc_2bytes[];
+ extern const unsigned short *const *const utf8_to_euc_3bytes[];
int c2, c1, c0;
- unsigned short **pp;
+ const unsigned short *const *pp;
int psize;
int ret = 0;
(*oconv)(c2, c1);
return 0;
}
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
if (internal_unicode_f && (output_conv == w_oconv || output_conv == w_oconv16));
+#endif
else ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1);
if (ret) return ret;
(*oconv)(c2, c1);
int
w_iconv_common(c1, c0, pp, psize, p2, p1)
int c1,c0;
- unsigned short **pp;
+ const unsigned short *const *pp;
int psize;
int *p2, *p1;
{
int c2;
- unsigned short *p ;
+ const unsigned short *p;
unsigned short val;
+ /* CP932/CP51932: U+00A6 (BROKEN BAR) -> not 0x8fa2c3, but 0x7c */
+ if (ms_ucs_map_f && cp51932_f && c1 == 0xC2 && c0 == 0xA6){
+ if (p2) *p2 = 0;
+ if (p1) *p1 = 0x7C;
+ return 0;
+ }
+
if (pp == 0) return 1;
c1 -= 0x80;
if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1;
val = p[c0];
if (val == 0) return 1;
+ if (!ms_ucs_map_f && (val <= 0xFF || (c1 >= 0x40 && val & 0x8000))) return 1;
c2 = val >> 8;
if (val & 0x8000){
#endif
#ifdef UTF8_OUTPUT_ENABLE
+/*
+ * Write (c & 0x00FFFFFF) as upper-case hexadecimal digits without
+ * leading zeros, handing each digit to f as f(0, digit).
+ * At least one digit is always produced, so zero is written as "0".
+ *
+ *   f : callback that receives each digit character as its second argument
+ *   c : value to print; only the low 24 bits are used
+ */
+void
+nkf_each_char_to_hex(f, c)
+    void (*f)PROTO((int c2,int c1));
+    int c;
+{
+    const char *hex = "0123456789ABCDEF";
+    int shift = 20;     /* declared before any statement (C89) */
+
+    c &= 0x00FFFFFF;
+    /* skip leading zero nibbles, but never the last one */
+    while(shift > 0 && (c >> shift) == 0){
+        shift -= 4;
+    }
+    while(shift >= 0){
+        (*f)(0, hex[(c>>shift)&0xF]);
+        shift -= 4;
+    }
+    return;
+}
+
+/*
+ * Fallback that writes c as an HTML decimal numeric character
+ * reference: '&', '#', the decimal digits of (c & 0x00FFFFFF), ';'.
+ * Every character is handed to the current oconv as (0, ch).
+ */
+void
+encode_fallback_html(c)
+    int c;
+{
+    int divisor = 1;
+
+    (*oconv)(0, '&');
+    (*oconv)(0, '#');
+    c &= 0x00FFFFFF;
+    /* The masked value can need 8 decimal digits (0xFFFFFF == 16777215),
+       so derive the divisor from the value instead of assuming 7 digits. */
+    while(c / divisor >= 10){
+        divisor *= 10;
+    }
+    /* zero still yields the single digit '0' */
+    for(; divisor > 0; divisor /= 10){
+        (*oconv)(0, 0x30 + (c / divisor) % 10);
+    }
+    (*oconv)(0, ';');
+    return;
+}
+
+/*
+ * Fallback that writes c as a hexadecimal numeric character reference:
+ * the characters "&#x", the digits produced by nkf_each_char_to_hex,
+ * then ';'.  All output goes through the current oconv as (0, ch).
+ */
+void
+encode_fallback_xml(c)
+    int c;
+{
+    static const char lead[] = "&#x";
+    const char *s;
+
+    for(s = lead; *s; s++){
+        (*oconv)(0, *s);
+    }
+    nkf_each_char_to_hex(oconv, c);
+    (*oconv)(0, ';');
+}
+
+/*
+ * Fallback that writes c as a backslash escape through oconv:
+ *   - (c & 0x00FFFFFF) <= 0xFFFF : '\', 'u', then 4 hex digits
+ *   - otherwise                  : '\', 'U', "00", then 6 hex digits
+ * Digits are upper-case and each character is passed as (0, ch).
+ */
+void
+encode_fallback_java(c)
+    int c;
+{
+    static const char digits[] = "0123456789ABCDEF";
+    int shift;
+
+    (*oconv)(0, '\\');
+    if((c & 0x00FFFFFF) <= 0xFFFF){
+        (*oconv)(0, 'u');
+        shift = 12;             /* four nibbles follow */
+    }else{
+        (*oconv)(0, 'U');
+        (*oconv)(0, '0');
+        (*oconv)(0, '0');
+        shift = 20;             /* six nibbles follow */
+    }
+    for(; shift >= 0; shift -= 4){
+        (*oconv)(0, digits[(c >> shift) & 0xF]);
+    }
+}
+
+/*
+ * Fallback that writes c in the form \x{HEX}: the characters '\', 'x',
+ * '{', the digits produced by nkf_each_char_to_hex, then '}'.
+ * All output goes through the current oconv as (0, ch).
+ */
+void
+encode_fallback_perl(c)
+    int c;
+{
+    static const char lead[] = "\\x{";
+    const char *s;
+
+    for(s = lead; *s; s++){
+        (*oconv)(0, *s);
+    }
+    nkf_each_char_to_hex(oconv, c);
+    (*oconv)(0, '}');
+}
+
+/*
+ * Fallback that ignores the incoming character and emits the
+ * configured substitution character instead: unicode_subchar is split
+ * into its high and low bytes and passed to oconv as (high, low).
+ *
+ *   c : the unconvertible character; its value is not used
+ */
+void
+encode_fallback_subchar(c)
+    int c;
+{
+    c = unicode_subchar;
+    (*oconv)((c>>8)&0xFF, c&0xFF);
+    return;
+}
+
int
e2w_conv(c2, c1)
int c2, c1;
{
- extern unsigned short euc_to_utf8_1byte[];
- extern unsigned short * euc_to_utf8_2bytes[];
- extern unsigned short * euc_to_utf8_2bytes_ms[];
- unsigned short *p;
+ extern const unsigned short euc_to_utf8_1byte[];
+ extern const unsigned short *const euc_to_utf8_2bytes[];
+ extern const unsigned short *const euc_to_utf8_2bytes_ms[];
+ const unsigned short *p;
if (c2 == X0201) {
p = euc_to_utf8_1byte;
#ifdef X0212_ENABLE
} else if (c2 >> 8 == 0x8f){
- extern unsigned short * x0212_to_utf8_2bytes[];
+ extern const unsigned short *const x0212_to_utf8_2bytes[];
c2 = (c2&0x7f) - 0x21;
if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
p = x0212_to_utf8_2bytes[c2];
if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
w16e_conv(c1, &c2, &c1);
if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
+ if(encode_fallback)(*encode_fallback)(c1);
return;
}
}
} else if ((c2 & 0xff00) >> 8 == 0x8f){
output_mode = JAPANESE_EUC;
#ifdef SHIFTJIS_CP932
- if (cp932_f){
+ if (cp51932_f){
int s2, s1;
if (e2s_conv(c2, c1, &s2, &s1) == 0){
s2e_conv(s2, s1, &c2, &c1);
}
}
#endif
- if ((c2 & 0xff00) >> 8 == 0x8f){
+ if (c2 == 0) {
+ output_mode = ASCII;
+ (*o_putc)(c1);
+ }else if ((c2 & 0xff00) >> 8 == 0x8f){
if (x0212_f){
(*o_putc)(0x8f);
(*o_putc)((c2 & 0x7f) | 0x080);
e2s_conv(c2, c1, p2, p1)
int c2, c1, *p2, *p1;
{
+#ifdef X0212_ENABLE
int val = 0;
- unsigned short *ptr;
+ const unsigned short *ptr;
int ndx;
- extern unsigned short *x0212_shiftjis[];
-#ifdef X0212_ENABLE
+ extern const unsigned short *const x0212_shiftjis[];
if ((c2 & 0xff00) == 0x8f00){
ndx = c2 & 0x7f;
if (0x21 <= ndx && ndx <= 0x7e){
#ifdef NUMCHAR_OPTION
if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
w16e_conv(c1, &c2, &c1);
+ if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
+ if(encode_fallback)(*encode_fallback)(c1);
+ return;
+ }
}
#endif
if (c2 == EOF) {
#ifdef SHIFTJIS_CP932
if (cp932inv_f
&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
- extern unsigned short cp932inv[2][189];
+ extern const unsigned short cp932inv[2][189];
int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
if (c){
c2 = c >> 8;
c1;
{
#ifdef NUMCHAR_OPTION
- if ((c1 & CLASS_MASK) == CLASS_UTF16){
+ if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
w16e_conv(c1, &c2, &c1);
+ if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){
+ if(encode_fallback)(*encode_fallback)(c1);
+ return;
+ }
}
#endif
if (c2 == EOF) {
}
-static int broken_buf[3];
-static int broken_counter = 0;
-static int broken_last = 0;
+STATIC int broken_buf[3];
+STATIC int broken_counter = 0;
+STATIC int broken_last = 0;
int
broken_getc(f)
FILE *f;
return c;
}
-static int prev_cr = 0;
+STATIC int prev_cr = 0;
void
cr_conv(c2,c1)
if (f_line<=fold_len) { /* normal case */
fold_state = 1;
} else {
- if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */
+ if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
f_line = char_size(c2,c1);
fold_state = '\n'; /* We can't wait, do fold now */
} else if (c2==X0201) {
iso2022jp_check_conv(c2,c1)
int c2, c1;
{
- static int range[RANGE_NUM_MAX][2] = {
+ STATIC const int range[RANGE_NUM_MAX][2] = {
{0x222f, 0x2239,},
{0x2242, 0x2249,},
{0x2251, 0x225b,},
/* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
-unsigned char *mime_pattern[] = {
- (unsigned char *)"\075?EUC-JP?B?",
- (unsigned char *)"\075?SHIFT_JIS?B?",
- (unsigned char *)"\075?ISO-8859-1?Q?",
- (unsigned char *)"\075?ISO-8859-1?B?",
- (unsigned char *)"\075?ISO-2022-JP?B?",
- (unsigned char *)"\075?ISO-2022-JP?Q?",
+const unsigned char *mime_pattern[] = {
+ (const unsigned char *)"\075?EUC-JP?B?",
+ (const unsigned char *)"\075?SHIFT_JIS?B?",
+ (const unsigned char *)"\075?ISO-8859-1?Q?",
+ (const unsigned char *)"\075?ISO-8859-1?B?",
+ (const unsigned char *)"\075?ISO-2022-JP?B?",
+ (const unsigned char *)"\075?ISO-2022-JP?Q?",
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
- (unsigned char *)"\075?UTF-8?B?",
- (unsigned char *)"\075?UTF-8?Q?",
+ (const unsigned char *)"\075?UTF-8?B?",
+ (const unsigned char *)"\075?UTF-8?Q?",
#endif
- (unsigned char *)"\075?US-ASCII?Q?",
- NULL
+ (const unsigned char *)"\075?US-ASCII?Q?",
+ NULL
};
0,
};
-int mime_encode[] = {
+const int mime_encode[] = {
JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
UTF8, UTF8,
0
};
-int mime_encode_method[] = {
+const int mime_encode_method[] = {
'B', 'B','Q', 'B', 'B', 'Q',
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
'B', 'Q',
#define MAXRECOVER 20
-/* I don't trust portablity of toupper */
-#define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
-#define nkf_isdigit(c) ('0'<=c && c<='9')
-#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
-#define nkf_isblank(c) (c == SPACE || c == TAB)
-#define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == NL)
-#define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
-#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
-
void
switch_mime_getc()
{
{
int c1 = 0;
int i,j,k;
- unsigned char *p,*q;
+ const unsigned char *p,*q;
int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */
mime_decode_mode = FALSE;
}
void debug(str)
- char *str;
+ const char *str;
{
if (debug_f){
fprintf(stderr, "%s\n", str);
is_inputcode_set = TRUE;
}
-#ifndef WIN32DLL
+#if !defined(PERL_XS) && !defined(WIN32DLL)
void
print_guessed_code (filename)
char *filename;
}
#endif /*WIN32DLL*/
-int
-hex2bin(x)
- int x;
-{
- if (nkf_isdigit(x)) return x - '0';
- return nkf_toupper(x) - 'A' + 10;
-}
-
#ifdef INPUT_OPTION
#ifdef ANSI_C_PROTOTYPE
int (*u)() = i_nfc_ungetc;
int i=0, j, k=1, lower, upper;
int buf[9];
- int *array = NULL;
- extern struct normalization_pair normalization_table[];
+ const int *array = NULL;
+ extern const struct normalization_pair normalization_table[];
buf[i] = (*g)(f);
while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
int
mime_integrity(f,p)
-FILE *f;
-unsigned char *p;
+ FILE *f;
+ const unsigned char *p;
{
int c,d;
unsigned int q;
*/
mime_input = mime_top;
mime_last = mime_top;
+
while(*p) Fifo(mime_input++) = *p++;
d = 0;
q = mime_input;
return (i);
}
-static char basis_64[] =
+STATIC const char basis_64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-static int b64c;
+STATIC int b64c;
#define MIMEOUT_BUF_LENGTH (60)
char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
int mimeout_buf_count = 0;
open_mime(mode)
int mode;
{
- unsigned char *p;
+ const unsigned char *p;
int i;
int j;
p = mime_pattern[0];
x0201_f = NO_X0201;
#endif
iso2022jp_f = FALSE;
-#ifdef UNICODE_ENABLE
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
internal_unicode_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
unicode_bom_f = 0;
w_oconv16_LE = 0;
ms_ucs_map_f = FALSE;
+ encode_fallback = NULL;
+ unicode_subchar = '?';
#endif
#ifdef UNICODE_NORMALIZATION
nfc_f = FALSE;
exec_f = 0;
#endif
#ifdef SHIFTJIS_CP932
- cp932_f = TRUE;
+ cp51932_f = TRUE;
cp932inv_f = TRUE;
#endif
+#ifdef X0212_ENABLE
+ x0212_f = FALSE;
+#endif
{
int i;
for (i = 0; i < 256; i++){
#ifdef UNICODE_NORMALIZATION
fprintf(stderr," --utf8mac-input UTF-8-MAC input\n");
#endif
+#ifdef UTF8_INPUT_ENABLE
+ fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
+ fprintf(stderr," set the way nkf handles unassigned characters\n");
+#endif
#ifdef UTF8_OUTPUT_ENABLE
fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
#endif