+ nkf_iconv_t converter;
+
+ converter->input_buffer_size = IOBUF_SIZE;
+ converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
+ converter->output_buffer_size = IOBUF_SIZE * 2;
+ converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
+ converter->cd = iconv_open(tocode, fromcode);
+ if (converter->cd == (iconv_t)-1)
+ {
+ switch (errno) {
+ case EINVAL:
+ perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
+ return -1;
+ default:
+ perror("can't iconv_open");
+ }
+ }
+}
+
+ /*
+  * Convert everything readable from `input` with the conversion descriptor
+  * converter->cd and write the converted bytes through o_putc.
+  *
+  * Input bytes are fetched with i_getc into converter->input_buffer; output
+  * is produced into converter->output_buffer, which is enlarged with
+  * realloc() when a single conversion step cannot fit into it.
+  *
+  * Returns the sum of the non-error values returned by iconv(), or
+  * (size_t)-1 after reporting the problem with perror().
+  *
+  * NOTE(review): other code in this file uses `iconv` as a function pointer
+  * (e.g. `(*iconv)(c2, c1, 0)`); confirm that the calls below resolve to the
+  * library iconv() in the configuration where this function is compiled.
+  * NOTE(review): the buffers are allocated with nkf_xmalloc() but grown with
+  * realloc(); this assumes nkf_xmalloc() is malloc-compatible -- confirm.
+  */
+ static size_t
+ nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
+ {
+     size_t invalid = (size_t)0;  /* accumulated iconv() return values */
+     size_t pending = (size_t)0;  /* unconverted bytes held in input_buffer */
+     int at_eof = 0;              /* set once i_getc has returned EOF */
+     char *in_ptr;
+     char *out_ptr;
+     char *grown;
+     size_t in_left;
+     size_t out_left;
+     size_t produced;
+     size_t new_size;
+     size_t ret;
+     size_t i;
+     int saved_errno;
+
+     for (;;) {
+         /* Top up the input buffer behind any bytes left from the last pass. */
+         while (!at_eof && pending < converter->input_buffer_size) {
+             nkf_char c = (*i_getc)(input);
+             if (c == EOF) {
+                 at_eof = 1;
+             } else {
+                 converter->input_buffer[pending++] = (char)c;
+             }
+         }
+         if (pending == 0) {
+             break;  /* nothing left to convert */
+         }
+
+         in_ptr = converter->input_buffer;
+         in_left = pending;
+         out_ptr = converter->output_buffer;
+         out_left = converter->output_buffer_size;
+         ret = iconv(converter->cd, &in_ptr, &in_left, &out_ptr, &out_left);
+         saved_errno = errno;  /* o_putc below may change errno */
+
+         /* Emit what was produced; the cast keeps 0xFF from becoming EOF. */
+         produced = converter->output_buffer_size - out_left;
+         for (i = 0; i < produced; i++) {
+             (*o_putc)((unsigned char)converter->output_buffer[i]);
+         }
+
+         /* Keep the unconsumed tail at the start of the input buffer. */
+         if (in_left > 0 && in_ptr != converter->input_buffer) {
+             memmove(converter->input_buffer, in_ptr, in_left);
+         }
+         pending = in_left;
+
+         if (ret != (size_t)-1) {
+             invalid += ret;
+             continue;
+         }
+
+         switch (saved_errno) {
+         case EINVAL:
+             /* Incomplete multibyte sequence: more input is needed.  If no
+                more can be supplied the input is truncated or malformed. */
+             if (at_eof || pending >= converter->input_buffer_size) {
+                 errno = saved_errno;
+                 perror("can't iconv");
+                 return (size_t)-1;
+             }
+             break;
+         case E2BIG:
+             /* The buffer was just flushed, so it only has to grow when not
+                even one character fitted into it. */
+             if (produced > 0) {
+                 break;
+             }
+             if (converter->output_buffer_size > (size_t)-1 / 2) {
+                 errno = saved_errno;
+                 perror("can't iconv");
+                 return (size_t)-1;
+             }
+             new_size = converter->output_buffer_size
+                 ? converter->output_buffer_size * 2 : 2;
+             grown = realloc(converter->output_buffer, new_size);
+             if (grown == NULL) {
+                 perror("can't realloc");
+                 return (size_t)-1;
+             }
+             converter->output_buffer = grown;
+             converter->output_buffer_size = new_size;
+             break;
+         default:
+             errno = saved_errno;
+             perror("can't iconv");
+             return (size_t)-1;
+         }
+     }
+
+     /* Flush any pending shift sequence of a stateful target encoding. */
+     out_ptr = converter->output_buffer;
+     out_left = converter->output_buffer_size;
+     if (iconv(converter->cd, NULL, NULL, &out_ptr, &out_left) == (size_t)-1) {
+         perror("can't iconv");
+         return (size_t)-1;
+     }
+     produced = converter->output_buffer_size - out_left;
+     for (i = 0; i < produced; i++) {
+         (*o_putc)((unsigned char)converter->output_buffer[i]);
+     }
+
+     return invalid;
+ }
+
+
+ /*
+  * Release the resources held by a converter: both I/O buffers and the
+  * iconv conversion descriptor.  The nkf_iconv_t object itself is not freed.
+  */
+ static void
+ nkf_iconv_close(nkf_iconv_t *convert)
+ {
+     /* Field names match the ones assigned when the converter is set up
+        (input_buffer / output_buffer). */
+     nkf_xfree(convert->input_buffer);
+     convert->input_buffer = NULL;
+     nkf_xfree(convert->output_buffer);
+     convert->output_buffer = NULL;
+     iconv_close(convert->cd);
+ }
+#endif
+
+
+ /*
+  * Reset the option flags, converter hooks and per-stream state assigned
+  * below to their start-up values, so that a new set of options can be
+  * applied from a clean state.
+  */
+ static void
+ reinit(void)
+ {
+     struct input_code *p;
+     int i;
+
+     /* Reset every entry of input_code_list (terminated by a null name). */
+     for (p = input_code_list; p->name; p++) {
+         status_reinit(p);
+     }
+
+     /* Option flags. */
+     unbuf_f = FALSE;
+     estab_f = FALSE;
+     nop_f = FALSE;
+     binmode_f = TRUE;
+     rot_f = FALSE;
+     hira_f = FALSE;
+     alpha_f = FALSE;
+     mime_f = MIME_DECODE_DEFAULT;
+     mime_decode_f = FALSE;
+     mimebuf_f = FALSE;
+     broken_f = FALSE;
+     iso8859_f = FALSE;
+     mimeout_f = FALSE;
+     x0201_f = X0201_DEFAULT;
+     iso2022jp_f = FALSE;
+ #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
+     ms_ucs_map_f = UCS_MAP_ASCII;
+ #endif
+ #ifdef UTF8_INPUT_ENABLE
+     no_cp932ext_f = FALSE;
+     no_best_fit_chars_f = FALSE;
+     encode_fallback = NULL;
+     unicode_subchar = '?';
+     input_endian = ENDIAN_BIG;
+ #endif
+ #ifdef UTF8_OUTPUT_ENABLE
+     output_bom_f = FALSE;
+     output_endian = ENDIAN_BIG;
+ #endif
+ #ifdef UNICODE_NORMALIZATION
+     nfc_f = FALSE;
+ #endif
+ #ifdef INPUT_OPTION
+     cap_f = FALSE;
+     url_f = FALSE;
+ #endif
+ #ifdef NUMCHAR_OPTION
+     /* numchar_f is only read and set under NUMCHAR_OPTION elsewhere in
+        this file, so it is reset under the same guard. */
+     numchar_f = FALSE;
+ #endif
+ #ifdef CHECK_OPTION
+     noout_f = FALSE;
+     debug_f = FALSE;
+ #endif
+     guess_f = 0;
+ #ifdef EXEC_IO
+     exec_f = 0;
+ #endif
+ #ifdef SHIFTJIS_CP932
+     cp51932_f = TRUE;
+     cp932inv_f = TRUE;
+ #endif
+ #ifdef X0212_ENABLE
+     x0212_f = FALSE;
+     x0213_f = FALSE;
+ #endif
+
+     /* Clear the 256-entry prefix table filled by the --prefix= option. */
+     for (i = 0; i < 256; i++) {
+         prefix_table[i] = 0;
+     }
+
+     /* Buffering, MIME output and folding state. */
+     hold_count = 0;
+     mimeout_state.count = 0;
+     mimeout_mode = 0;
+     base64_count = 0;
+     f_line = 0;
+     f_prev = 0;
+     fold_preserve_f = FALSE;
+     fold_f = FALSE;
+     fold_len = 0;
+     kanji_intro = DEFAULT_J;
+     ascii_intro = DEFAULT_R;
+     fold_margin = FOLD_MARGIN;
+
+     /* Detach every optional output filter. */
+     o_zconv = no_connection;
+     o_fconv = no_connection;
+     o_eol_conv = no_connection;
+     o_rot_conv = no_connection;
+     o_hira_conv = no_connection;
+     o_base64conv = no_connection;
+     o_iso2022jp_check_conv = no_connection;
+
+     /* Point every byte-level I/O hook back at the std_* routines. */
+     o_putc = std_putc;
+     i_getc = std_getc;
+     i_ungetc = std_ungetc;
+     i_bgetc = std_getc;
+     i_bungetc = std_ungetc;
+     o_mputc = std_putc;
+     i_mgetc = std_getc;
+     i_mungetc = std_ungetc;
+     i_mgetc_buf = std_getc;
+     i_mungetc_buf = std_ungetc;
+
+     /* Conversion modes and remaining per-run state. */
+     output_mode = ASCII;
+     input_mode = ASCII;
+     mime_decode_mode = FALSE;
+     file_out_f = FALSE;
+     eolmode_f = 0;
+     input_eol = 0;
+     prev_cr = 0;
+     option_mode = 0;
+     z_prev2 = 0;
+     z_prev1 = 0;
+ #ifdef CHECK_OPTION
+     iconv_for_check = 0;
+ #endif
+     input_codename = NULL;
+     input_encoding = NULL;
+     output_encoding = NULL;
+     nkf_state_init();
+ #ifdef WIN32DLL
+     reinitdll();
+ #endif /*WIN32DLL*/
+ }
+
+ /*
+  * Build the output and input filter chains from the current option flags.
+  *
+  * Output side: oconv starts as the converter of the output encoding and
+  * each enabled filter is put in front of it, saving the previous head in
+  * its own o_* pointer.  Input side: i_getc / i_ungetc start at
+  * std_getc / std_ungetc and each enabled input filter is stacked on top,
+  * saving the previous pair in its own i_* pointers.
+  *
+  * Returns 0 on success, or -1 when no output encoding is set, no default
+  * is available and neither no-output nor guess mode is active.
+  */
+ static int
+ module_connection(void)
+ {
+     struct input_code *p;
+
+     if (input_encoding) set_input_encoding(input_encoding);
+     if (!output_encoding) {
+         output_encoding = nkf_default_encoding();
+     }
+     if (!output_encoding) {
+         /* No output is written in no-output or guess mode, so any encoding
+            will do there.  noout_f is only referenced under CHECK_OPTION,
+            matching its other uses in this file. */
+         int output_unneeded = (guess_f != 0);
+ #ifdef CHECK_OPTION
+         if (noout_f) output_unneeded = 1;
+ #endif
+         if (!output_unneeded) return -1;
+         output_encoding = nkf_enc_from_index(ISO_2022_JP);
+     }
+     set_output_encoding(output_encoding);
+     oconv = nkf_enc_to_oconv(output_encoding);
+     o_putc = std_putc;
+
+     /* replace continuation module, from output side */
+
+     /* output redirection */
+ #ifdef CHECK_OPTION
+     if (noout_f || guess_f) {
+         o_putc = no_putc;
+     }
+ #endif
+     if (mimeout_f) {
+         o_mputc = o_putc;
+         o_putc = mime_putc;
+         if (mimeout_f == TRUE) {
+             o_base64conv = oconv;
+             oconv = base64_conv;
+         }
+         /* base64_count = 0; */
+     }
+
+     if (eolmode_f || guess_f) {
+         o_eol_conv = oconv;
+         oconv = eol_conv;
+     }
+     if (rot_f) {
+         o_rot_conv = oconv;
+         oconv = rot_conv;
+     }
+     if (iso2022jp_f) {
+         o_iso2022jp_check_conv = oconv;
+         oconv = iso2022jp_check_conv;
+     }
+     if (hira_f) {
+         o_hira_conv = oconv;
+         oconv = hira_conv;
+     }
+     if (fold_f) {
+         o_fconv = oconv;
+         oconv = fold_conv;
+         f_line = 0;
+     }
+     if (alpha_f || x0201_f) {
+         o_zconv = oconv;
+         oconv = z_conv;
+     }
+
+     i_getc = std_getc;
+     i_ungetc = std_ungetc;
+     /* input redirection */
+ #ifdef INPUT_OPTION
+     if (cap_f) {
+         i_cgetc = i_getc;
+         i_getc = cap_getc;
+         i_cungetc = i_ungetc;
+         i_ungetc = cap_ungetc;
+     }
+     if (url_f) {
+         i_ugetc = i_getc;
+         i_getc = url_getc;
+         i_uungetc = i_ungetc;
+         i_ungetc = url_ungetc;
+     }
+ #endif
+ #ifdef NUMCHAR_OPTION
+     if (numchar_f) {
+         i_ngetc = i_getc;
+         i_getc = numchar_getc;
+         i_nungetc = i_ungetc;
+         i_ungetc = numchar_ungetc;
+     }
+ #endif
+ #ifdef UNICODE_NORMALIZATION
+     if (nfc_f) {
+         i_nfc_getc = i_getc;
+         i_getc = nfc_getc;
+         i_nfc_ungetc = i_ungetc;
+         i_ungetc = nfc_ungetc;
+     }
+ #endif
+     if (mime_f && mimebuf_f == FIXED_MIME) {
+         i_mgetc = i_getc;
+         i_getc = mime_getc;
+         i_mungetc = i_ungetc;
+         i_ungetc = mime_ungetc;
+     }
+     if (broken_f & 1) {
+         i_bgetc = i_getc;
+         i_getc = broken_getc;
+         i_bungetc = i_ungetc;
+         i_ungetc = broken_ungetc;
+     }
+
+     /* An explicit input encoding selects its converter with -TRUE as the
+        first argument; otherwise e_iconv is installed with FALSE. */
+     if (input_encoding) {
+         set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
+     } else {
+         set_iconv(FALSE, e_iconv);
+     }
+
+     /* Reset every entry of input_code_list (terminated by a null name). */
+     for (p = input_code_list; p->name; p++) {
+         status_reinit(p);
+     }
+     return 0;
+ }
+
+/*
+ Conversion main loop. Code detection only.
+ */
+
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+ /*
+  * Pass the stream through without kanji conversion: every character read
+  * with i_getc is handed to o_putc, followed by a final EOF.  When nop_f is
+  * 2 the filter chains are connected first, so the installed get/put
+  * filters still apply.  Always returns 1.
+  */
+ static nkf_char
+ noconvert(FILE *f)
+ {
+     nkf_char ch;
+
+     if (nop_f == 2) {
+         module_connection();
+     }
+     for (;;) {
+         ch = (*i_getc)(f);
+         if (ch == EOF) {
+             break;
+         }
+         (*o_putc)(ch);
+     }
+     (*o_putc)(EOF);
+     return 1;
+ }
+#endif
+ /*
+  * Loop-control shorthands used by the byte loop in kanji_convert().
+  * NEXT, SKIP, MORE and LAST expand to bare continue / break statements, so
+  * they act on the innermost enclosing loop.  SKIP and MORE expand to TWO
+  * statements: use them only as a full statement inside braces or after a
+  * complete if statement, never as the unbraced body of an if / else.
+  * set_input_mode() assigns input_mode, clears shift_mode (a local of
+  * kanji_convert(), so the macro is only usable where such a variable is in
+  * scope), and passes "ISO-2022-JP" to set_input_codename() and debug().
+  */
+ #define NEXT continue /* no output, get next */
+ #define SKIP c2=0;continue /* clear the pending first byte, no output, get next */
+ #define MORE c2=c1;continue /* keep c1 as the first byte; need one more byte */
+ #define SEND ; /* empty statement: fall out of the if chain so c1 and c2 get output */
+ #define LAST break /* end of loop, go closing */
+ #define set_input_mode(mode) do { \
+ input_mode = mode; \
+ shift_mode = 0; \
+ set_input_codename("ISO-2022-JP"); \
+ debug("ISO-2022-JP"); \
+ } while (0)
+ /*
+  * kanji_convert: main conversion loop for one input stream.
+  *
+  * Connects the filter chains with module_connection(), calls check_bom(f)
+  * and then reads bytes through i_getc until EOF.  c2 holds a pending first
+  * byte (or a character-set tag such as ISO_8859_1 / JIS_X_0201_1976_K) and
+  * c1 the current byte.  Escape sequences and SO / SI update input_mode and
+  * shift_mode; completed characters are dispatched on input_mode to
+  * (*iconv) or (*oconv) in the switch at the bottom of the loop.  When
+  * iconv is w_iconv32 or w_iconv16 the stream is first consumed in 4-byte /
+  * 2-byte units by nkf_iconv_utf_32() / nkf_iconv_utf_16().
+  *
+  * Returns -1 when module_connection() fails, 0 otherwise.
+  */
+ static int
+ kanji_convert(FILE *f)
+ {
+ nkf_char c1=0, c2=0, c3=0, c4=0;
+ int shift_mode = 0; /* 0, 1, 2, 3 */
+ int g2 = 0;
+ int is_8bit = FALSE;
+
+ if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
+ is_8bit = TRUE;
+ }
+
+ input_mode = ASCII;
+ output_mode = ASCII;
+
+ if (module_connection() < 0) {
+ #if !defined(PERL_XS) && !defined(WIN32DLL)
+ fprintf(stderr, "no output encoding given\n");
+ #endif
+ return -1;
+ }
+ check_bom(f);
+
+ #ifdef UTF8_INPUT_ENABLE
+ if(iconv == w_iconv32){
+ while ((c1 = (*i_getc)(f)) != EOF &&
+ (c2 = (*i_getc)(f)) != EOF &&
+ (c3 = (*i_getc)(f)) != EOF &&
+ (c4 = (*i_getc)(f)) != EOF) {
+ nkf_iconv_utf_32(c1, c2, c3, c4);
+ }
+ (*i_ungetc)(EOF, f);
+ }
+ else if (iconv == w_iconv16) {
+ while ((c1 = (*i_getc)(f)) != EOF &&
+ (c2 = (*i_getc)(f)) != EOF) {
+ if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
+ (c3 = (*i_getc)(f)) != EOF &&
+ (c4 = (*i_getc)(f)) != EOF) {
+ nkf_iconv_utf_16(c1, c2, c3, c4);
+ }
+ }
+ (*i_ungetc)(EOF, f);
+ }
+ #endif
+
+ while ((c1 = (*i_getc)(f)) != EOF) {
+ #ifdef INPUT_CODE_FIX
+ if (!input_encoding)
+ #endif
+ code_status(c1);
+ if (c2) {
+ /* second byte */
+ if (c2 > DEL) {
+ /* in case of 8th bit is on */
+ if (!estab_f&&!mime_decode_mode) {
+ /* in case of not established yet */
+ /* It is still ambiguous */
+ if (h_conv(f, c2, c1)==EOF) {
+ LAST;
+ }
+ else {
+ SKIP;
+ }
+ }
+ else {
+ /* in case of already established */
+ if (c1 < 0x40) {
+ /* ignore bogus code */
+ SKIP;
+ } else {
+ SEND;
+ }
+ }
+ }
+ else {
+ /* 2nd byte of 7 bit code or SJIS */
+ SEND;
+ }
+ }
+ else if (nkf_char_unicode_p(c1)) {
+ (*oconv)(0, c1);
+ NEXT;
+ }
+ else {
+ /* first byte */
+ if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
+ /* CP5022x */
+ MORE;
+ } else if (c1 > DEL) {
+ /* 8 bit code */
+ if (!estab_f && !iso8859_f) {
+ /* not established yet */
+ MORE;
+ } else { /* estab_f or iso8859_f is set */
+ if (iso8859_f) {
+ c2 = ISO_8859_1;
+ c1 &= 0x7f;
+ SEND;
+ }
+ else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
+ (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
+ /* JIS X 0201 */
+ c2 = JIS_X_0201_1976_K;
+ c1 &= 0x7f;
+ SEND;
+ }
+ else {
+ /* already established */
+ MORE;
+ }
+ }
+ } else if (SP < c1 && c1 < DEL) {
+ /* in case of Roman characters */
+ if (shift_mode) {
+ /* output 1 shifted byte */
+ if (iso8859_f) {
+ c2 = ISO_8859_1;
+ SEND;
+ } else if (nkf_byte_jisx0201_katakana_p(c1)){
+ /* byte accepted by nkf_byte_jisx0201_katakana_p(): tag it as JIS X 0201 kana */
+ c2 = JIS_X_0201_1976_K;
+ SEND;
+ } else {
+ /* look like bogus code */
+ SKIP;
+ }
+ } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
+ input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
+ /* in case of Kanji shifted */
+ MORE;
+ } else if (c1 == '=' && mime_f && !mime_decode_mode) {
+ /* Check MIME code */
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ (*oconv)(0, '=');
+ LAST;
+ } else if (c1 == '?') {
+ /* =? is mime conversion start sequence */
+ if(mime_f == STRICT_MIME) {
+ /* check in real detail */
+ if (mime_begin_strict(f) == EOF)
+ LAST;
+ SKIP;
+ } else if (mime_begin(f) == EOF)
+ LAST;
+ SKIP;
+ } else {
+ (*oconv)(0, '=');
+ (*i_ungetc)(c1,f);
+ SKIP;
+ }
+ } else {
+ /* normal ASCII code */
+ SEND;
+ }
+ } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
+ shift_mode = 0;
+ SKIP;
+ } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
+ shift_mode = 1;
+ SKIP;
+ } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ /* (*oconv)(0, ESC); don't send bogus code */
+ LAST;
+ }
+ else if (c1 == '&') {
+ /* IRR */
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ LAST;
+ } else {
+ SKIP;
+ }
+ }
+ else if (c1 == '$') {
+ /* GZDMx */
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ /* don't send bogus code
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$'); */
+ LAST;
+ } else if (c1 == '@' || c1 == 'B') {
+ /* JIS X 0208 */
+ set_input_mode(JIS_X_0208);
+ SKIP;
+ } else if (c1 == '(') {
+ /* GZDM4 */
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ /* don't send bogus code
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, '(');
+ */
+ LAST;
+ } else if (c1 == '@'|| c1 == 'B') {
+ /* JIS X 0208 */
+ set_input_mode(JIS_X_0208);
+ SKIP;
+ #ifdef X0212_ENABLE
+ } else if (c1 == 'D'){
+ set_input_mode(JIS_X_0212);
+ SKIP;
+ #endif /* X0212_ENABLE */
+ } else if (c1 == 'O' || c1 == 'Q'){
+ set_input_mode(JIS_X_0213_1);
+ SKIP;
+ } else if (c1 == 'P'){
+ set_input_mode(JIS_X_0213_2);
+ SKIP;
+ } else {
+ /* could be some special code */
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, '(');
+ (*oconv)(0, c1);
+ SKIP;
+ }
+ } else if (broken_f&0x2) {
+ /* accept any ESC-$-x as broken code ... */
+ input_mode = JIS_X_0208;
+ shift_mode = 0;
+ SKIP;
+ } else {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ (*oconv)(0, c1);
+ SKIP;
+ }
+ } else if (c1 == '(') {
+ /* GZD4 */
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ /* don't send bogus code
+ (*oconv)(0, ESC);
+ (*oconv)(0, '('); */
+ LAST;
+ }
+ else if (c1 == 'I') {
+ /* JIS X 0201 Katakana */
+ set_input_mode(JIS_X_0201_1976_K);
+ SKIP;
+ }
+ else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
+ /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
+ set_input_mode(ASCII);
+ SKIP;
+ }
+ else if (broken_f&0x2) {
+ set_input_mode(ASCII);
+ SKIP;
+ }
+ else {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '(');
+ SEND;
+ }
+ }
+ else if (c1 == '.') {
+ /* G2D6 */
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ LAST;
+ }
+ else if (c1 == 'A') {
+ /* ISO-8859-1 */
+ g2 = ISO_8859_1;
+ SKIP;
+ }
+ else {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '.');
+ SEND;
+ }
+ }
+ else if (c1 == 'N') {
+ /* SS2 */
+ c1 = (*i_getc)(f); /* NOTE(review): this read is not checked for EOF */
+ if (g2 == ISO_8859_1) {
+ c2 = ISO_8859_1;
+ SEND;
+ }else{
+ (*i_ungetc)(c1, f);
+ /* lonely ESC */
+ (*oconv)(0, ESC);
+ SEND;
+ }
+ }
+ else {
+ /* lonely ESC */
+ (*oconv)(0, ESC);
+ SEND;
+ }
+ } else if (c1 == ESC && iconv == s_iconv) {
+ /* ESC in Shift_JIS */
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ /* (*oconv)(0, ESC); don't send bogus code */
+ LAST;
+ } else if (c1 == '$') {
+ /* J-PHONE emoji */
+ if ((c1 = (*i_getc)(f)) == EOF) {
+ LAST;
+ } else if (('E' <= c1 && c1 <= 'G') ||
+ ('O' <= c1 && c1 <= 'Q')) {
+ /*
+ NUM : 0 1 2 3 4 5
+ BYTE: G E F O P Q
+ C%7 : 1 6 0 2 3 4
+ C%7 : 0 1 2 3 4 5 6
+ NUM : 2 0 3 4 5 X 1
+ */
+ static const nkf_char jphone_emoji_first_table[7] =
+ {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
+ c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
+ if ((c1 = (*i_getc)(f)) == EOF) LAST;
+ while (SP <= c1 && c1 <= 'z') {
+ (*oconv)(0, c1 + c3);
+ if ((c1 = (*i_getc)(f)) == EOF) LAST; /* NOTE(review): LAST is a plain break, so here it only leaves this inner while */
+ }
+ SKIP;
+ }
+ else {
+ (*oconv)(0, ESC);
+ (*oconv)(0, '$');
+ SEND;
+ }
+ }
+ else {
+ /* lonely ESC */
+ (*oconv)(0, ESC);
+ SEND;
+ }
+ } else if (c1 == LF || c1 == CR) {
+ if (broken_f&4) {
+ input_mode = ASCII; set_iconv(FALSE, 0);
+ SEND;
+ } else if (mime_decode_f && !mime_decode_mode){
+ if (c1 == LF) {
+ if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
+ i_ungetc(SP,f);
+ continue;
+ } else {
+ i_ungetc(c1,f);
+ }
+ c1 = LF;
+ SEND;
+ } else { /* if (c1 == CR)*/
+ if ((c1=(*i_getc)(f))!=EOF) {
+ if (c1==SP) {
+ i_ungetc(SP,f);
+ continue;
+ } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
+ i_ungetc(SP,f);
+ continue;
+ } else {
+ i_ungetc(c1,f);
+ }
+ i_ungetc(LF,f);
+ } else {
+ i_ungetc(c1,f);
+ }
+ c1 = CR;
+ SEND;
+ }
+ }
+ } else
+ SEND;
+ }
+ /* send: */
+ switch(input_mode){
+ case ASCII:
+ switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
+ case -2:
+ /* 4 bytes UTF-8 */
+ if ((c3 = (*i_getc)(f)) != EOF) {
+ code_status(c3);
+ c3 <<= 8;
+ if ((c4 = (*i_getc)(f)) != EOF) {
+ code_status(c4);
+ (*iconv)(c2, c1, c3|c4);
+ }
+ }
+ break;
+ case -1:
+ /* 3 bytes EUC or UTF-8 */
+ if ((c3 = (*i_getc)(f)) != EOF) {
+ code_status(c3);
+ (*iconv)(c2, c1, c3);
+ }
+ break;
+ }
+ break;
+ case JIS_X_0208:
+ case JIS_X_0213_1:
+ if (ms_ucs_map_f &&
+ 0x7F <= c2 && c2 <= 0x92 &&
+ 0x21 <= c1 && c1 <= 0x7E) {
+ /* CP932 UDC */
+ c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
+ c2 = 0;
+ }
+ (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
+ break;
+ #ifdef X0212_ENABLE
+ case JIS_X_0212:
+ (*oconv)(PREFIX_EUCG3 | c2, c1);
+ break;
+ #endif /* X0212_ENABLE */
+ case JIS_X_0213_2:
+ (*oconv)(PREFIX_EUCG3 | c2, c1);
+ break;
+ default:
+ (*oconv)(input_mode, c1); /* other special case */
+ }
+
+ c2 = 0;
+ c3 = 0;
+ continue;
+ /* goto next_word */
+ }
+
+ /* epilogue */
+ (*iconv)(EOF, 0, 0);
+ if (!input_codename)
+ {
+ if (is_8bit) {
+ struct input_code *p = input_code_list;
+ struct input_code *result = p;
+ while (p->name){
+ if (p->score < result->score) result = p; /* keep the entry with the lowest score */
+ ++p;
+ }
+ set_input_codename(result->name);
+ #ifdef CHECK_OPTION
+ debug(result->name);
+ #endif
+ }
+ }
+ return 0;
+ }
+
+/*
+ * int options(unsigned char *cp)
+ *
+ * return values:
+ * 0: success
+ * -1: ArgumentError
+ */
+static int
+options(unsigned char *cp)
+{
+ nkf_char i, j;
+ unsigned char *p;
+ unsigned char *cp_back = NULL;
+ nkf_encoding *enc;
+
+ if (option_mode==1)
+ return 0;
+ while(*cp && *cp++!='-');
+ while (*cp || cp_back) {
+ if(!*cp){
+ cp = cp_back;
+ cp_back = NULL;
+ continue;
+ }
+ p = 0;
+ switch (*cp++) {
+ case '-': /* literal options */
+ if (!*cp || *cp == SP) { /* ignore the rest of arguments */
+ option_mode = 1;
+ return 0;
+ }
+ for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
+ p = (unsigned char *)long_option[i].name;
+ for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
+ if (*p == cp[j] || cp[j] == SP){
+ p = &cp[j] + 1;
+ break;
+ }
+ p = 0;
+ }
+ if (p == 0) {
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+ fprintf(stderr, "unknown long option: --%s\n", cp);
+#endif
+ return -1;
+ }
+ while(*cp && *cp != SP && cp++);
+ if (long_option[i].alias[0]){
+ cp_back = cp;
+ cp = (unsigned char *)long_option[i].alias;
+ }else{
+ if (strcmp(long_option[i].name, "help") == 0){
+ usage();
+ exit(EXIT_SUCCESS);
+ }
+ if (strcmp(long_option[i].name, "ic=") == 0){
+ enc = nkf_enc_find((char *)p);
+ if (!enc) continue;
+ input_encoding = enc;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "oc=") == 0){
+ enc = nkf_enc_find((char *)p);
+ /* if (enc <= 0) continue; */
+ if (!enc) continue;
+ output_encoding = enc;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "guess=") == 0){
+ if (p[0] == '0' || p[0] == '1') {
+ guess_f = 1;
+ } else {
+ guess_f = 2;
+ }
+ continue;
+ }
+#ifdef OVERWRITE
+ if (strcmp(long_option[i].name, "overwrite") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "overwrite=") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = TRUE;
+ backup_f = TRUE;
+ backup_suffix = (char *)p;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "in-place") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = FALSE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "in-place=") == 0){
+ file_out_f = TRUE;
+ overwrite_f = TRUE;
+ preserve_time_f = FALSE;
+ backup_f = TRUE;
+ backup_suffix = (char *)p;
+ continue;
+ }
+#endif
+#ifdef INPUT_OPTION
+ if (strcmp(long_option[i].name, "cap-input") == 0){
+ cap_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "url-input") == 0){
+ url_f = TRUE;
+ continue;
+ }
+#endif
+#ifdef NUMCHAR_OPTION
+ if (strcmp(long_option[i].name, "numchar-input") == 0){
+ numchar_f = TRUE;
+ continue;
+ }
+#endif
+#ifdef CHECK_OPTION
+ if (strcmp(long_option[i].name, "no-output") == 0){
+ noout_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "debug") == 0){
+ debug_f = TRUE;
+ continue;
+ }
+#endif
+ if (strcmp(long_option[i].name, "cp932") == 0){
+#ifdef SHIFTJIS_CP932
+ cp51932_f = TRUE;
+ cp932inv_f = -TRUE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP932;
+#endif
+ continue;
+ }
+ if (strcmp(long_option[i].name, "no-cp932") == 0){
+#ifdef SHIFTJIS_CP932
+ cp51932_f = FALSE;
+ cp932inv_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_ASCII;
+#endif
+ continue;
+ }
+#ifdef SHIFTJIS_CP932
+ if (strcmp(long_option[i].name, "cp932inv") == 0){
+ cp932inv_f = -TRUE;
+ continue;
+ }
+#endif
+
+#ifdef X0212_ENABLE
+ if (strcmp(long_option[i].name, "x0212") == 0){
+ x0212_f = TRUE;
+ continue;
+ }
+#endif
+
+#ifdef EXEC_IO
+ if (strcmp(long_option[i].name, "exec-in") == 0){
+ exec_f = 1;
+ return 0;
+ }
+ if (strcmp(long_option[i].name, "exec-out") == 0){
+ exec_f = -1;
+ return 0;
+ }
+#endif
+#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
+ if (strcmp(long_option[i].name, "no-cp932ext") == 0){
+ no_cp932ext_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
+ no_best_fit_chars_f = TRUE;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-skip") == 0){
+ encode_fallback = NULL;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-html") == 0){
+ encode_fallback = encode_fallback_html;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-xml") == 0){
+ encode_fallback = encode_fallback_xml;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-java") == 0){
+ encode_fallback = encode_fallback_java;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-perl") == 0){
+ encode_fallback = encode_fallback_perl;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-subchar") == 0){
+ encode_fallback = encode_fallback_subchar;
+ continue;
+ }
+ if (strcmp(long_option[i].name, "fb-subchar=") == 0){
+ encode_fallback = encode_fallback_subchar;
+ unicode_subchar = 0;
+ if (p[0] != '0'){
+ /* decimal number */
+ for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
+ unicode_subchar *= 10;
+ unicode_subchar += hex2bin(p[i]);
+ }
+ }else if(p[1] == 'x' || p[1] == 'X'){
+ /* hexadecimal number */
+ for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
+ unicode_subchar <<= 4;
+ unicode_subchar |= hex2bin(p[i]);
+ }
+ }else{
+ /* octal number */
+ for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
+ unicode_subchar *= 8;
+ unicode_subchar += hex2bin(p[i]);
+ }
+ }
+ w16e_conv(unicode_subchar, &i, &j);
+ unicode_subchar = i<<8 | j;
+ continue;
+ }
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
+ ms_ucs_map_f = UCS_MAP_MS;
+ continue;
+ }
+#endif
+#ifdef UNICODE_NORMALIZATION
+ if (strcmp(long_option[i].name, "utf8mac-input") == 0){
+ nfc_f = TRUE;
+ continue;
+ }
+#endif
+ if (strcmp(long_option[i].name, "prefix=") == 0){
+ if (nkf_isgraph(p[0])){
+ for (i = 1; nkf_isgraph(p[i]); i++){
+ prefix_table[p[i]] = p[0];
+ }
+ }
+ continue;
+ }
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+ fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
+#endif
+ return -1;
+ }
+ continue;
+ case 'b': /* buffered mode */
+ unbuf_f = FALSE;
+ continue;
+ case 'u': /* non bufferd mode */
+ unbuf_f = TRUE;
+ continue;
+ case 't': /* transparent mode */
+ if (*cp=='1') {
+ /* alias of -t */
+ cp++;
+ nop_f = TRUE;
+ } else if (*cp=='2') {
+ /*
+ * -t with put/get
+ *
+ * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
+ *
+ */
+ cp++;
+ nop_f = 2;
+ } else
+ nop_f = TRUE;
+ continue;
+ case 'j': /* JIS output */
+ case 'n':
+ output_encoding = nkf_enc_from_index(ISO_2022_JP);
+ continue;
+ case 'e': /* AT&T EUC output */
+ output_encoding = nkf_enc_from_index(EUCJP_NKF);
+ continue;
+ case 's': /* SJIS output */
+ output_encoding = nkf_enc_from_index(SHIFT_JIS);
+ continue;
+ case 'l': /* ISO8859 Latin-1 support, no conversion */
+ iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
+ input_encoding = nkf_enc_from_index(ISO_8859_1);
+ continue;
+ case 'i': /* Kanji IN ESC-$-@/B */
+ if (*cp=='@'||*cp=='B')
+ kanji_intro = *cp++;
+ continue;
+ case 'o': /* ASCII IN ESC-(-J/B/H */
+ /* ESC ( H was used in initial JUNET messages */
+ if (*cp=='J'||*cp=='B'||*cp=='H')
+ ascii_intro = *cp++;
+ continue;
+ case 'h':
+ /*
+ bit:1 katakana->hiragana
+ bit:2 hiragana->katakana
+ */
+ if ('9'>= *cp && *cp>='0')
+ hira_f |= (*cp++ -'0');
+ else
+ hira_f |= 1;
+ continue;
+ case 'r':
+ rot_f = TRUE;
+ continue;
+#if defined(MSDOS) || defined(__OS2__)
+ case 'T':
+ binmode_f = FALSE;
+ continue;
+#endif
+#ifndef PERL_XS
+ case 'V':
+ show_configuration();
+ exit(EXIT_SUCCESS);
+ break;
+ case 'v':
+ version();
+ exit(EXIT_SUCCESS);
+ break;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ case 'w': /* UTF-8 output */
+ if (cp[0] == '8') {
+ cp++;
+ if (cp[0] == '0'){
+ cp++;
+ output_encoding = nkf_enc_from_index(UTF_8N);
+ } else {
+ output_bom_f = TRUE;
+ output_encoding = nkf_enc_from_index(UTF_8_BOM);
+ }
+ } else {
+ int enc_idx;
+ if ('1'== cp[0] && '6'==cp[1]) {
+ cp += 2;
+ enc_idx = UTF_16;
+ } else if ('3'== cp[0] && '2'==cp[1]) {
+ cp += 2;
+ enc_idx = UTF_32;
+ } else {
+ output_encoding = nkf_enc_from_index(UTF_8);
+ continue;
+ }
+ if (cp[0]=='L') {
+ cp++;
+ output_endian = ENDIAN_LITTLE;
+ } else if (cp[0] == 'B') {
+ cp++;
+ } else {
+ output_encoding = nkf_enc_from_index(enc_idx);
+ continue;
+ }
+ if (cp[0] == '0'){
+ cp++;
+ enc_idx = enc_idx == UTF_16
+ ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
+ : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
+ } else {
+ output_bom_f = TRUE;
+ enc_idx = enc_idx == UTF_16
+ ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
+ : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
+ }
+ output_encoding = nkf_enc_from_index(enc_idx);
+ }
+ continue;
+#endif
+#ifdef UTF8_INPUT_ENABLE
+ case 'W': /* UTF input */
+ if (cp[0] == '8') {
+ cp++;
+ input_encoding = nkf_enc_from_index(UTF_8);
+ }else{
+ int enc_idx;
+ if ('1'== cp[0] && '6'==cp[1]) {
+ cp += 2;
+ input_endian = ENDIAN_BIG;
+ enc_idx = UTF_16;
+ } else if ('3'== cp[0] && '2'==cp[1]) {
+ cp += 2;
+ input_endian = ENDIAN_BIG;
+ enc_idx = UTF_32;
+ } else {
+ input_encoding = nkf_enc_from_index(UTF_8);
+ continue;
+ }
+ if (cp[0]=='L') {
+ cp++;
+ input_endian = ENDIAN_LITTLE;
+ } else if (cp[0] == 'B') {
+ cp++;
+ input_endian = ENDIAN_BIG;
+ }
+ enc_idx = (enc_idx == UTF_16
+ ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
+ : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
+ input_encoding = nkf_enc_from_index(enc_idx);
+ }
+ continue;
+#endif
+ /* Input code assumption */
+ case 'J': /* ISO-2022-JP input */
+ input_encoding = nkf_enc_from_index(ISO_2022_JP);
+ continue;
+ case 'E': /* EUC-JP input */
+ input_encoding = nkf_enc_from_index(EUCJP_NKF);
+ continue;
+ case 'S': /* Shift_JIS input */
+ input_encoding = nkf_enc_from_index(SHIFT_JIS);
+ continue;
+ case 'Z': /* Convert X0208 alphabet to asii */
+ /* alpha_f
+ bit:0 Convert JIS X 0208 Alphabet to ASCII
+ bit:1 Convert Kankaku to one space
+ bit:2 Convert Kankaku to two spaces
+ bit:3 Convert HTML Entity
+ bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
+ */
+ while ('0'<= *cp && *cp <='9') {
+ alpha_f |= 1 << (*cp++ - '0');
+ }
+ if (!alpha_f) alpha_f = 1;
+ continue;
+ case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
+ x0201_f = FALSE; /* No X0201->X0208 conversion */
+ /* accept X0201
+ ESC-(-I in JIS, EUC, MS Kanji
+ SI/SO in JIS, EUC, MS Kanji
+ SS2 in EUC, JIS, not in MS Kanji
+ MS Kanji (0xa0-0xdf)
+ output X0201
+ ESC-(-I in JIS (0x20-0x5f)
+ SS2 in EUC (0xa0-0xdf)
+ 0xa0-0xd in MS Kanji (0xa0-0xdf)
+ */
+ continue;
+ case 'X': /* Convert X0201 kana to X0208 */
+ x0201_f = TRUE;
+ continue;
+ case 'F': /* prserve new lines */
+ fold_preserve_f = TRUE;
+ case 'f': /* folding -f60 or -f */
+ fold_f = TRUE;
+ fold_len = 0;
+ while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
+ fold_len *= 10;
+ fold_len += *cp++ - '0';
+ }
+ if (!(0<fold_len && fold_len<BUFSIZ))
+ fold_len = DEFAULT_FOLD;
+ if (*cp=='-') {
+ fold_margin = 0;
+ cp++;
+ while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
+ fold_margin *= 10;
+ fold_margin += *cp++ - '0';
+ }
+ }
+ continue;
+ case 'm': /* MIME support */
+ /* mime_decode_f = TRUE; */ /* this has too large side effects... */
+ if (*cp=='B'||*cp=='Q') {
+ mime_decode_mode = *cp++;
+ mimebuf_f = FIXED_MIME;
+ } else if (*cp=='N') {
+ mime_f = TRUE; cp++;
+ } else if (*cp=='S') {
+ mime_f = STRICT_MIME; cp++;
+ } else if (*cp=='0') {
+ mime_decode_f = FALSE;
+ mime_f = FALSE; cp++;
+ } else {
+ mime_f = STRICT_MIME;
+ }
+ continue;
+ case 'M': /* MIME output */
+ if (*cp=='B') {
+ mimeout_mode = 'B';
+ mimeout_f = FIXED_MIME; cp++;
+ } else if (*cp=='Q') {
+ mimeout_mode = 'Q';
+ mimeout_f = FIXED_MIME; cp++;
+ } else {
+ mimeout_f = TRUE;
+ }
+ continue;
+ case 'B': /* Broken JIS support */
+ /* bit:0 no ESC JIS
+ bit:1 allow any x on ESC-(-x or ESC-$-x
+ bit:2 reset to ascii on NL
+ */
+ if ('9'>= *cp && *cp>='0')
+ broken_f |= 1<<(*cp++ -'0');
+ else
+ broken_f |= TRUE;
+ continue;
+#ifndef PERL_XS
+ case 'O':/* for Output file */
+ file_out_f = TRUE;
+ continue;
+#endif
+ case 'c':/* add cr code */
+ eolmode_f = CRLF;
+ continue;
+ case 'd':/* delete cr code */
+ eolmode_f = LF;
+ continue;
+ case 'I': /* ISO-2022-JP output */
+ iso2022jp_f = TRUE;
+ continue;
+ case 'L': /* line mode */
+ if (*cp=='u') { /* unix */
+ eolmode_f = LF; cp++;
+ } else if (*cp=='m') { /* mac */
+ eolmode_f = CR; cp++;
+ } else if (*cp=='w') { /* windows */
+ eolmode_f = CRLF; cp++;
+ } else if (*cp=='0') { /* no conversion */
+ eolmode_f = 0; cp++;
+ }
+ continue;
+#ifndef PERL_XS
+ case 'g':
+ if ('2' <= *cp && *cp <= '9') {
+ guess_f = 2;
+ cp++;
+ } else if (*cp == '0' || *cp == '1') {
+ guess_f = 1;
+ cp++;
+ } else {
+ guess_f = 1;
+ }
+ continue;
+#endif
+ case SP:
+ /* module muliple options in a string are allowed for Perl moudle */
+ while(*cp && *cp++!='-');
+ continue;
+ default:
+#if !defined(PERL_XS) && !defined(WIN32DLL)
+ fprintf(stderr, "unknown option: -%c\n", *(cp-1));
+#endif
+ /* bogus option but ignored */
+ return -1;
+ }
+ }
+ return 0;