2 * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3 * Copyright (c) 1996-2010, The nkf Project.
5 * This software is provided 'as-is', without any express or implied
6 * warranty. In no event will the authors be held liable for any damages
7 * arising from the use of this software.
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software
15 * in a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
18 * 2. Altered source versions must be plainly marked as such, and must not be
19 * misrepresented as being the original software.
21 * 3. This notice may not be removed or altered from any source distribution.
23 #define NKF_VERSION "2.1.1"
24 #define NKF_RELEASE_DATE "2010-04-14"
26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27 "Copyright (C) 1996-2010, The nkf Project."
38 # define INCL_DOSERRORS
44 /* state of output_mode and input_mode
123 NKF_ENCODING_TABLE_SIZE,
124 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128 JIS_X_0208 = 0x1168, /* @B */
129 JIS_X_0212 = 0x1159, /* D */
130 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131 JIS_X_0213_2 = 0x1229, /* P */
132 JIS_X_0213_1 = 0x1233 /* Q */
/*
 * Forward declarations of the per-encoding converters.
 * The *_iconv functions take three nkf_char values (c2, c1, c0) and return an
 * nkf_char; the *_oconv functions take two (c2, c1) and return nothing.
 * They fill the `iconv` / `oconv` slots of the nkf_native_encoding objects
 * defined later in this file.
 */
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
149 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
150 void (*oconv)(nkf_char c2, nkf_char c1);
151 } nkf_native_encoding;
/*
 * Native (base) encodings: a name plus the input and output converter pair.
 * Entries of nkf_encoding_table refer to one of these objects.
 * Note that ASCII reuses the EUC-JP pair (e_iconv/e_oconv) and ISO-2022-JP
 * combines e_iconv for input with j_oconv for output.
 */
153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
164 const nkf_native_encoding *base_encoding;
167 nkf_encoding nkf_encoding_table[] = {
168 {ASCII, "US-ASCII", &NkfEncodingASCII},
169 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179 {CP10001, "CP10001", &NkfEncodingShift_JIS},
180 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182 {CP51932, "CP51932", &NkfEncodingEUC_JP},
183 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203 {BINARY, "BINARY", &NkfEncodingASCII},
210 } encoding_name_to_id_table[] = {
215 {"ISO-2022-JP", ISO_2022_JP},
216 {"ISO2022JP-CP932", CP50220},
217 {"CP50220", CP50220},
218 {"CP50221", CP50221},
219 {"CSISO2022JP", CP50221},
220 {"CP50222", CP50222},
221 {"ISO-2022-JP-1", ISO_2022_JP_1},
222 {"ISO-2022-JP-3", ISO_2022_JP_3},
223 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224 {"SHIFT_JIS", SHIFT_JIS},
227 {"WINDOWS-31J", WINDOWS_31J},
228 {"CSWINDOWS31J", WINDOWS_31J},
229 {"CP932", WINDOWS_31J},
230 {"MS932", WINDOWS_31J},
231 {"CP10001", CP10001},
234 {"EUCJP-NKF", EUCJP_NKF},
235 {"CP51932", CP51932},
236 {"EUC-JP-MS", EUCJP_MS},
237 {"EUCJP-MS", EUCJP_MS},
238 {"EUCJPMS", EUCJP_MS},
239 {"EUC-JP-ASCII", EUCJP_ASCII},
240 {"EUCJP-ASCII", EUCJP_ASCII},
241 {"SHIFT_JISX0213", SHIFT_JISX0213},
242 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
243 {"EUC-JISX0213", EUC_JISX0213},
244 {"EUC-JIS-2004", EUC_JIS_2004},
247 {"UTF-8-BOM", UTF_8_BOM},
248 {"UTF8-MAC", UTF8_MAC},
249 {"UTF-8-MAC", UTF8_MAC},
251 {"UTF-16BE", UTF_16BE},
252 {"UTF-16BE-BOM", UTF_16BE_BOM},
253 {"UTF-16LE", UTF_16LE},
254 {"UTF-16LE-BOM", UTF_16LE_BOM},
256 {"UTF-32BE", UTF_32BE},
257 {"UTF-32BE-BOM", UTF_32BE_BOM},
258 {"UTF-32LE", UTF_32LE},
259 {"UTF-32LE-BOM", UTF_32LE_BOM},
264 #if defined(DEFAULT_CODE_JIS)
265 #define DEFAULT_ENCIDX ISO_2022_JP
266 #elif defined(DEFAULT_CODE_SJIS)
267 #define DEFAULT_ENCIDX SHIFT_JIS
268 #elif defined(DEFAULT_CODE_WINDOWS_31J)
269 #define DEFAULT_ENCIDX WINDOWS_31J
270 #elif defined(DEFAULT_CODE_EUC)
271 #define DEFAULT_ENCIDX EUC_JP
272 #elif defined(DEFAULT_CODE_UTF8)
273 #define DEFAULT_ENCIDX UTF_8
/*
 * Non-zero when c is an ASCII letter ('a'-'z', 'A'-'Z') or decimal digit.
 * Every use of the argument is parenthesized so that expression arguments
 * such as `x | 0x20` or `f ? a : b` are tested as a whole.
 * c is still evaluated more than once: do not pass expressions with side
 * effects.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
/* I don't trust portability of toupper */
/*
 * Maps 'a'-'z' to 'A'-'Z' and yields every other value unchanged.
 * The argument is parenthesized at each use so expression arguments keep
 * their meaning; it is evaluated more than once, so avoid side effects.
 */
#define nkf_toupper(c) (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
/*
 * Locale-independent ASCII character classification.
 * Each macro yields non-zero when c belongs to the class and 0 otherwise.
 * Arguments are parenthesized at every use so that expression arguments are
 * classified as a whole; they are evaluated more than once, so do not pass
 * expressions with side effects.
 * nkf_isblank/nkf_isspace/nkf_isprint rely on SP, TAB, CR and LF, which are
 * defined elsewhere in this file.
 */
#define nkf_isoctal(c) ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c) ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/*
 * hex2bin(c): value (0-15) of the hexadecimal digit character c; yields 0
 * when c is not a hexadecimal digit.
 * bin2hex(c): upper-case hexadecimal digit character for the low four bits
 * of c.
 * Arguments are parenthesized at every use; in particular bin2hex(a | b) now
 * masks the whole expression instead of only its last operand, which could
 * index past the end of the digit string.  Arguments of hex2bin are
 * evaluated more than once, so avoid side effects.
 */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/*
 * is_eucg3(c2): true when bits 8-15 of c2 (taken as unsigned short) equal
 * SS3.  The cast now applies to the whole argument expression rather than
 * only to its first operand.
 */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/*
 * nkf_noescape_mime(c): true for CR, LF, and for any character strictly
 * between SP and DEL other than '?', '=', '_', '(', ')', '.' and 0x22 (the
 * double quote).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

/* is_ibmext_in_sjis(c2): true when c2 lies in [CP932_TABLE_BEGIN, CP932_TABLE_END]. */
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
/* nkf_byte_jisx0201_katakana_p(c): true when c lies in [SP, 0x5F]. */
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
303 #define HOLD_SIZE 1024
304 #if defined(INT_IS_SHORT)
305 #define IOBUF_SIZE 2048
307 #define IOBUF_SIZE 16384
310 #define DEFAULT_J 'B'
311 #define DEFAULT_R 'B'
318 /* MIME preprocessor */
320 #ifdef EASYWIN /*Easy Win */
321 extern POINT _BufferSize;
330 void (*status_func)(struct input_code *, nkf_char);
331 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
335 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
336 static nkf_encoding *input_encoding = NULL;
337 static nkf_encoding *output_encoding = NULL;
339 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
341 * 0: Shift_JIS, eucJP-ascii
346 #define UCS_MAP_ASCII 0
348 #define UCS_MAP_CP932 2
349 #define UCS_MAP_CP10001 3
350 static int ms_ucs_map_f = UCS_MAP_ASCII;
352 #ifdef UTF8_INPUT_ENABLE
353 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
354 static int no_cp932ext_f = FALSE;
355 /* ignore ZERO WIDTH NO-BREAK SPACE */
356 static int no_best_fit_chars_f = FALSE;
357 static int input_endian = ENDIAN_BIG;
358 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
359 static void (*encode_fallback)(nkf_char c) = NULL;
360 static void w_status(struct input_code *, nkf_char);
362 #ifdef UTF8_OUTPUT_ENABLE
363 static int output_bom_f = FALSE;
364 static int output_endian = ENDIAN_BIG;
367 static void std_putc(nkf_char c);
368 static nkf_char std_getc(FILE *f);
369 static nkf_char std_ungetc(nkf_char c,FILE *f);
371 static nkf_char broken_getc(FILE *f);
372 static nkf_char broken_ungetc(nkf_char c,FILE *f);
374 static nkf_char mime_getc(FILE *f);
376 static void mime_putc(nkf_char c);
380 #if !defined(PERL_XS) && !defined(WIN32DLL)
381 static unsigned char stdibuf[IOBUF_SIZE];
382 static unsigned char stdobuf[IOBUF_SIZE];
385 #define NKF_UNSPECIFIED (-TRUE)
/* Option flags; each holds its default value until changed elsewhere. */
388 static int unbuf_f = FALSE;
389 static int estab_f = FALSE;
390 static int nop_f = FALSE;
391 static int binmode_f = TRUE; /* binary mode */
392 static int rot_f = FALSE; /* rot14/43 mode */
393 static int hira_f = FALSE; /* hira/kata henkan */
394 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
395 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
396 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
397 static int mimebuf_f = FALSE; /* MIME buffered input */
398 static int broken_f = FALSE; /* convert ESC-less broken JIS */
399 static int iso8859_f = FALSE; /* ISO8859 through */
400 static int mimeout_f = FALSE; /* base64 mode */
401 static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
402 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
404 #ifdef UNICODE_NORMALIZATION
405 static int nfc_f = FALSE;
406 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
407 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
411 static int cap_f = FALSE;
412 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
413 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
415 static int url_f = FALSE;
416 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
417 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
/*
 * nkf_char tagging.
 * The top byte (CLASS_MASK) carries a class tag and the low 24 bits
 * (VALUE_MASK) carry the value; CLASS_UNICODE is the tag tested by
 * nkf_char_unicode_p().  PREFIX_EUCG3 is OR-ed in by nkf_char_euc3_new().
 */
#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
#define CLASS_MASK NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE NKF_INT32_C(0x01000000)
#define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/*
 * Predicates: the argument is parenthesized before masking so that
 * expression arguments (e.g. `a | b`) are masked as a whole.
 */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
432 #ifdef NUMCHAR_OPTION
433 static int numchar_f = FALSE;
434 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
435 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
439 static int noout_f = FALSE;
440 static void no_putc(nkf_char c);
441 static int debug_f = FALSE;
442 static void debug(const char *str);
443 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
446 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
447 static void set_input_codename(const char *codename);
450 static int exec_f = 0;
453 #ifdef SHIFTJIS_CP932
454 /* invert IBM extended characters to others */
455 static int cp51932_f = FALSE;
457 /* invert NEC-selected IBM extended characters to IBM extended characters */
458 static int cp932inv_f = TRUE;
460 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
461 #endif /* SHIFTJIS_CP932 */
463 static int x0212_f = FALSE;
464 static int x0213_f = FALSE;
466 static unsigned char prefix_table[256];
468 static void e_status(struct input_code *, nkf_char);
469 static void s_status(struct input_code *, nkf_char);
471 struct input_code input_code_list[] = {
472 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
473 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
474 #ifdef UTF8_INPUT_ENABLE
475 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
476 {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
477 {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
482 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
483 static int base64_count = 0;
485 /* X0208 -> ASCII converter */
488 static int f_line = 0; /* chars in line */
489 static int f_prev = 0;
490 static int fold_preserve_f = FALSE; /* preserve new lines */
491 static int fold_f = FALSE;
492 static int fold_len = 0;
495 static unsigned char kanji_intro = DEFAULT_J;
496 static unsigned char ascii_intro = DEFAULT_R;
500 #define FOLD_MARGIN 10
501 #define DEFAULT_FOLD 60
503 static int fold_margin = FOLD_MARGIN;
505 /* process default */
508 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
510 fprintf(stderr,"nkf internal module connection failure.\n");
516 no_connection(nkf_char c2, nkf_char c1)
518 no_connection2(c2,c1,0);
/*
 * Converter hooks.  All of them start out pointing at the no_connection /
 * no_connection2 stubs, which report "nkf internal module connection
 * failure." on stderr, until real converters are assigned elsewhere.
 */
521 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
522 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
524 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
525 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
526 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
527 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
528 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
532 /* static redirections */
534 static void (*o_putc)(nkf_char c) = std_putc;
536 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
537 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
539 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
540 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
542 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
544 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
545 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
547 /* for strict mime */
548 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
549 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
552 static int output_mode = ASCII; /* output kanji mode */
553 static int input_mode = ASCII; /* input kanji mode */
554 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
556 /* X0201 / X0208 conversion tables */
558 /* X0201 kana conversion table */
560 static const unsigned char cv[]= {
561 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
562 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
563 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
564 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
565 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
566 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
567 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
568 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
569 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
570 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
571 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
572 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
573 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
574 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
575 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
576 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
580 /* X0201 kana conversion table for dakuten */
582 static const unsigned char dv[]= {
583 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
584 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
585 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
588 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
589 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
590 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
591 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
592 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
593 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
594 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
596 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 /* X0201 kana conversion table for han-dakuten */
603 static const unsigned char ev[]= {
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
615 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 /* X0208 kigou conversion table */
624 /* 0x8140 - 0x819e */
625 static const unsigned char fv[] = {
627 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
628 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
629 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
630 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
631 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
632 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
633 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
635 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
636 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
637 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
638 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
643 static int option_mode = 0;
644 static int file_out_f = FALSE;
646 static int overwrite_f = FALSE;
647 static int preserve_time_f = FALSE;
648 static int backup_f = FALSE;
649 static char *backup_suffix = "";
652 static int eolmode_f = 0; /* CR, LF, CRLF */
653 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
654 static nkf_char prev_cr = 0; /* CR or 0 */
655 #ifdef EASYWIN /*Easy Win */
656 static int end_check;
660 nkf_xmalloc(size_t size)
664 if (size == 0) size = 1;
668 perror("can't malloc");
676 nkf_xrealloc(void *ptr, size_t size)
678 if (size == 0) size = 1;
680 ptr = realloc(ptr, size);
682 perror("can't realloc");
689 #define nkf_xfree(ptr) free(ptr)
692 nkf_str_caseeql(const char *src, const char *target)
695 for (i = 0; src[i] && target[i]; i++) {
696 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
698 if (src[i] || target[i]) return FALSE;
703 nkf_enc_from_index(int idx)
705 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
708 return &nkf_encoding_table[idx];
712 nkf_enc_find_index(const char *name)
715 if (name[0] == 'X' && *(name+1) == '-') name += 2;
716 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
717 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
718 return encoding_name_to_id_table[i].id;
725 nkf_enc_find(const char *name)
728 idx = nkf_enc_find_index(name);
729 if (idx < 0) return 0;
730 return nkf_enc_from_index(idx);
/*
 * Accessors for an nkf_encoding pointer `enc`.
 * name / id / base_encoding are read directly from the entry; the iconv and
 * oconv accessors go through the entry's base (native) encoding.
 */
733 #define nkf_enc_name(enc) (enc)->name
734 #define nkf_enc_to_index(enc) (enc)->id
735 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
736 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
737 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
738 #define nkf_enc_asciicompat(enc) (\
739 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
740 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 natives. */
741 #define nkf_enc_unicode_p(enc) (\
742 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
743 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
744 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
745 #define nkf_enc_cp5022x_p(enc) (\
746 nkf_enc_to_index(enc) == CP50220 ||\
747 nkf_enc_to_index(enc) == CP50221 ||\
748 nkf_enc_to_index(enc) == CP50222)
750 #ifdef DEFAULT_CODE_LOCALE
754 #ifdef HAVE_LANGINFO_H
755 return nl_langinfo(CODESET);
756 #elif defined(__WIN32__)
758 sprintf(buf, "CP%d", GetACP());
760 #elif defined(__OS2__)
761 # if defined(INT_IS_SHORT)
767 ULONG ulCP[1], ulncp;
768 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
769 if (ulCP[0] == 932 || ulCP[0] == 943)
770 strcpy(buf, "Shift_JIS");
772 sprintf(buf, "CP%lu", ulCP[0]);
780 nkf_locale_encoding()
782 nkf_encoding *enc = 0;
783 const char *encname = nkf_locale_charmap();
785 enc = nkf_enc_find(encname);
788 #endif /* DEFAULT_CODE_LOCALE */
793 return &nkf_encoding_table[UTF_8];
797 nkf_default_encoding()
799 nkf_encoding *enc = 0;
800 #ifdef DEFAULT_CODE_LOCALE
801 enc = nkf_locale_encoding();
802 #elif defined(DEFAULT_ENCIDX)
803 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
805 if (!enc) enc = nkf_utf8_encoding();
816 nkf_buf_new(int length)
818 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
819 buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
827 nkf_buf_dispose(nkf_buf_t *buf)
834 #define nkf_buf_length(buf) ((buf)->len)
835 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
/*
 * Returns the element stored at position `index` of buf->ptr.
 * NOTE(review): the assertion below accepts index == buf->len, but
 * nkf_buf_push() stores new elements at ptr[len++], so ptr[len] has not been
 * written yet.  The check probably should be
 * `0 <= index && index < buf->len` -- confirm against callers before
 * tightening it.
 */
838 nkf_buf_at(nkf_buf_t *buf, int index)
840 assert(index <= buf->len);
841 return buf->ptr[index];
845 nkf_buf_clear(nkf_buf_t *buf)
851 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
853 if (buf->capa <= buf->len) {
856 buf->ptr[buf->len++] = c;
860 nkf_buf_pop(nkf_buf_t *buf)
862 assert(!nkf_buf_empty_p(buf));
863 return buf->ptr[--buf->len];
866 /* Normalization Form C */
869 #define fprintf dllprintf
875 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
882 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
883 #ifdef UTF8_OUTPUT_ENABLE
884 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
885 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
888 #ifdef UTF8_INPUT_ENABLE
889 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
890 " UTF option is -W[8,[16,32][B,L]]\n"
/*
 * NOTE(review): the next string describes the same upper-case J/S/E options
 * as the "input encoding" line above, yet it says "output encoding".
 * This looks like a copy/paste slip in the help text -- confirm and correct
 * the string.
 */
892 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
896 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
897 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
898 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
901 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
902 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
903 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
904 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
907 " O Output to File (DEFAULT 'nkf.out')\n"
908 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
911 " --ic=<encoding> Specify the input encoding\n"
912 " --oc=<encoding> Specify the output encoding\n"
913 " --hiragana --katakana Hiragana/Katakana Conversion\n"
914 " --katakana-hiragana Converts each other\n"
918 " --{cap, url}-input Convert hex after ':' or '%%'\n"
920 #ifdef NUMCHAR_OPTION
921 " --numchar-input Convert Unicode Character Reference\n"
923 #ifdef UTF8_INPUT_ENABLE
924 " --fb-{skip, html, xml, perl, java, subchar}\n"
925 " Specify unassigned character's replacement\n"
930 " --in-place[=SUF] Overwrite original files\n"
931 " --overwrite[=SUF] Preserve timestamp of original files\n"
933 " -g --guess Guess the input code\n"
934 " -v --version Print the version\n"
935 " --help/-V Print this help / configuration\n"
941 show_configuration(void)
944 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
945 " Compile-time options:\n"
946 " Compiled at: " __DATE__ " " __TIME__ "\n"
949 " Default output encoding: "
950 #ifdef DEFAULT_CODE_LOCALE
951 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
952 #elif defined(DEFAULT_ENCIDX)
953 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
959 " Default output end of line: "
960 #if DEFAULT_NEWLINE == CR
962 #elif DEFAULT_NEWLINE == CRLF
968 " Decode MIME encoded string: "
969 #if MIME_DECODE_DEFAULT
975 " Convert JIS X 0201 Katakana: "
982 " --help, --version output: "
983 #if HELP_OUTPUT_HELP_OUTPUT
994 get_backup_filename(const char *suffix, const char *filename)
996 char *backup_filename;
997 int asterisk_count = 0;
999 int filename_length = strlen(filename);
1001 for(i = 0; suffix[i]; i++){
1002 if(suffix[i] == '*') asterisk_count++;
1006 backup_filename = nkf_xmalloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1007 for(i = 0, j = 0; suffix[i];){
1008 if(suffix[i] == '*'){
1009 backup_filename[j] = '\0';
1010 strncat(backup_filename, filename, filename_length);
1012 j += filename_length;
1014 backup_filename[j++] = suffix[i++];
1017 backup_filename[j] = '\0';
1019 j = filename_length + strlen(suffix);
1020 backup_filename = nkf_xmalloc(j + 1);
1021 strcpy(backup_filename, filename);
1022 strcat(backup_filename, suffix);
1023 backup_filename[j] = '\0';
1025 return backup_filename;
1029 #ifdef UTF8_INPUT_ENABLE
1031 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1036 if(c >= NKF_INT32_C(1)<<shift){
1038 (*f)(0, bin2hex(c>>shift));
1049 encode_fallback_html(nkf_char c)
1054 if(c >= NKF_INT32_C(1000000))
1055 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1056 if(c >= NKF_INT32_C(100000))
1057 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1059 (*oconv)(0, 0x30+(c/10000 )%10);
1061 (*oconv)(0, 0x30+(c/1000 )%10);
1063 (*oconv)(0, 0x30+(c/100 )%10);
1065 (*oconv)(0, 0x30+(c/10 )%10);
1067 (*oconv)(0, 0x30+ c %10);
1073 encode_fallback_xml(nkf_char c)
1078 nkf_each_char_to_hex(oconv, c);
1084 encode_fallback_java(nkf_char c)
1088 if(!nkf_char_unicode_bmp_p(c)){
1092 (*oconv)(0, bin2hex(c>>20));
1093 (*oconv)(0, bin2hex(c>>16));
1097 (*oconv)(0, bin2hex(c>>12));
1098 (*oconv)(0, bin2hex(c>> 8));
1099 (*oconv)(0, bin2hex(c>> 4));
1100 (*oconv)(0, bin2hex(c ));
1105 encode_fallback_perl(nkf_char c)
1110 nkf_each_char_to_hex(oconv, c);
1116 encode_fallback_subchar(nkf_char c)
1118 c = unicode_subchar;
1119 (*oconv)((c>>8)&0xFF, c&0xFF);
1124 static const struct {
1148 {"katakana-hiragana","h3"},
1156 #ifdef UTF8_OUTPUT_ENABLE
1166 {"fb-subchar=", ""},
1168 #ifdef UTF8_INPUT_ENABLE
1169 {"utf8-input", "W"},
1170 {"utf16-input", "W16"},
1171 {"no-cp932ext", ""},
1172 {"no-best-fit-chars",""},
1174 #ifdef UNICODE_NORMALIZATION
1175 {"utf8mac-input", ""},
1187 #ifdef NUMCHAR_OPTION
1188 {"numchar-input", ""},
1194 #ifdef SHIFTJIS_CP932
1205 set_input_encoding(nkf_encoding *enc)
1207 switch (nkf_enc_to_index(enc)) {
1214 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1215 #ifdef SHIFTJIS_CP932
1218 #ifdef UTF8_OUTPUT_ENABLE
1219 ms_ucs_map_f = UCS_MAP_CP932;
1229 case ISO_2022_JP_2004:
1236 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1237 #ifdef SHIFTJIS_CP932
1240 #ifdef UTF8_OUTPUT_ENABLE
1241 ms_ucs_map_f = UCS_MAP_CP932;
1246 #ifdef SHIFTJIS_CP932
1249 #ifdef UTF8_OUTPUT_ENABLE
1250 ms_ucs_map_f = UCS_MAP_CP10001;
1258 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1259 #ifdef SHIFTJIS_CP932
1262 #ifdef UTF8_OUTPUT_ENABLE
1263 ms_ucs_map_f = UCS_MAP_CP932;
1267 #ifdef SHIFTJIS_CP932
1270 #ifdef UTF8_OUTPUT_ENABLE
1271 ms_ucs_map_f = UCS_MAP_MS;
1275 #ifdef SHIFTJIS_CP932
1278 #ifdef UTF8_OUTPUT_ENABLE
1279 ms_ucs_map_f = UCS_MAP_ASCII;
1282 case SHIFT_JISX0213:
1283 case SHIFT_JIS_2004:
1285 #ifdef SHIFTJIS_CP932
1292 #ifdef SHIFTJIS_CP932
1296 #ifdef UTF8_INPUT_ENABLE
1297 #ifdef UNICODE_NORMALIZATION
1305 input_endian = ENDIAN_BIG;
1309 input_endian = ENDIAN_LITTLE;
1314 input_endian = ENDIAN_BIG;
1318 input_endian = ENDIAN_LITTLE;
1325 set_output_encoding(nkf_encoding *enc)
1327 switch (nkf_enc_to_index(enc)) {
1329 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1330 #ifdef SHIFTJIS_CP932
1331 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1333 #ifdef UTF8_OUTPUT_ENABLE
1334 ms_ucs_map_f = UCS_MAP_CP932;
1338 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1339 #ifdef SHIFTJIS_CP932
1340 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1342 #ifdef UTF8_OUTPUT_ENABLE
1343 ms_ucs_map_f = UCS_MAP_CP932;
1347 #ifdef SHIFTJIS_CP932
1348 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1353 #ifdef SHIFTJIS_CP932
1354 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1360 #ifdef SHIFTJIS_CP932
1361 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1367 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1368 #ifdef UTF8_OUTPUT_ENABLE
1369 ms_ucs_map_f = UCS_MAP_CP932;
1373 #ifdef UTF8_OUTPUT_ENABLE
1374 ms_ucs_map_f = UCS_MAP_CP10001;
1379 #ifdef SHIFTJIS_CP932
1380 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1382 #ifdef UTF8_OUTPUT_ENABLE
1383 ms_ucs_map_f = UCS_MAP_ASCII;
1388 #ifdef SHIFTJIS_CP932
1389 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1391 #ifdef UTF8_OUTPUT_ENABLE
1392 ms_ucs_map_f = UCS_MAP_ASCII;
1396 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1397 #ifdef SHIFTJIS_CP932
1398 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1400 #ifdef UTF8_OUTPUT_ENABLE
1401 ms_ucs_map_f = UCS_MAP_CP932;
1406 #ifdef UTF8_OUTPUT_ENABLE
1407 ms_ucs_map_f = UCS_MAP_MS;
1412 #ifdef UTF8_OUTPUT_ENABLE
1413 ms_ucs_map_f = UCS_MAP_ASCII;
1416 case SHIFT_JISX0213:
1417 case SHIFT_JIS_2004:
1419 #ifdef SHIFTJIS_CP932
1420 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1427 #ifdef SHIFTJIS_CP932
1428 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1431 #ifdef UTF8_OUTPUT_ENABLE
1433 output_bom_f = TRUE;
1437 output_bom_f = TRUE;
1440 output_endian = ENDIAN_LITTLE;
1441 output_bom_f = FALSE;
1444 output_endian = ENDIAN_LITTLE;
1445 output_bom_f = TRUE;
1449 output_bom_f = TRUE;
1452 output_endian = ENDIAN_LITTLE;
1453 output_bom_f = FALSE;
1456 output_endian = ENDIAN_LITTLE;
1457 output_bom_f = TRUE;
1463 static struct input_code*
1464 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1467 struct input_code *p = input_code_list;
1469 if (iconv_func == p->iconv_func){
1479 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1481 #ifdef INPUT_CODE_FIX
1482 if (f || !input_encoding)
1489 #ifdef INPUT_CODE_FIX
1490 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1496 if (estab_f && iconv_for_check != iconv){
1497 struct input_code *p = find_inputcode_byfunc(iconv);
1499 set_input_codename(p->name);
1502 iconv_for_check = iconv;
1509 x0212_shift(nkf_char c)
1514 if (0x75 <= c && c <= 0x7f){
1515 ret = c + (0x109 - 0x75);
1518 if (0x75 <= c && c <= 0x7f){
1519 ret = c + (0x113 - 0x75);
1527 x0212_unshift(nkf_char c)
1530 if (0x7f <= c && c <= 0x88){
1531 ret = c + (0x75 - 0x7f);
1532 }else if (0x89 <= c && c <= 0x92){
1533 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1537 #endif /* X0212_ENABLE */
/*
 * e2s_conv: derive a two-byte pair from (c2, c1) and store it through p2 and
 * p1.  Either out pointer may be NULL, in which case that byte is not stored.
 * Visible failure path: returns 1 when c2 is above 0x7F after the G3 handling.
 * NOTE(review): the name and the x0212_shiftjis table suggest the EUC-JP to
 * Shift_JIS direction; the enclosing conditions that select the first two
 * branches, and the success return, are elided from this excerpt.
 */
1540 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
/* ndx 0x21..0x2F: lead byte from a row formula; the trail-byte offset depends
 * on row parity (odd: 0x1f or 0x20 split at c1 < 0x60, even: 0x7e). */
1546 if((0x21 <= ndx && ndx <= 0x2F)){
1547 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1548 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/* ndx 0x6E..0x7E: same trail-byte rule, lead byte based at 0xbe */
1550 }else if(0x6E <= ndx && ndx <= 0x7E){
1551 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1552 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/* table-driven path: row pointer indexed by ndx, entry indexed by the low
 * seven bits of c1 (both rebased from 0x21) */
1558 else if(nkf_isgraph(ndx)){
1560 const unsigned short *ptr;
1561 ptr = x0212_shiftjis[ndx - 0x21];
1563 val = ptr[(c1 & 0x7f) - 0x21];
1572 c2 = x0212_shift(c2);
1574 #endif /* X0212_ENABLE */
/* generic path: reject rows above 0x7F, then apply the two-rows-per-lead-byte
 * formula (base 0x71 up to row 0x5e, 0xb1 above) */
1576 if(0x7F < c2) return 1;
1577 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1578 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/*
 * s2e_conv: transform a two-byte pair (c2 lead, c1 trail) into a row/cell
 * pair, consulting CP932 / X0212 / X0213 tables where enabled.
 * Visible failure path: returns 1 when c1 is above 0xFC.
 * NOTE(review): the name suggests the Shift_JIS to EUC-JP direction; the
 * stores through p2 and p1 and the success return are elided from this
 * excerpt.
 */
1583 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1585 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
/* indexed by [c2 - 0xF0][0x9E < c1]; see the k ranges in the comment below */
1588 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1589 if (0xFC < c1) return 1;
1590 #ifdef SHIFTJIS_CP932
/* IBM extension lead byte: translate through the shiftjis_cp932 table */
1591 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1592 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
/* lead byte inside the cp932inv table range: translate through cp932inv */
1599 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1600 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1606 #endif /* SHIFTJIS_CP932 */
/* outside X0213 mode, IBM extension lead bytes go through shiftjis_x0212
 * (rows based at lead byte 0xfa) and the result is tagged with PREFIX_EUCG3 */
1608 if (!x0213_f && is_ibmext_in_sjis(c2)){
1609 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1612 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
/* X0213 mode, lead byte 0xF0 and above: result is tagged with PREFIX_EUCG3 */
1625 if(x0213_f && c2 >= 0xF0){
1626 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1627 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1628 }else{ /* 78<=k<=94 */
1629 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1630 if (0x9E < c1) c2++;
/* generic path: each lead byte covers two rows; a trail byte above 0x9E
 * selects the second row of the pair */
1633 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1634 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1635 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1636 if (0x9E < c1) c2++;
/* rebase the trail byte: subtract SP above DEL, 0x1F otherwise */
1639 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1646 c2 = x0212_unshift(c2);
1653 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * nkf_unicode_to_utf8: encode code point `val` as UTF-8, one byte per out
 * parameter, with the lead byte in *p1.
 * Visible branches: val < 0x800 gives two bytes, a BMP value gives three
 * bytes, any other value accepted by nkf_char_unicode_value_p() gives four.
 * NOTE(review): the first branch and the treatment of unused out parameters
 * are elided from this excerpt; callers in this file test c2, c3 and c4 for
 * zero before emitting them, so unused slots are presumably set to 0.
 */
1655 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
/* two-byte form: 110xxxxx 10xxxxxx */
1663 }else if (val < 0x800){
1664 *p1 = 0xc0 | (val >> 6);
1665 *p2 = 0x80 | (val & 0x3f);
/* three-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
1668 } else if (nkf_char_unicode_bmp_p(val)) {
1669 *p1 = 0xe0 | (val >> 12);
1670 *p2 = 0x80 | ((val >> 6) & 0x3f);
1671 *p3 = 0x80 | ( val & 0x3f);
/* four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
1673 } else if (nkf_char_unicode_value_p(val)) {
1674 *p1 = 0xf0 | (val >> 18);
1675 *p2 = 0x80 | ((val >> 12) & 0x3f);
1676 *p3 = 0x80 | ((val >> 6) & 0x3f);
1677 *p4 = 0x80 | ( val & 0x3f);
/*
 * nkf_utf8_to_unicode: assemble a code point in `wc` from up to four UTF-8
 * bytes (c1 is the lead byte), dispatching on the byte value.
 * NOTE(review): the first branch, the lowest-order payload bits of each form
 * and the return statements are elided from this excerpt.
 */
1687 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/* NOTE(review): in UTF-8, 0xC2 and 0xC3 are valid two-byte lead bytes
 * (U+0080..U+00FF), yet they fall into this "invalid" branch; confirm
 * whether the bound was meant to be 0xC1. */
1694 else if (c1 <= 0xC3) {
1695 /* trail byte or invalid */
/* two-byte form: five payload bits from the lead byte */
1698 else if (c1 <= 0xDF) {
1700 wc = (c1 & 0x1F) << 6;
/* three-byte form: four bits from the lead byte, six from c2 */
1703 else if (c1 <= 0xEF) {
1705 wc = (c1 & 0x0F) << 12;
1706 wc |= (c2 & 0x3F) << 6;
/* NOTE(review): this condition tests c2 while every other branch tests c1;
 * it looks as if c1 <= 0xF4 was intended. Confirm before changing. */
1709 else if (c2 <= 0xF4) {
/* four-byte form */
1711 wc = (c1 & 0x0F) << 18;
1712 wc |= (c2 & 0x3F) << 12;
1713 wc |= (c3 & 0x3F) << 6;
1723 #ifdef UTF8_INPUT_ENABLE
/*
 * unicode_to_jis_common2: bounds-checked two-level table lookup.
 *
 *   c1, c0  - indices; c1 is checked against psize, c0 against
 *             sizeof_utf8_to_euc_C2.
 *   pp      - table of row pointers (may be null).
 *   psize   - number of rows in pp.
 *   p2, p1  - out parameters for the result.
 *
 * Every visible failure path returns 1: null table, index out of range,
 * null row, zero entry.
 * NOTE(review): the statements that load `p` and `val` (presumably pp[c1] and
 * p[c0]) and that store the result are elided from this excerpt.
 */
1725 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1726 const unsigned short *const *pp, nkf_char psize,
1727 nkf_char *p2, nkf_char *p1)
1730 const unsigned short *p;
1733 if (pp == 0) return 1;
1736 if (c1 < 0 || psize <= c1) return 1;
1738 if (p == 0) return 1;
1741 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1743 if (val == 0) return 1;
/* with no_cp932ext_f set, results in the vendor-extension areas are filtered */
1744 if (no_cp932ext_f && (
1745 (val>>8) == 0x2D || /* NEC special characters */
1746 val > NKF_INT32_C(0xF300) /* IBM extended characters */
/* a high byte equal to SO is replaced by the JIS X 0201 kana tag */
1754 if (c2 == SO) c2 = JIS_X_0201_1976_K;
/*
 * unicode_to_jis_common: map a UTF-8 byte sequence (c2 is the lead byte,
 * followed by c1 and c0) to a code stored through p2 and p1, using lookup
 * tables selected by ms_ucs_map_f.  When no_best_fit_chars_f is set, selected
 * byte combinations are rejected up front with return value 1.
 * NOTE(review): several branch heads, the default table choices and the final
 * return are elided from this excerpt.
 */
1762 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1764 const unsigned short *const *pp;
1765 const unsigned short *const *const *ppp;
/* The four tables below have 64 entries and are indexed with (c1 & 0x3F).
 * In the visible uses a non-zero entry causes an immediate return of 1.
 * NOTE(review): the meaning of the value 2 in the first table is not visible
 * in this excerpt. */
1766 static const char no_best_fit_chars_table_C2[] =
1767 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1768 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1769 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1770 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1771 static const char no_best_fit_chars_table_C2_ms[] =
1772 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1773 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1774 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1775 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1776 static const char no_best_fit_chars_table_932_C2[] =
1777 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1778 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1779 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1780 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1781 static const char no_best_fit_chars_table_932_C3[] =
1782 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1783 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1784 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1785 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
/* lead byte below 0xe0: two-byte sequence */
1791 }else if(c2 < 0xe0){
1792 if(no_best_fit_chars_f){
1793 if(ms_ucs_map_f == UCS_MAP_CP932){
1796 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1799 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1802 }else if(!cp932inv_f){
1805 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1808 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1811 }else if(ms_ucs_map_f == UCS_MAP_MS){
1812 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1813 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
/* choose the two-byte table for the active mapping variant */
1831 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1832 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1833 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1835 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
/* NOTE(review): the previous branch tests the lead byte c2 and the lookup
 * below indexes ppp with c2 - 0xE0, but this condition tests c0; it looks
 * as if c2 < 0xF0 was intended. Confirm before changing. */
1836 }else if(c0 < 0xF0){
1837 if(no_best_fit_chars_f){
1838 if(ms_ucs_map_f == UCS_MAP_CP932){
1839 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1840 }else if(ms_ucs_map_f == UCS_MAP_MS){
1845 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1848 if(c0 == 0x92) return 1;
1853 if(c1 == 0x80 || c0 == 0x9C) return 1;
1856 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1861 if(c0 == 0x94) return 1;
1864 if(c0 == 0xBB) return 1;
1874 if(c0 == 0x95) return 1;
1877 if(c0 == 0xA5) return 1;
1884 if(c0 == 0x8D) return 1;
1887 if(c0 == 0x9E && !cp932inv_f) return 1;
1890 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
/* choose the three-byte table set for the active mapping variant */
1898 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1899 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1900 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1902 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1904 #ifdef SHIFTJIS_CP932
/* a successful G3 result is passed through e2s_conv and back through
 * s2e_conv; presumably this normalises codes that have a CP932 equivalent */
1905 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1907 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1908 s2e_conv(s2, s1, p2, p1);
1917 #ifdef UTF8_OUTPUT_ENABLE
1919 e2w_conv(nkf_char c2, nkf_char c1)
1921 const unsigned short *p;
1923 if (c2 == JIS_X_0201_1976_K) {
1924 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1932 p = euc_to_utf8_1byte;
1934 } else if (is_eucg3(c2)){
1935 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1938 c2 = (c2&0x7f) - 0x21;
1939 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1940 p = x0212_to_utf8_2bytes[c2];
1946 c2 = (c2&0x7f) - 0x21;
1947 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1949 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1950 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1951 euc_to_utf8_2bytes_ms[c2];
1956 c1 = (c1 & 0x7f) - 0x21;
1957 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1964 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1971 }else if (0xc0 <= c2 && c2 <= 0xef) {
1972 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1973 #ifdef NUMCHAR_OPTION
1976 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1984 #ifdef UTF8_INPUT_ENABLE
1986 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1988 nkf_char c1, c2, c3, c4;
1995 else if (nkf_char_unicode_bmp_p(val)){
1996 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
1997 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2000 *p1 = nkf_char_unicode_new(val);
2006 *p1 = nkf_char_unicode_new(val);
/*
 * e_iconv: input-side handler for one character given as (c2, c1, c0).
 * Visible cases: JIS X 0201 kana (c2 is JIS_X_0201_1976_K or SS2), the
 * 0x8f-prefixed three-byte form, EOF / control / ISO-8859-1 values, and
 * ordinary two-byte codes.
 * NOTE(review): the SS2 and 0x8f prefixes suggest EUC-JP input; the calls
 * that forward the result to the output stage and the return statements are
 * elided from this excerpt.
 */
2013 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2015 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
/* with iso2022jp_f set and x0201_f clear the kana is replaced by the
 * GETA1/GETA2 pair */
2016 if (iso2022jp_f && !x0201_f) {
2017 c2 = GETA1; c1 = GETA2;
2019 c2 = JIS_X_0201_1976_K;
2023 }else if (c2 == 0x8f){
/* rows 0xF5..0xFE of the three-byte set: 94 cells per row, based at U+E3AC,
 * i.e. 940 code points (10 rows x 94) after the U+E000 base used for the
 * two-byte rows further down */
2027 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2028 /* encoding is eucJP-ms, so map to the Unicode Private Use Area */
2029 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
/* pack the 0x8f prefix and the 7-bit row into c2 */
2032 c2 = (c2 << 8) | (c1 & 0x7f);
2034 #ifdef SHIFTJIS_CP932
/* round trip through e2s_conv and s2e_conv when the first conversion succeeds */
2037 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2038 s2e_conv(s2, s1, &c2, &c1);
2045 #endif /* SHIFTJIS_CP932 */
2047 #endif /* X0212_ENABLE */
2048 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
/* rows 0xF5..0xFE of the two-byte set: 94 cells per row, based at U+E000 */
2051 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2052 /* encoding is eucJP-ms, so map to the Unicode Private Use Area */
2053 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2058 #ifdef SHIFTJIS_CP932
/* CP51932 mode, rows 0x79..0x7c: same round trip as above */
2059 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2061 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2062 s2e_conv(s2, s1, &c2, &c1);
2069 #endif /* SHIFTJIS_CP932 */
/*
 * s_iconv: input-side handler for one character given as (c2, c1, c0).
 * Visible cases: JIS X 0201 kana (tagged, or a raw byte 0xA1..0xDF), EOF and
 * control values, lead bytes 0xF0..0xF9 outside X0213 mode, and everything
 * else via s2e_conv (whose non-zero result is returned unchanged).
 * NOTE(review): the use of s2e_conv suggests Shift_JIS input; the calls that
 * forward the result to the output stage are elided from this excerpt.
 */
2077 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2079 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
/* with iso2022jp_f set and x0201_f clear the kana is replaced by the
 * GETA1/GETA2 pair */
2080 if (iso2022jp_f && !x0201_f) {
2081 c2 = GETA1; c1 = GETA2;
2085 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
/* lead bytes 0xF0..0xF9: 188 cells per lead byte, based at U+E000; trail
 * byte 0x7F is skipped, so trail bytes above 0x7E are shifted down by one */
2087 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2089 if(c1 == 0x7F) return 0;
2090 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2093 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2094 if (ret) return ret;
/*
 * w_iconv: validate a UTF-8 sequence starting with lead byte c1 and convert
 * it.  Visible return values: 0 when the second byte of one class is not a
 * continuation byte, -1 or -2 when c3 is still 0 (more input needed).
 * NOTE(review): the case labels of the switch, the derivation of c4 and the
 * final hand-off are elided from this excerpt; the notes on individual checks
 * below are inferred from the table values and the UTF-8 well-formedness
 * rules, so confirm them against the full source.
 */
2101 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2103 nkf_char ret = 0, c4 = 0;
/* Class per lead byte, indexed by (c1 - 0xC0); one row per 16 lead bytes:
 *   0xC0-0xC1 -> 20, 0xC2-0xDF -> 21,
 *   0xE0 -> 30, 0xE1-0xEC -> 31, 0xED -> 32, 0xEE-0xEF -> 33,
 *   0xF0 -> 40, 0xF1-0xF3 -> 41, 0xF4 -> 42, 0xF5-0xF7 -> 43,
 *   0xF8-0xFB -> 50, 0xFC-0xFD -> 60, 0xFE-0xFF -> 70. */
2104 static const char w_iconv_utf8_1st_byte[] =
2106 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2107 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2108 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2109 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2116 if (c1 < 0 || 0xff < c1) {
2117 }else if (c1 == 0) { /* 0 : 1 byte*/
2119 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2122 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
/* two-byte class: second byte must be a continuation byte */
2124 if (c2 < 0x80 || 0xBF < c2) return 0;
/* three-byte class with second byte restricted to 0xA0..0xBF
 * (presumably lead byte 0xE0) */
2127 if (c3 == 0) return -1;
2128 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
/* three-byte class with no extra restriction on the second byte */
2133 if (c3 == 0) return -1;
2134 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
/* three-byte class with second byte restricted to 0x80..0x9F
 * (presumably lead byte 0xED, keeping surrogates out) */
2138 if (c3 == 0) return -1;
2139 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
/* four-byte class with second byte restricted to 0x90..0xBF
 * (presumably lead byte 0xF0) */
2143 if (c3 == 0) return -2;
2144 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
/* four-byte class with no extra restriction on the second byte */
2148 if (c3 == 0) return -2;
2149 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
/* four-byte class with second byte restricted to 0x80..0x8F
 * (presumably lead byte 0xF4, capping the range at U+10FFFF) */
2153 if (c3 == 0) return -2;
2154 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2162 if (c1 == 0 || c1 == EOF){
/* four-byte sequences are decoded to a code point and carried as a
 * Unicode-tagged value; other sequences go through w2e_conv */
2163 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2164 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2167 ret = w2e_conv(c1, c2, c3, &c1, &c2);
2175 #define NKF_ICONV_INVALID_CODE_RANGE -13
/*
 * unicode_iconv: dispatch a single code point `wc` according to its range.
 *   - wc >> 11 == 27 (0xD800..0xDFFF, surrogates): rejected with
 *     NKF_ICONV_INVALID_CODE_RANGE;
 *   - below 0xFFFF: converted through w16e_conv; a non-zero result is
 *     returned unchanged;
 *   - below 0x10FFFF: wrapped with nkf_char_unicode_new();
 *   - remaining visible path: NKF_ICONV_INVALID_CODE_RANGE.
 * NOTE(review): the leading branch(es) and the final hand-off of c2 and c1
 * are elided from this excerpt.
 */
2177 unicode_iconv(nkf_char wc)
2185 }else if ((wc>>11) == 27) {
2186 /* unpaired surrogate */
2187 return NKF_ICONV_INVALID_CODE_RANGE;
/* strict comparison: U+FFFF itself is handled by the next branch */
2188 }else if (wc < 0xFFFF) {
2189 ret = w16e_conv(wc, &c2, &c1);
2190 if (ret) return ret;
/* NOTE(review): strict comparison, so U+10FFFF itself appears to reach the
 * invalid-range return below; confirm whether <= was intended. */
2191 }else if (wc < 0x10FFFF) {
2193 c1 = nkf_char_unicode_new(wc);
2195 return NKF_ICONV_INVALID_CODE_RANGE;
/* Return codes asking the caller to supply more input bytes. */
2201 #define NKF_ICONV_NEED_ONE_MORE_BYTE -1
2202 #define NKF_ICONV_NEED_TWO_MORE_BYTES -2
/* Combine a surrogate pair into a code point.  The constant equals
 * (0xD800 << 10) + 0xDC00 - 0x10000, i.e. it removes both surrogate bases and
 * adds the 0x10000 offset in one subtraction. */
2203 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
/*
 * nkf_iconv_utf_16: decode one UTF-16 unit or surrogate pair from the bytes
 * c1..c4 according to input_endian and pass the code point to unicode_iconv.
 * When the first unit is a lead surrogate (high byte 0xD8..0xDB) and the
 * second unit is not a trail surrogate (high byte 0xDC..0xDF), returns
 * NKF_ICONV_NEED_TWO_MORE_BYTES.
 * NOTE(review): the non-surrogate paths and any EOF handling are elided from
 * this excerpt.
 */
2205 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/* big endian: high bytes are c1 and c3 */
2214 if (input_endian == ENDIAN_BIG) {
2215 if (0xD8 <= c1 && c1 <= 0xDB) {
2216 if (0xDC <= c3 && c3 <= 0xDF) {
2217 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2218 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
/* other byte order: high bytes are c2 and c4 */
2223 if (0xD8 <= c2 && c2 <= 0xDB) {
2224 if (0xDC <= c4 && c4 <= 0xDF) {
2225 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2226 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2232 return (*unicode_iconv)(wc);
2236 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2239 return 16; /* different from w_iconv32 */
2243 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
2246 return 32; /* different from w_iconv16 */
/*
 * nkf_iconv_utf_32: assemble a code point from the four bytes c1..c4 in the
 * byte order given by input_endian and pass it to unicode_iconv.
 * Each visible expression combines three of the four bytes; the byte that
 * would be the most significant one is not used in these lines.
 * An unrecognised input_endian yields NKF_ICONV_INVALID_CODE_RANGE.
 * NOTE(review): the case labels are elided from this excerpt; the order below
 * presumably is ENDIAN_BIG, ENDIAN_LITTLE, ENDIAN_2143, ENDIAN_3412.
 */
2250 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2259 switch(input_endian){
/* bytes in order 1-2-3-4 (c1 would be the top byte) */
2261 wc = c2 << 16 | c3 << 8 | c4;
/* bytes in order 4-3-2-1 (c4 would be the top byte) */
2264 wc = c3 << 16 | c2 << 8 | c1;
/* bytes in order 2-1-4-3 (c2 would be the top byte) */
2267 wc = c1 << 16 | c4 << 8 | c3;
/* bytes in order 3-4-1-2 (c3 would be the top byte) */
2270 wc = c4 << 16 | c1 << 8 | c2;
2273 return NKF_ICONV_INVALID_CODE_RANGE;
2276 return (*unicode_iconv)(wc);
2280 #define output_ascii_escape_sequence(mode) do { \
2281 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2284 (*o_putc)(ascii_intro); \
2285 output_mode = mode; \
2290 output_escape_sequence(int mode)
2292 if (output_mode == mode)
2300 case JIS_X_0201_1976_K:
2308 (*o_putc)(kanji_intro);
/*
 * j_oconv: output one character (c2, c1), switching the output character set
 * with escape sequences as required.
 * Visible cases: Unicode-tagged values (NUMCHAR_OPTION), EOF, ISO-8859-1,
 * JIS X 0201 kana, G3 codes (JIS X 0212 or JIS X 0213 plane 2) and two-byte
 * codes (JIS X 0208 or JIS X 0213 plane 1).
 * NOTE(review): several branch heads and most o_putc calls are elided from
 * this excerpt.
 */
2333 j_oconv(nkf_char c2, nkf_char c1)
2335 #ifdef NUMCHAR_OPTION
/* a Unicode-tagged value is first offered to w16e_conv; if it is still
 * tagged afterwards there was no table mapping */
2336 if (c2 == 0 && nkf_char_unicode_p(c1)){
2337 w16e_conv(c1, &c2, &c1);
2338 if (c2 == 0 && nkf_char_unicode_p(c1)){
2339 c2 = c1 & VALUE_MASK;
/* U+E000..U+E757 (1880 code points = 20 rows x 94 cells) map onto rows
 * starting at 0x7F */
2340 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2343 c2 = 0x7F + c1 / 94;
2344 c1 = 0x21 + c1 % 94;
/* otherwise hand the value to the configured fallback, if any */
2346 if (encode_fallback) (*encode_fallback)(c1);
2353 output_ascii_escape_sequence(ASCII);
2356 else if (c2 == EOF) {
2357 output_ascii_escape_sequence(ASCII);
2360 else if (c2 == ISO_8859_1) {
2361 output_ascii_escape_sequence(ISO_8859_1);
2364 else if (c2 == JIS_X_0201_1976_K) {
2365 output_escape_sequence(JIS_X_0201_1976_K);
2368 } else if (is_eucg3(c2)){
2369 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2370 (*o_putc)(c2 & 0x7f);
/* range check; one alternative allows rows up to 0x92, the other up to 0x7e;
 * out-of-range characters are dropped */
2375 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2376 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2377 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2384 e_oconv(nkf_char c2, nkf_char c1)
2386 if (c2 == 0 && nkf_char_unicode_p(c1)){
2387 w16e_conv(c1, &c2, &c1);
2388 if (c2 == 0 && nkf_char_unicode_p(c1)){
2389 c2 = c1 & VALUE_MASK;
2390 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2394 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2395 c1 = 0x21 + c1 % 94;
2398 (*o_putc)((c2 & 0x7f) | 0x080);
2399 (*o_putc)(c1 | 0x080);
2401 (*o_putc)((c2 & 0x7f) | 0x080);
2402 (*o_putc)(c1 | 0x080);
2406 if (encode_fallback) (*encode_fallback)(c1);
2414 } else if (c2 == 0) {
2415 output_mode = ASCII;
2417 } else if (c2 == JIS_X_0201_1976_K) {
2418 output_mode = EUC_JP;
2419 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2420 } else if (c2 == ISO_8859_1) {
2421 output_mode = ISO_8859_1;
2422 (*o_putc)(c1 | 0x080);
2424 } else if (is_eucg3(c2)){
2425 output_mode = EUC_JP;
2426 #ifdef SHIFTJIS_CP932
2429 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2430 s2e_conv(s2, s1, &c2, &c1);
2435 output_mode = ASCII;
2437 }else if (is_eucg3(c2)){
2440 (*o_putc)((c2 & 0x7f) | 0x080);
2441 (*o_putc)(c1 | 0x080);
2444 (*o_putc)((c2 & 0x7f) | 0x080);
2445 (*o_putc)(c1 | 0x080);
2449 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2450 set_iconv(FALSE, 0);
2451 return; /* too late to rescue this char */
2453 output_mode = EUC_JP;
2454 (*o_putc)(c2 | 0x080);
2455 (*o_putc)(c1 | 0x080);
2460 s_oconv(nkf_char c2, nkf_char c1)
2462 #ifdef NUMCHAR_OPTION
2463 if (c2 == 0 && nkf_char_unicode_p(c1)){
2464 w16e_conv(c1, &c2, &c1);
2465 if (c2 == 0 && nkf_char_unicode_p(c1)){
2466 c2 = c1 & VALUE_MASK;
2467 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2470 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2472 c1 += 0x40 + (c1 > 0x3e);
2477 if(encode_fallback)(*encode_fallback)(c1);
2486 } else if (c2 == 0) {
2487 output_mode = ASCII;
2489 } else if (c2 == JIS_X_0201_1976_K) {
2490 output_mode = SHIFT_JIS;
2492 } else if (c2 == ISO_8859_1) {
2493 output_mode = ISO_8859_1;
2494 (*o_putc)(c1 | 0x080);
2496 } else if (is_eucg3(c2)){
2497 output_mode = SHIFT_JIS;
2498 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2504 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2505 set_iconv(FALSE, 0);
2506 return; /* too late to rescue this char */
2508 output_mode = SHIFT_JIS;
2509 e2s_conv(c2, c1, &c2, &c1);
2511 #ifdef SHIFTJIS_CP932
2513 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2514 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2520 #endif /* SHIFTJIS_CP932 */
2523 if (prefix_table[(unsigned char)c1]){
2524 (*o_putc)(prefix_table[(unsigned char)c1]);
2530 #ifdef UTF8_OUTPUT_ENABLE
2532 w_oconv(nkf_char c2, nkf_char c1)
2538 output_bom_f = FALSE;
2549 if (c2 == 0 && nkf_char_unicode_p(c1)){
2550 val = c1 & VALUE_MASK;
2551 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2553 if (c2) (*o_putc)(c2);
2554 if (c3) (*o_putc)(c3);
2555 if (c4) (*o_putc)(c4);
2562 val = e2w_conv(c2, c1);
2564 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2566 if (c2) (*o_putc)(c2);
2567 if (c3) (*o_putc)(c3);
2568 if (c4) (*o_putc)(c4);
/*
 * w_oconv16: output one character as UTF-16 in the byte order selected by
 * output_endian.
 * Visible cases: Unicode-tagged values inside the BMP (split into two bytes),
 * values above the BMP up to UNICODE_MAX (emitted as a surrogate pair), and
 * other characters, which are converted with e2w_conv first.
 * NOTE(review): the condition around the output_bom_f reset (presumably BOM
 * emission) and the final o_putc calls are elided from this excerpt.
 */
2574 w_oconv16(nkf_char c2, nkf_char c1)
2577 output_bom_f = FALSE;
2578 if (output_endian == ENDIAN_LITTLE){
2592 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2593 if (nkf_char_unicode_bmp_p(c1)) {
2594 c2 = (c1 >> 8) & 0xff;
2598 if (c1 <= UNICODE_MAX) {
/* 0xD7C0 is 0xD800 - (0x10000 >> 10): the 0x10000 offset is folded into the
 * lead-surrogate base */
2599 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2600 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2601 if (output_endian == ENDIAN_LITTLE){
/* little endian: low byte first for each surrogate */
2602 (*o_putc)(c2 & 0xff);
2603 (*o_putc)((c2 >> 8) & 0xff);
2604 (*o_putc)(c1 & 0xff);
2605 (*o_putc)((c1 >> 8) & 0xff);
/* otherwise: high byte first for each surrogate */
2607 (*o_putc)((c2 >> 8) & 0xff);
2608 (*o_putc)(c2 & 0xff);
2609 (*o_putc)((c1 >> 8) & 0xff);
2610 (*o_putc)(c1 & 0xff);
/* non-Unicode-tagged input: convert via e2w_conv, then split into bytes */
2616 nkf_char val = e2w_conv(c2, c1);
2617 c2 = (val >> 8) & 0xff;
2622 if (output_endian == ENDIAN_LITTLE){
/*
 * w_oconv32: output one character as UTF-32 in the byte order selected by
 * output_endian.  Characters that are neither ISO-8859-1 nor Unicode-tagged
 * are converted with e2w_conv first.
 * NOTE(review): the condition around the output_bom_f reset (presumably BOM
 * emission), the bodies of the first two branches and the o_putc call for the
 * most significant byte are elided from this excerpt.
 */
2632 w_oconv32(nkf_char c2, nkf_char c1)
2635 output_bom_f = FALSE;
2636 if (output_endian == ENDIAN_LITTLE){
2654 if (c2 == ISO_8859_1) {
2656 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2659 c1 = e2w_conv(c2, c1);
2662 if (output_endian == ENDIAN_LITTLE){
/* little endian: least significant byte first */
2663 (*o_putc)( c1 & 0xFF);
2664 (*o_putc)((c1 >> 8) & 0xFF);
2665 (*o_putc)((c1 >> 16) & 0xFF);
/* otherwise: more significant bytes first */
2669 (*o_putc)((c1 >> 16) & 0xFF);
2670 (*o_putc)((c1 >> 8) & 0xFF);
2671 (*o_putc)( c1 & 0xFF);
/*
 * Penalty flags accumulated in input_code.score during encoding detection.
 * Each flag is the previous one shifted left by one bit (1, 2, 4, ... 128),
 * so a numerically larger score contains a heavier penalty; h_conv selects
 * the candidate with the smallest score.
 */
2676 #define SCORE_L2 (1) /* Kanji Level 2 */
2677 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2678 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD (presumably machine-dependent) characters */
2679 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2680 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2681 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* Undefined Characters */
2682 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2683 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
/* Initial score assigned by status_reset(). */
2685 #define SCORE_INIT (SCORE_iMIME)
2687 static const nkf_char score_table_A0[] = {
2690 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2691 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
2694 static const nkf_char score_table_F0[] = {
2695 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2696 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2697 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2698 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/*
 * set_code_score: OR the given SCORE_* flag(s) into ptr->score.
 * NOTE(review): any guard around the assignment is elided from this excerpt.
 */
2702 set_code_score(struct input_code *ptr, nkf_char score)
2705 ptr->score |= score;
/*
 * clr_code_score: clear the given SCORE_* flag(s) from ptr->score.
 * NOTE(review): any guard around the assignment is elided from this excerpt.
 */
2710 clr_code_score(struct input_code *ptr, nkf_char score)
2713 ptr->score &= ~score;
/*
 * code_score: classify the character held in ptr->buf (buf[0] is the row or
 * prefix byte, buf[1] the following byte) and add the matching SCORE_* flag
 * to the candidate's score.
 * NOTE(review): the condition of the first branch (which sets SCORE_ERROR)
 * is elided from this excerpt.
 */
2718 code_score(struct input_code *ptr)
2720 nkf_char c2 = ptr->buf[0];
2721 #ifdef UTF8_OUTPUT_ENABLE
2722 nkf_char c1 = ptr->buf[1];
2725 set_code_score(ptr, SCORE_ERROR);
2726 }else if (c2 == SS2){
2727 set_code_score(ptr, SCORE_KANA);
2728 }else if (c2 == 0x8f){
2729 set_code_score(ptr, SCORE_X0212);
2730 #ifdef UTF8_OUTPUT_ENABLE
/* no Unicode mapping for this code: treat it as undefined */
2731 }else if (!e2w_conv(c2, c1)){
2732 set_code_score(ptr, SCORE_NO_EXIST);
/* rows 0x2X: per-row penalty from score_table_A0, indexed by the low nibble */
2734 }else if ((c2 & 0x70) == 0x20){
2735 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
/* rows 0x7X: per-row penalty from score_table_F0, indexed by the low nibble */
2736 }else if ((c2 & 0x70) == 0x70){
2737 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
/* rows 0x5X and 0x6X: level-2 kanji */
2738 }else if ((c2 & 0x70) >= 0x50){
2739 set_code_score(ptr, SCORE_L2);
2744 status_disable(struct input_code *ptr)
2749 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2753 status_push_ch(struct input_code *ptr, nkf_char c)
2755 ptr->buf[ptr->index++] = c;
2759 status_clear(struct input_code *ptr)
2766 status_reset(struct input_code *ptr)
2769 ptr->score = SCORE_INIT;
2773 status_reinit(struct input_code *ptr)
2776 ptr->_file_stat = 0;
2780 status_check(struct input_code *ptr, nkf_char c)
2782 if (c <= DEL && estab_f){
2788 s_status(struct input_code *ptr, nkf_char c)
2792 status_check(ptr, c);
2797 }else if (nkf_char_unicode_p(c)){
2799 }else if (0xa1 <= c && c <= 0xdf){
2800 status_push_ch(ptr, SS2);
2801 status_push_ch(ptr, c);
2804 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2806 status_push_ch(ptr, c);
2807 }else if (0xed <= c && c <= 0xee){
2809 status_push_ch(ptr, c);
2810 #ifdef SHIFTJIS_CP932
2811 }else if (is_ibmext_in_sjis(c)){
2813 status_push_ch(ptr, c);
2814 #endif /* SHIFTJIS_CP932 */
2816 }else if (0xf0 <= c && c <= 0xfc){
2818 status_push_ch(ptr, c);
2819 #endif /* X0212_ENABLE */
2821 status_disable(ptr);
2825 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2826 status_push_ch(ptr, c);
2827 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2831 status_disable(ptr);
2835 #ifdef SHIFTJIS_CP932
2836 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2837 status_push_ch(ptr, c);
2838 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2839 set_code_score(ptr, SCORE_CP932);
2844 #endif /* SHIFTJIS_CP932 */
2845 status_disable(ptr);
2848 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2849 status_push_ch(ptr, c);
2850 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2851 set_code_score(ptr, SCORE_CP932);
2854 status_disable(ptr);
2861 e_status(struct input_code *ptr, nkf_char c)
2865 status_check(ptr, c);
2870 }else if (nkf_char_unicode_p(c)){
2872 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2874 status_push_ch(ptr, c);
2876 }else if (0x8f == c){
2878 status_push_ch(ptr, c);
2879 #endif /* X0212_ENABLE */
2881 status_disable(ptr);
2885 if (0xa1 <= c && c <= 0xfe){
2886 status_push_ch(ptr, c);
2890 status_disable(ptr);
2895 if (0xa1 <= c && c <= 0xfe){
2897 status_push_ch(ptr, c);
2899 status_disable(ptr);
2901 #endif /* X0212_ENABLE */
2905 #ifdef UTF8_INPUT_ENABLE
/*
 * w_status: feed one input byte `c` to the detection state machine of a
 * candidate encoding whose lead-byte classes are 0xc0..0xdf, 0xe0..0xef and
 * 0xf0..0xf4 and whose following bytes must lie in 0x80..0xbf (the UTF-8
 * layout).  Bytes that do not fit disable the candidate via status_disable().
 * NOTE(review): the switch on ptr->stat, the assignments to ptr->stat and the
 * scoring calls are elided from this excerpt.
 */
2907 w_status(struct input_code *ptr, nkf_char c)
2911 status_check(ptr, c);
2916 }else if (nkf_char_unicode_p(c)){
/* lead byte of a two-, three- or four-byte sequence: start collecting */
2918 }else if (0xc0 <= c && c <= 0xdf){
2920 status_push_ch(ptr, c);
2921 }else if (0xe0 <= c && c <= 0xef){
2923 status_push_ch(ptr, c);
2924 }else if (0xf0 <= c && c <= 0xf4){
2926 status_push_ch(ptr, c);
2928 status_disable(ptr);
/* continuation byte: once more bytes than ptr->stat are buffered, note
 * whether they form EF BB BF and convert them in place with w2e_conv */
2933 if (0x80 <= c && c <= 0xbf){
2934 status_push_ch(ptr, c);
2935 if (ptr->index > ptr->stat){
2936 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2937 && ptr->buf[2] == 0xbf);
2938 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2939 &ptr->buf[0], &ptr->buf[1]);
2946 status_disable(ptr);
/* second continuation-byte state: keep buffering while index < stat */
2950 if (0x80 <= c && c <= 0xbf){
2951 if (ptr->index < ptr->stat){
2952 status_push_ch(ptr, c);
2957 status_disable(ptr);
2965 code_status(nkf_char c)
2967 int action_flag = 1;
2968 struct input_code *result = 0;
2969 struct input_code *p = input_code_list;
2971 if (!p->status_func) {
2975 if (!p->status_func)
2977 (p->status_func)(p, c);
2980 }else if(p->stat == 0){
2991 if (result && !estab_f){
2992 set_iconv(TRUE, result->iconv_func);
2993 }else if (c <= DEL){
2994 struct input_code *ptr = input_code_list;
3004 nkf_buf_t *std_gc_buf;
3005 nkf_char broken_state;
3006 nkf_buf_t *broken_buf;
3007 nkf_char mimeout_state;
3011 static nkf_state_t *nkf_state = NULL;
3013 #define STD_GC_BUFSIZE (256)
/*
 * nkf_state_init: prepare the global nkf_state object.
 * Two paths are visible: clearing the three buffers of an existing state,
 * and allocating a new state with buffers of capacity STD_GC_BUFSIZE, 3
 * and 9.  broken_state and mimeout_state are reset to 0.
 * NOTE(review): the condition separating the two paths (presumably whether
 * nkf_state is already non-NULL) is elided from this excerpt.
 */
3016 nkf_state_init(void)
/* reuse path: empty the existing buffers */
3019 nkf_buf_clear(nkf_state->std_gc_buf);
3020 nkf_buf_clear(nkf_state->broken_buf);
3021 nkf_buf_clear(nkf_state->nfc_buf);
/* first-use path: allocate the state and its buffers */
3024 nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3025 nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3026 nkf_state->broken_buf = nkf_buf_new(3);
3027 nkf_state->nfc_buf = nkf_buf_new(9);
3029 nkf_state->broken_state = 0;
3030 nkf_state->mimeout_state = 0;
3037 if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3038 return nkf_buf_pop(nkf_state->std_gc_buf);
3045 std_ungetc(nkf_char c, FILE *f)
3047 nkf_buf_push(nkf_state->std_gc_buf, c);
3053 std_putc(nkf_char c)
3060 static nkf_char hold_buf[HOLD_SIZE*2];
3061 static int hold_count = 0;
/*
 * push_hold_buf: append c2 to hold_buf (capacity HOLD_SIZE*2).
 * Returns EOF when the buffer has become full after the append, otherwise
 * the new element count.
 * NOTE(review): the statement guarded by the initial fullness check is elided
 * from this excerpt; presumably it returns EOF without storing.
 */
3063 push_hold_buf(nkf_char c2)
3065 if (hold_count >= HOLD_SIZE*2)
3067 hold_buf[hold_count++] = c2;
3068 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
/*
 * h_conv: buffer input in hold_buf while the input encoding is undecided,
 * then replay the buffered bytes through the selected converter.
 * Phase 1 reads from i_getc until EOF, the hold buffer fills, or estab_f
 * becomes set.  If needed, the candidate in input_code_list with the lowest
 * score (among those with a status_func) is installed via set_iconv.
 * Phase 2 walks hold_buf, fetching further bytes from the buffer or from
 * i_getc when the converter asks for them.
 * NOTE(review): the case labels, several branch bodies and the return value
 * are elided from this excerpt.
 */
3072 h_conv(FILE *f, nkf_char c1, nkf_char c2)
3078 /** it must NOT be in the kanji shift sequence */
3079 /** it must NOT be written in JIS7 */
3080 /** and it must be after 2 byte 8bit code */
3086 while ((c2 = (*i_getc)(f)) != EOF) {
3092 if (push_hold_buf(c2) == EOF || estab_f) {
3098 struct input_code *p = input_code_list;
3099 struct input_code *result = p;
3104 if (p->status_func && p->score < result->score) {
3109 set_iconv(TRUE, result->iconv_func);
3114 ** 1) EOF is detected, or
3115 ** 2) Code is established, or
3116 ** 3) Buffer is FULL (but last word is pushed)
3118 ** in 1) and 3) cases, we continue to use
3119 ** Kanji codes by oconv and leave estab_f unchanged.
3124 while (hold_index < hold_count){
3125 c1 = hold_buf[hold_index++];
3126 if (nkf_char_unicode_p(c1)) {
3130 else if (c1 <= DEL){
/* with s_iconv active, a lone byte 0xa1..0xdf is passed on as JIS X 0201 kana */
3133 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3134 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3137 if (hold_index < hold_count){
3138 c2 = hold_buf[hold_index++];
/* the converter's return value selects how many extra bytes to fetch */
3148 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
/* two more bytes: c3 and c4 are passed packed into the third argument */
3151 if (hold_index < hold_count){
3152 c3 = hold_buf[hold_index++];
3153 } else if ((c3 = (*i_getc)(f)) == EOF) {
3158 if (hold_index < hold_count){
3159 c4 = hold_buf[hold_index++];
3160 } else if ((c4 = (*i_getc)(f)) == EOF) {
3165 (*iconv)(c1, c2, (c3<<8)|c4);
3168 /* 3 bytes EUC or UTF-8 */
3169 if (hold_index < hold_count){
3170 c3 = hold_buf[hold_index++];
3171 } else if ((c3 = (*i_getc)(f)) == EOF) {
3177 (*iconv)(c1, c2, c3);
3180 if (c3 == EOF) break;
3186 * Check and Ignore BOM
3192 switch(c2 = (*i_getc)(f)){
3194 if((c2 = (*i_getc)(f)) == 0x00){
3195 if((c2 = (*i_getc)(f)) == 0xFE){
3196 if((c2 = (*i_getc)(f)) == 0xFF){
3197 if(!input_encoding){
3198 set_iconv(TRUE, w_iconv32);
3200 if (iconv == w_iconv32) {
3201 input_endian = ENDIAN_BIG;
3204 (*i_ungetc)(0xFF,f);
3205 }else (*i_ungetc)(c2,f);
3206 (*i_ungetc)(0xFE,f);
3207 }else if(c2 == 0xFF){
3208 if((c2 = (*i_getc)(f)) == 0xFE){
3209 if(!input_encoding){
3210 set_iconv(TRUE, w_iconv32);
3212 if (iconv == w_iconv32) {
3213 input_endian = ENDIAN_2143;
3216 (*i_ungetc)(0xFF,f);
3217 }else (*i_ungetc)(c2,f);
3218 (*i_ungetc)(0xFF,f);
3219 }else (*i_ungetc)(c2,f);
3220 (*i_ungetc)(0x00,f);
3221 }else (*i_ungetc)(c2,f);
3222 (*i_ungetc)(0x00,f);
3225 if((c2 = (*i_getc)(f)) == 0xBB){
3226 if((c2 = (*i_getc)(f)) == 0xBF){
3227 if(!input_encoding){
3228 set_iconv(TRUE, w_iconv);
3230 if (iconv == w_iconv) {
3233 (*i_ungetc)(0xBF,f);
3234 }else (*i_ungetc)(c2,f);
3235 (*i_ungetc)(0xBB,f);
3236 }else (*i_ungetc)(c2,f);
3237 (*i_ungetc)(0xEF,f);
3240 if((c2 = (*i_getc)(f)) == 0xFF){
3241 if((c2 = (*i_getc)(f)) == 0x00){
3242 if((c2 = (*i_getc)(f)) == 0x00){
3243 if(!input_encoding){
3244 set_iconv(TRUE, w_iconv32);
3246 if (iconv == w_iconv32) {
3247 input_endian = ENDIAN_3412;
3250 (*i_ungetc)(0x00,f);
3251 }else (*i_ungetc)(c2,f);
3252 (*i_ungetc)(0x00,f);
3253 }else (*i_ungetc)(c2,f);
3254 if(!input_encoding){
3255 set_iconv(TRUE, w_iconv16);
3257 if (iconv == w_iconv16) {
3258 input_endian = ENDIAN_BIG;
3261 (*i_ungetc)(0xFF,f);
3262 }else (*i_ungetc)(c2,f);
3263 (*i_ungetc)(0xFE,f);
3266 if((c2 = (*i_getc)(f)) == 0xFE){
3267 if((c2 = (*i_getc)(f)) == 0x00){
3268 if((c2 = (*i_getc)(f)) == 0x00){
3269 if(!input_encoding){
3270 set_iconv(TRUE, w_iconv32);
3272 if (iconv == w_iconv32) {
3273 input_endian = ENDIAN_LITTLE;
3276 (*i_ungetc)(0x00,f);
3277 }else (*i_ungetc)(c2,f);
3278 (*i_ungetc)(0x00,f);
3279 }else (*i_ungetc)(c2,f);
3280 if(!input_encoding){
3281 set_iconv(TRUE, w_iconv16);
3283 if (iconv == w_iconv16) {
3284 input_endian = ENDIAN_LITTLE;
3287 (*i_ungetc)(0xFE,f);
3288 }else (*i_ungetc)(c2,f);
3289 (*i_ungetc)(0xFF,f);
/*
 * broken_getc: input filter that looks for "$@" / "$B" while input_mode is
 * ASCII or JIS X 0201 kana, and "(J" / "(B" while input_mode is JIS X 0208
 * or JIS X 0201 kana, provided the previously seen character was not ESC.
 * When the pair matches, both characters are queued in broken_buf (second
 * character pushed first) so that later calls return them.
 * NOTE(review): this presumably restores ISO-2022-JP escape sequences whose
 * ESC byte was lost; the reads of c and c1 and the return values are elided
 * from this excerpt.
 */
3298 broken_getc(FILE *f)
/* drain queued characters before reading new input */
3302 if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3303 return nkf_buf_pop(nkf_state->broken_buf);
3306 if (c=='$' && nkf_state->broken_state != ESC
3307 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3309 nkf_state->broken_state = 0;
3310 if (c1=='@'|| c1=='B') {
3311 nkf_buf_push(nkf_state->broken_buf, c1);
3312 nkf_buf_push(nkf_state->broken_buf, c);
3318 } else if (c=='(' && nkf_state->broken_state != ESC
3319 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3321 nkf_state->broken_state = 0;
3322 if (c1=='J'|| c1=='B') {
3323 nkf_buf_push(nkf_state->broken_buf, c1);
3324 nkf_buf_push(nkf_state->broken_buf, c);
/* remember the character just seen for the ESC test on the next call */
3331 nkf_state->broken_state = c;
/*
 * broken_ungetc: push `c` back onto broken_buf, but only while fewer than
 * two characters are queued there; otherwise the push is skipped.
 * NOTE(review): the return value is elided from this excerpt.
 */
3337 broken_ungetc(nkf_char c, FILE *f)
3339 if (nkf_buf_length(nkf_state->broken_buf) < 2)
3340 nkf_buf_push(nkf_state->broken_buf, c);
/*
 * eol_conv: line-ending filter in front of o_eol_conv.
 * With guess_f set it also tracks the input's line-ending style in
 * input_eol: 0 = not seen yet, CR / LF / CRLF = consistent so far, EOF =
 * mixed styles seen (tracking then stops).
 * On output, a pending CR or an LF is replaced by the line ending selected
 * through eolmode_f; all other characters are passed through unchanged.
 * NOTE(review): a few branch heads and the reset of prev_cr are elided from
 * this excerpt.
 */
3345 eol_conv(nkf_char c2, nkf_char c1)
3347 if (guess_f && input_eol != EOF) {
3348 if (c2 == 0 && c1 == LF) {
/* first LF fixes the style; a later disagreement marks the input as mixed */
3349 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3350 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3351 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3353 else if (!input_eol) input_eol = CR;
3354 else if (input_eol != CR) input_eol = EOF;
3356 if (prev_cr || (c2 == 0 && c1 == LF)) {
/* emit CR unless LF-only output is selected, LF unless CR-only is selected */
3358 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3359 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
/* a CR is held back until the next character shows whether LF follows */
3361 if (c2 == 0 && c1 == CR) prev_cr = CR;
3362 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3366 put_newline(void (*func)(nkf_char))
3368 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3383 oconv_newline(void (*func)(nkf_char, nkf_char))
3385 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3400 Return value of fold_conv()
3402 LF add newline and output char
3403 CR add newline and output nothing
3406 1 (or else) normal output
3408 fold state in prev (previous character)
3410 >0x80 Japanese (X0208/X0201)
3415 This fold algorithm does not preserve leading spaces in a line.
3416 This is the main difference from fmt.
3419 #define char_size(c2,c1) (c2?2:1)
3422 fold_conv(nkf_char c2, nkf_char c1)
3425 nkf_char fold_state;
3427 if (c1== CR && !fold_preserve_f) {
3428 fold_state=0; /* ignore cr */
3429 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3431 fold_state=0; /* ignore cr */
3432 } else if (c1== BS) {
3433 if (f_line>0) f_line--;
3435 } else if (c2==EOF && f_line != 0) { /* close open last line */
3437 } else if ((c1==LF && !fold_preserve_f)
3438 || ((c1==CR||(c1==LF&&f_prev!=CR))
3439 && fold_preserve_f)) {
3441 if (fold_preserve_f) {
3445 } else if ((f_prev == c1 && !fold_preserve_f)
3446 || (f_prev == LF && fold_preserve_f)
3447 ) { /* duplicate newline */
3450 fold_state = LF; /* output two newline */
3456 if (f_prev&0x80) { /* Japanese? */
3458 fold_state = 0; /* ignore given single newline */
3459 } else if (f_prev==SP) {
3463 if (++f_line<=fold_len)
3467 fold_state = CR; /* fold and output nothing */
3471 } else if (c1=='\f') {
3474 fold_state = LF; /* output newline and clear */
3475 } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3476 /* JIS X 0208 ideographic space ("kankaku") or ASCII blank */
3478 fold_state = 0; /* remove duplicate spaces */
3481 if (++f_line<=fold_len)
3482 fold_state = SP; /* output ASCII space only */
3484 f_prev = SP; f_line = 0;
3485 fold_state = CR; /* fold and output nothing */
3489 prev0 = f_prev; /* we still need this one... , but almost done */
3491 if (c2 || c2 == JIS_X_0201_1976_K)
3492 f_prev |= 0x80; /* this is Japanese */
3493 f_line += char_size(c2,c1);
3494 if (f_line<=fold_len) { /* normal case */
3497 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3498 f_line = char_size(c2,c1);
3499 fold_state = LF; /* We can't wait, do fold now */
3500 } else if (c2 == JIS_X_0201_1976_K) {
3501 /* simple kinsoku rules return 1 means no folding */
3502 if (c1==(0xde&0x7f)) fold_state = 1; /*
゛ (voiced sound mark) */
3503 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
゜ (semi-voiced sound mark) */
3504 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
。 (ideographic full stop) */
3505 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
， (fullwidth comma) */
3506 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3507 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3508 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3510 fold_state = LF;/* add one new f_line before this character */
3513 fold_state = LF;/* add one new f_line before this character */
3516 /* kinsoku point in ASCII */
3517 if ( c1==')'|| /* { [ ( */
3528 /* just after special */
3529 } else if (!is_alnum(prev0)) {
3530 f_line = char_size(c2,c1);
3532 } else if ((prev0==SP) || /* ignored new f_line */
3533 (prev0==LF)|| /* ignored new f_line */
3534 (prev0&0x80)) { /* X0208 - ASCII */
3535 f_line = char_size(c2,c1);
3536 fold_state = LF;/* add one new f_line before this character */
3538 fold_state = 1; /* default no fold in ASCII */
3542 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3543 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3544 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3545 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3546 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3547 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3548 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3549 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3550 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3551 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3552 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3553 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3554 /* default no fold in kinsoku */
3557 f_line = char_size(c2,c1);
3558 /* add one new f_line before this character */
3561 f_line = char_size(c2,c1);
3563 /* add one new f_line before this character */
3568 /* terminator process */
3569 switch(fold_state) {
3571 oconv_newline(o_fconv);
3577 oconv_newline(o_fconv);
/*
 * Previous (c2, c1) pair remembered by z_conv between calls; the visible code
 * reads them to combine a JIS X 0201 kana with a following sound mark.
 * (The assignments themselves are on lines not present in this excerpt.)
 */
3588 static nkf_char z_prev2=0,z_prev1=0;
/*
 * z_conv - width-conversion stage; every result is forwarded through o_zconv.
 *
 * Visible behaviour:
 *  - JIS X 0201 kana input is mapped to two-byte codes through the cv/dv/ev
 *    tables (indexed by (c1-SP)*2): dv when the next character is a voiced
 *    sound mark, ev when it is a semi-voiced sound mark, cv otherwise.
 *  - alpha_f bit 1 handles the JIS X 0208 alphabet row (c2 == 0x23) and the
 *    fv table, bit 4 and bit 8 select further ASCII substitutions.
 *  - c2 == 0x25 (JIS X 0208 katakana) is converted to JIS X 0201 kana through
 *    fullwidth_to_halfwidth[], whose entries pack the kana in the high byte
 *    and an optional sound mark in the low byte.
 */
3591 z_conv(nkf_char c2, nkf_char c1)
3594 /* if (c2) c1 &= 0x7f; assertion */
3596 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3602 if (z_prev2 == JIS_X_0201_1976_K) {
3603 if (c2 == JIS_X_0201_1976_K) {
3604 if (c1 == (0xde&0x7f)) { /* voiced sound mark (dakuten) */
3606 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3608 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* semi-voiced sound mark (handakuten) */
3610 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3615 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3617 if (c2 == JIS_X_0201_1976_K) {
3618 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3619 /* wait for a following dakuten or handakuten */
3624 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3635 if (alpha_f&1 && c2 == 0x23) {
3636 /* JISX0208 Alphabet */
3638 } else if (c2 == 0x21) {
3639 /* JISX0208 Kigou */
3644 } else if (alpha_f&4) {
3649 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3655 if (alpha_f&8 && c2 == 0) {
/*
 * NOTE(review): the four case arms below assign the bare characters, and the
 * double-quote arm is not a well-formed string literal.  This looks like HTML
 * entity strings (gt, lt, quot, amp) that were decoded by a text-extraction
 * step; confirm against upstream and restore the entity text.
 */
3657 const char *entity = 0;
3659 case '>': entity = ">"; break;
3660 case '<': entity = "<"; break;
3661 case '\"': entity = """; break;
3662 case '&': entity = "&"; break;
3665 while (*entity) (*o_zconv)(0, *entity++);
3671 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3676 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3680 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3684 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3688 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3692 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3696 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3700 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3704 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3709 (*o_zconv)(JIS_X_0201_1976_K, c);
3712 } else if (c2 == 0x25) {
3713 /* JISX0208 Katakana */
3714 static const int fullwidth_to_halfwidth[] =
3716 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3717 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3718 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3719 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3720 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3721 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3722 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3723 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3724 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3725 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3726 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3727 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3729 if (fullwidth_to_halfwidth[c1-0x20]){
3730 c2 = fullwidth_to_halfwidth[c1-0x20];
3731 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3733 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
/*
 * rot13(c) / rot47(c) - character rotation macros.
 * Visible arms of rot13: values up to M get 13 added, values up to Z get 13
 * subtracted, and likewise for m and z.  Visible arms of rot47: values up to
 * O get 47 added, values up to tilde get 47 subtracted.  The lower-bound arms
 * and the closing parenthesis are on lines not present in this excerpt.
 * Caution: the argument is unparenthesized and evaluated several times, so it
 * must be a simple, side-effect-free expression.
 */
3743 #define rot13(c) ( \
3745 (c <= 'M') ? (c + 13): \
3746 (c <= 'Z') ? (c - 13): \
3748 (c <= 'm') ? (c + 13): \
3749 (c <= 'z') ? (c - 13): \
3753 #define rot47(c) ( \
3755 ( c <= 'O') ? (c + 47) : \
3756 ( c <= '~') ? (c - 47) : \
/*
 * rot_conv - rotation stage of the output filter chain; passes (c2, c1) on to
 * o_rot_conv.  Single-byte character sets (c2 == 0, JIS X 0201 kana,
 * ISO-8859-1) take the first branch; presumably rot13 is applied there and
 * rot47 to two-byte characters, but those lines are not present in this
 * excerpt - confirm.
 */
3761 rot_conv(nkf_char c2, nkf_char c1)
3763 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3769 (*o_rot_conv)(c2,c1);
/*
 * hira_conv - hiragana/katakana conversion stage; results go to o_hira_conv.
 *
 * Two groups of tests are visible.  The first handles c1 in 0x21..0x73, the
 * special case c1 == 0x74 (replaced by U+3094 when the output encoding is a
 * Unicode one) and the row 0x21 symbols 0x33/0x34.  The second handles U+3094
 * itself, row 0x24 with c1 in 0x21..0x73, and the row 0x21 symbols 0x35/0x36.
 * Presumably these are the katakana-to-hiragana and hiragana-to-katakana
 * directions; the option tests selecting them and the code that rewrites c2
 * are on lines not present in this excerpt.
 */
3773 hira_conv(nkf_char c2, nkf_char c1)
3777 if (0x20 < c1 && c1 < 0x74) {
3779 (*o_hira_conv)(c2,c1);
3781 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
/* 0x74 has no counterpart in the other row; emit U+3094 directly for Unicode output */
3783 c1 = nkf_char_unicode_new(0x3094);
3784 (*o_hira_conv)(c2,c1);
3787 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3789 (*o_hira_conv)(c2,c1);
3794 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3797 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3799 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3803 (*o_hira_conv)(c2,c1);
/*
 * iso2022jp_check_conv - filter stage that inspects (c2, c1) before handing
 * it to o_iso2022jp_check_conv.
 *
 * Visible checks: a single-byte character (c2 in 0x00..0x20) with c1 in
 * 0x7f..0xff, a two-byte character whose row c2 lies in 0x29..0x2f or
 * 0x75..0x7e, and a scan of the RANGE_NUM_MAX [start, end] pairs in range[]
 * against the code c.  What is substituted when a check matches, and how c is
 * built from c2 and c1, is on lines not present in this excerpt.
 */
3808 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3810 #define RANGE_NUM_MAX 18
3811 static const nkf_char range[RANGE_NUM_MAX][2] = {
3832 nkf_char start, end, c;
3834 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3838 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3843 for (i = 0; i < RANGE_NUM_MAX; i++) {
3844 start = range[i][0];
3847 if (c >= start && c <= end) {
3852 (*o_iso2022jp_check_conv)(c2,c1);
/*
 * MIME encoded-word tables.  mime_pattern, mime_priority_func, mime_encode and
 * mime_encode_method are parallel arrays: entry i of each describes the same
 * charset/encoding pair (open_mime and mime_begin_strict index them together).
 */
3856 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
/* Encoded-word prefixes; \075 is the octal escape for the equals sign. */
3858 static const unsigned char *mime_pattern[] = {
3859 (const unsigned char *)"\075?EUC-JP?B?",
3860 (const unsigned char *)"\075?SHIFT_JIS?B?",
3861 (const unsigned char *)"\075?ISO-8859-1?Q?",
3862 (const unsigned char *)"\075?ISO-8859-1?B?",
3863 (const unsigned char *)"\075?ISO-2022-JP?B?",
3864 (const unsigned char *)"\075?ISO-2022-JP?B?",
3865 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3866 #if defined(UTF8_INPUT_ENABLE)
3867 (const unsigned char *)"\075?UTF-8?B?",
3868 (const unsigned char *)"\075?UTF-8?Q?",
3870 (const unsigned char *)"\075?US-ASCII?Q?",
3875 /* Marker used to raise the priority of the input converter matching the pattern */
/*
 * NOTE(review): only six entries precede the UTF8_INPUT_ENABLE section here,
 * while the other three tables list seven before theirs; confirm the tables
 * stay index-aligned.
 */
3876 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3877 e_iconv, s_iconv, 0, 0, 0, 0,
3878 #if defined(UTF8_INPUT_ENABLE)
/* Output mode that selects each pattern (compared against mode in open_mime). */
3884 static const nkf_char mime_encode[] = {
3885 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
3886 #if defined(UTF8_INPUT_ENABLE)
/* Transfer encoding letter of each pattern: B or Q. */
3893 static const nkf_char mime_encode_method[] = {
3894 'B', 'B','Q', 'B', 'B', 'B', 'Q',
3895 #if defined(UTF8_INPUT_ENABLE)
3903 /* MIME preprocessor fifo */
/*
 * The buffer is a power-of-two ring: indices grow without bound and are
 * reduced with MIME_BUF_MASK on every access through mime_input_buf().
 */
3905 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3906 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3907 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3909 unsigned char buf[MIME_BUF_SIZE];
3911 unsigned int last; /* index just past the decoded data */
3912 unsigned int input; /* read/write position in the undecoded data */
/* Input converter saved by mime_begin_strict and restored by unswitch_mime_getc. */
3914 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
/* Size of the recovery buffer in mime_begin_strict and scan limit in the non-strict header scan. */
3916 #define MAXRECOVER 20
/*
 * mime_input_buf_unshift - push c back onto the front of the MIME ring buffer
 * by pre-decrementing the top index and storing c (truncated to a byte) there.
 */
3919 mime_input_buf_unshift(nkf_char c)
3921 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
/*
 * mime_ungetc - ungetc counterpart of mime_getc: c is pushed back into the
 * MIME ring buffer.  f is not used by the visible code.
 */
3925 mime_ungetc(nkf_char c, FILE *f)
3927 mime_input_buf_unshift(c);
/*
 * mime_ungetc_buf - ungetc used while reading encoded text in strict mode.
 * The character is returned either to the underlying stream through
 * i_mungetc_buf or to the undecoded part of the ring buffer; presumably
 * mimebuf_f selects between the two as in mime_getc_buf (the test itself is
 * not present in this excerpt).
 */
3932 mime_ungetc_buf(nkf_char c, FILE *f)
3935 (*i_mungetc_buf)(c,f);
3937 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
/*
 * mime_getc_buf - fetch the next undecoded character: from the underlying
 * stream via i_mgetc_buf when mimebuf_f is set, otherwise from the ring
 * buffer at the input index (which is then advanced).
 */
3942 mime_getc_buf(FILE *f)
3944 /* we do not keep EOF in mime_input_buf, because it contains ?= as
3945 a terminator. It was checked in mime_integrity. */
3946 return ((mimebuf_f)?
3947 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
/*
 * switch_mime_getc - install the MIME decoder on the input side.
 * The current i_getc/i_ungetc are saved in i_mgetc/i_mungetc and replaced by
 * mime_getc/mime_ungetc.  In STRICT_MIME mode a second level is inserted: the
 * saved pair moves to i_mgetc_buf/i_mungetc_buf and the buffered accessors
 * take its place.  Does nothing if mime_getc is already installed.
 */
3951 switch_mime_getc(void)
3953 if (i_getc!=mime_getc) {
3954 i_mgetc = i_getc; i_getc = mime_getc;
3955 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3956 if(mime_f==STRICT_MIME) {
3957 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3958 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
/*
 * unswitch_mime_getc - undo switch_mime_getc: restore the saved accessors
 * (unwinding the extra STRICT_MIME level first) and, if mime_begin_strict
 * replaced the input converter, put the saved one back.
 */
3964 unswitch_mime_getc(void)
3966 if(mime_f==STRICT_MIME) {
3967 i_mgetc = i_mgetc_buf;
3968 i_mungetc = i_mungetc_buf;
3971 i_ungetc = i_mungetc;
3972 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3973 mime_iconv_back = NULL; /* restore at most once */
/*
 * mime_integrity - pre-read an encoded word into the ring buffer and check
 * that it is well formed before decoding starts.
 *
 * The header pattern p is copied into the buffer first and q records where the
 * payload begins.  Characters are then read with i_getc until EOF, until the
 * buffer is full, or until the closing ?= pair is seen; in the latter case the
 * input index is rewound to q so that decoding starts at the payload.  Any
 * character outside the base64/quoted alphabet tested below ends the scan, and
 * the buffered text is then replayed undecoded (mime_decode_mode = 1).
 *
 * NOTE(review): in this excerpt the comment opened on the line after the
 * signature has no terminator of its own, so a compiler would swallow the
 * following statements up to the next comment terminator.  The closing
 * delimiter needs to be restored.
 */
3977 mime_integrity(FILE *f, const unsigned char *p)
3981 /* In buffered mode, read until =? or NL or buffer full
3983 mime_input_state.input = mime_input_state.top;
3984 mime_input_state.last = mime_input_state.top;
3986 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3988 q = mime_input_state.input;
3989 while((c=(*i_getc)(f))!=EOF) {
3990 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3991 break; /* buffer full */
3993 if (c=='=' && d=='?') {
3994 /* checked. skip header, start decode */
3995 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3996 /* mime_last_input = mime_input_state.input; */
3997 mime_input_state.input = q;
4001 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4003 /* Should we check length mod 4? */
4004 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4007 /* In case of Incomplete MIME, no MIME decode */
4008 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4009 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
4010 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
4011 switch_mime_getc(); /* anyway we need buffered getc */
/*
 * mime_begin_strict - recognise an encoded-word header in strict mode.
 *
 * Called after the leading =? has been seen.  Characters are read with i_getc,
 * saved in the recovery buffer r[], and compared case-insensitively with the
 * current candidate in mime_pattern[].  On a mismatch the later patterns are
 * searched for one that shares the prefix matched so far and also accepts the
 * new character.  When a pattern is matched completely, mime_decode_mode is
 * taken from the method letter of the pattern (p[i-2]), the input converter is
 * switched to mime_priority_func[j] (the old one is kept in mime_iconv_back),
 * and the result of mime_integrity() is returned.
 */
4016 mime_begin_strict(FILE *f)
4020 const unsigned char *p,*q;
4021 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4023 mime_decode_mode = FALSE;
4024 /* =? has been checked */
4026 p = mime_pattern[j];
4029 for(i=2;p[i]>SP;i++) { /* start at =? */
4030 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
4031 /* pattern fails, try next one */
4033 while (mime_pattern[++j]) {
4034 p = mime_pattern[j];
4035 for(k=2;k<i;k++) /* assume length(p) > i */
4036 if (p[k]!=q[k]) break;
4037 if (k==i && nkf_toupper(c1)==p[k]) break;
4039 p = mime_pattern[j];
4040 if (p) continue; /* found next one, continue */
4041 /* all fails, output from recovery buffer */
4049 mime_decode_mode = p[i-2];
4051 mime_iconv_back = iconv;
4052 set_iconv(FALSE, mime_priority_func[j]);
4053 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4055 if (mime_decode_mode=='B') {
4056 mimebuf_f = unbuf_f;
4058 /* do MIME integrity check */
4059 return mime_integrity(f,mime_pattern[j]);
4073 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4074 /* re-read and convert again from mime_buffer. */
4076 /* =? has been checked */
4077 k = mime_input_state.last;
4078 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
4079 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4080 /* We accept any character type even if it is broken by new lines */
4081 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4082 if (c1==LF||c1==SP||c1==CR||
4083 c1=='-'||c1=='_'||is_alnum(c1)) continue;
4085 /* Failed. But this could be another MIME preamble */
4087 mime_input_state.last--;
4093 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4094 if (!(++i<MAXRECOVER) || c1==EOF) break;
4095 if (c1=='b'||c1=='B') {
4096 mime_decode_mode = 'B';
4097 } else if (c1=='q'||c1=='Q') {
4098 mime_decode_mode = 'Q';
4102 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4103 if (!(++i<MAXRECOVER) || c1==EOF) break;
4105 mime_decode_mode = FALSE;
4111 if (!mime_decode_mode) {
4112 /* false MIME preamble, restart from mime_buffer */
4113 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4114 /* Since we are in MIME mode until buffer becomes empty, */
4115 /* we never go into mime_begin again for a while. */
4118 /* discard MIME preamble, and go to MIME mode */
4119 mime_input_state.last = k;
4120 /* do no MIME integrity check */
4121 return c1; /* used only for checking EOF */
/*
 * debug - write str followed by a newline to stderr; a null pointer is
 * printed as the text NULL.  (Any guard around the call is on lines not
 * present in this excerpt.)
 */
4132 debug(const char *str)
4135 fprintf(stderr, "%s\n", str ? str : "NULL");
/*
 * set_input_codename - record the name of the detected input encoding.
 * The first name seen is stored as is.  If a later call reports a different
 * name, input_codename is replaced by the empty string, which
 * get_guessed_code() reports as BINARY.
 */
4141 set_input_codename(const char *codename)
4143 if (!input_codename) {
4144 input_codename = codename;
4145 } else if (strcmp(codename, input_codename) != 0) {
4146 input_codename = ""; /* conflicting detections */
/*
 * get_guessed_code - turn the recorded detection result into the name that is
 * reported to the user, refining it with the score flags of the active input
 * converter.  Updates input_codename and returns it.
 *
 *   empty string            -> BINARY
 *   nothing recorded        -> ASCII
 *   Shift_JIS  + DEPEND/CP932 score -> CP932
 *   EUC-JP     + X0212 score        -> EUCJP-MS
 *   EUC-JP     + DEPEND/CP932 score -> CP51932
 *   ISO-2022-JP + KANA score        -> CP50221
 *   ISO-2022-JP + DEPEND/CP932 score -> CP50220
 */
4151 get_guessed_code(void)
4153 if (input_codename && !*input_codename) {
4154 input_codename = "BINARY";
4156 struct input_code *p = find_inputcode_byfunc(iconv);
4157 if (!input_codename) {
4158 input_codename = "ASCII";
4159 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4160 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4161 input_codename = "CP932";
4162 } else if (strcmp(input_codename, "EUC-JP") == 0) {
4163 if (p->score & (SCORE_X0212))
4164 input_codename = "EUCJP-MS";
4165 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4166 input_codename = "CP51932";
4167 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4168 if (p->score & (SCORE_KANA))
4169 input_codename = "CP50221";
4170 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4171 input_codename = "CP50220";
4174 return input_codename;
4177 #if !defined(PERL_XS) && !defined(WIN32DLL)
/*
 * print_guessed_code - print the detection result on stdout, prefixed by the
 * file name and a colon when filename is not NULL.  The name comes from
 * get_guessed_code(); the conditional chain at the end selects a suffix that
 * describes the line ending recorded in input_eol (CR, LF, CRLF, or mixed when
 * input_eol is EOF).  The statement that prints the suffix is on lines not
 * present in this excerpt.
 */
4179 print_guessed_code(char *filename)
4181 if (filename != NULL) printf("%s: ", filename);
4182 if (input_codename && !*input_codename) {
4185 input_codename = get_guessed_code();
4187 printf("%s\n", input_codename);
4191 input_eol == CR ? " (CR)" :
4192 input_eol == LF ? " (LF)" :
4193 input_eol == CRLF ? " (CRLF)" :
4194 input_eol == EOF ? " (MIXED NL)" :
/*
 * hex_getc - shared reader for escapes made of a marker character followed by
 * two hexadecimal digits (called elsewhere in this file with a colon and with
 * a percent sign as ch).
 *
 * g and u are the getc/ungetc pair of the layer below.  When both following
 * characters are hexadecimal digits the decoded byte (high nibble from c2, low
 * nibble from c3) is returned.  The handling of the non-matching cases is on
 * lines not present in this excerpt; presumably the characters are pushed back
 * with u and the first one is returned unchanged.
 */
4204 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4206 nkf_char c1, c2, c3;
4212 if (!nkf_isxdigit(c2)){
4217 if (!nkf_isxdigit(c3)){
4222 return (hex2bin(c2) << 4) | hex2bin(c3);
4228 return hex_getc(':', f, i_cgetc, i_cungetc);
/* cap_ungetc - ungetc for the colon-hex input layer; delegates to the layer below (i_cungetc). */
4232 cap_ungetc(nkf_char c, FILE *f)
4234 return (*i_cungetc)(c, f);
4240 return hex_getc('%', f, i_ugetc, i_uungetc);
/* url_ungetc - ungetc for the percent-hex input layer; delegates to the layer below (i_uungetc). */
4244 url_ungetc(nkf_char c, FILE *f)
4246 return (*i_uungetc)(c, f);
4250 #ifdef NUMCHAR_OPTION
/*
 * numchar_getc - input layer that decodes numeric character references.
 *
 * g and u are the getc/ungetc pair of the layer below.  After an x or X marker
 * up to 7 hexadecimal digits are accumulated, otherwise up to 8 decimal
 * digits; on success the value is returned wrapped by nkf_char_unicode_new().
 * The surrounding syntax checks (presumably the ampersand, hash and semicolon
 * of an SGML-style reference) and the fallback path are on lines not present
 * in this excerpt.
 */
4252 numchar_getc(FILE *f)
4254 nkf_char (*g)(FILE *) = i_ngetc;
4255 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4266 if (buf[i] == 'x' || buf[i] == 'X'){
4267 for (j = 0; j < 7; j++){
4269 if (!nkf_isxdigit(buf[i])){
4276 c |= hex2bin(buf[i]);
4279 for (j = 0; j < 8; j++){
4283 if (!nkf_isdigit(buf[i])){
4290 c += hex2bin(buf[i]); /* hex2bin doubles as the digit-to-value helper here */
4296 return nkf_char_unicode_new(c);
/* numchar_ungetc - ungetc for the numeric-reference input layer; delegates to the layer below (i_nungetc). */
4306 numchar_ungetc(nkf_char c, FILE *f)
4308 return (*i_nungetc)(c, f);
4312 #ifdef UNICODE_NORMALIZATION
4317 nkf_char (*g)(FILE *f) = i_nfc_getc;
4318 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4319 nkf_buf_t *buf = nkf_state->nfc_buf;
4320 const unsigned char *array;
4321 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4322 nkf_char c = (*g)(f);
4324 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4326 nkf_buf_push(buf, c);
4328 while (lower <= upper) {
4329 int mid = (lower+upper) / 2;
4331 array = normalization_table[mid].nfd;
4332 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4333 if (len >= nkf_buf_length(buf)) {
4337 lower = 1, upper = 0;
4340 nkf_buf_push(buf, c);
4342 if (array[len] != nkf_buf_at(buf, len)) {
4343 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4344 else upper = mid - 1;
4351 array = normalization_table[mid].nfc;
4353 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4354 nkf_buf_push(buf, array[i]);
4358 } while (lower <= upper);
4360 while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4361 c = nkf_buf_pop(buf);
/* nfc_ungetc - ungetc for the Unicode normalization input layer; delegates to the layer below (i_nfc_ungetc). */
4367 nfc_ungetc(nkf_char c, FILE *f)
4369 return (*i_nfc_ungetc)(c, f);
4371 #endif /* UNICODE_NORMALIZATION */
/*
 * base64decode - map one base64 alphabet character to its 6-bit value.
 * Besides the standard alphabet the visible branches accept an underscore as
 * 63 and a hyphen as 62.  The constants are written as character literals
 * whose codes equal the intended numbers; the existing inline comments give
 * the arithmetic.  Range tests for some branches are on lines not present in
 * this excerpt.
 */
4375 base64decode(nkf_char c)
4380 i = c - 'A'; /* A..Z 0-25 */
4381 } else if (c == '_') {
4382 i = '?' /* 63 */ ; /* _ 63 */
4384 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4386 } else if (c > '/') {
4387 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4388 } else if (c == '+' || c == '-') {
4389 i = '>' /* 62 */ ; /* + and - 62 */
4391 i = '?' /* 63 */ ; /* / 63 */
4399 nkf_char c1, c2, c3, c4, cc;
4400 nkf_char t1, t2, t3, t4, mode, exit_mode;
4401 nkf_char lwsp_count;
4404 nkf_char lwsp_size = 128;
4406 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4407 return mime_input_buf(mime_input_state.top++);
4409 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4410 mime_decode_mode=FALSE;
4411 unswitch_mime_getc();
4412 return (*i_getc)(f);
4415 if (mimebuf_f == FIXED_MIME)
4416 exit_mode = mime_decode_mode;
4419 if (mime_decode_mode == 'Q') {
4420 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4422 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4423 if (c1<=SP || DEL<=c1) {
4424 mime_decode_mode = exit_mode; /* prepare for quit */
4427 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4431 mime_decode_mode = exit_mode; /* prepare for quit */
4432 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4433 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4434 /* end Q encoding */
4435 input_mode = exit_mode;
4437 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4438 while ((c1=(*i_getc)(f))!=EOF) {
4443 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4451 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4452 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4467 lwsp_buf[lwsp_count] = (unsigned char)c1;
4468 if (lwsp_count++>lwsp_size){
4470 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4471 lwsp_buf = lwsp_buf_new;
4477 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4479 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4480 i_ungetc(lwsp_buf[lwsp_count],f);
4483 nkf_xfree(lwsp_buf);
4486 if (c1=='='&&c2<SP) { /* this is soft wrap */
4487 while((c1 = (*i_mgetc)(f)) <=SP) {
4488 if (c1 == EOF) return (EOF);
4490 mime_decode_mode = 'Q'; /* still in MIME */
4491 goto restart_mime_q;
4494 mime_decode_mode = 'Q'; /* still in MIME */
4498 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4499 if (c2<=SP) return c2;
4500 mime_decode_mode = 'Q'; /* still in MIME */
4501 return ((hex2bin(c2)<<4) + hex2bin(c3));
4504 if (mime_decode_mode != 'B') {
4505 mime_decode_mode = FALSE;
4506 return (*i_mgetc)(f);
4510 /* Base64 encoding */
4512 MIME allows line break in the middle of
4513 Base64, but we are very pessimistic in decoding
4514 in unbuf mode because MIME encoded code may broken by
4515 less or editor's control sequence (such as ESC-[-K in unbuffered
4516 mode. ignore incomplete MIME.
4518 mode = mime_decode_mode;
4519 mime_decode_mode = exit_mode; /* prepare for quit */
4521 while ((c1 = (*i_mgetc)(f))<=SP) {
4526 if ((c2 = (*i_mgetc)(f))<=SP) {
4529 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4530 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4533 if ((c1 == '?') && (c2 == '=')) {
4536 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4537 while ((c1=(*i_getc)(f))!=EOF) {
4542 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4550 if ((c1=(*i_getc)(f))!=EOF) {
4554 } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4569 lwsp_buf[lwsp_count] = (unsigned char)c1;
4570 if (lwsp_count++>lwsp_size){
4572 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4573 lwsp_buf = lwsp_buf_new;
4579 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4581 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4582 i_ungetc(lwsp_buf[lwsp_count],f);
4585 nkf_xfree(lwsp_buf);
4589 if ((c3 = (*i_mgetc)(f))<=SP) {
4592 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4593 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4597 if ((c4 = (*i_mgetc)(f))<=SP) {
4600 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4601 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4605 mime_decode_mode = mode; /* still in MIME sigh... */
4607 /* BASE 64 decoding */
4609 t1 = 0x3f & base64decode(c1);
4610 t2 = 0x3f & base64decode(c2);
4611 t3 = 0x3f & base64decode(c3);
4612 t4 = 0x3f & base64decode(c4);
4613 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4615 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4616 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4618 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4619 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4621 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4626 return mime_input_buf(mime_input_state.top++);
/* Base64 alphabet, indexed by 6-bit value when encoding. */
4629 static const char basis_64[] =
4630 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Capacity of the MIME output look-ahead buffer.  The array has one spare
 * element because mime_putc stores a character first and only then compares
 * the count with MIMEOUT_BUF_LENGTH.
 */
4632 #define MIMEOUT_BUF_LENGTH 74
4634 char buf[MIMEOUT_BUF_LENGTH+1];
4638 /*nkf_char mime_lastchar2, mime_lastchar1;*/
/*
 * open_mime - start an encoded word for the given output mode.
 *
 * The tables are searched for the entry whose mime_encode value equals mode;
 * its pattern becomes the header to emit (the first pattern is the default)
 * and its method letter becomes mimeout_mode.  When more than 45 columns are
 * already used, a newline is emitted first, after a leading blank from the
 * look-ahead buffer has been written out.  Leading white space held in the
 * buffer is written raw, and the remaining buffered characters are re-fed
 * through mime_putc so that they end up inside the encoded word.
 * Some statements (header output, index resets) are on lines not present in
 * this excerpt.
 */
4641 open_mime(nkf_char mode)
4643 const unsigned char *p;
4646 p = mime_pattern[0];
4647 for(i=0;mime_pattern[i];i++) {
4648 if (mode == mime_encode[i]) {
4649 p = mime_pattern[i];
4653 mimeout_mode = mime_encode_method[i];
4655 if (base64_count>45) {
4656 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4657 (*o_mputc)(mimeout_state.buf[i]);
4660 put_newline(o_mputc);
4663 if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
4667 for (;i<mimeout_state.count;i++) {
4668 if (nkf_isspace(mimeout_state.buf[i])) {
4669 (*o_mputc)(mimeout_state.buf[i]);
4679 j = mimeout_state.count;
4680 mimeout_state.count = 0; /* reset before re-feeding: mime_putc appends to the same buffer */
4682 mime_putc(mimeout_state.buf[i]);
/*
 * mime_prechar - line-length guard run by base64_conv before each character.
 *
 * The projected width is base64_count plus the base64 expansion of the
 * buffered bytes (count/3*4).  Three thresholds are visible: 73 while an
 * encoded word is open, 66 for a non-ASCII character, and 60 for starting a
 * new encoded word (Q for ASCII/ISO-8859-1 output, B otherwise).  Each time
 * the limit is exceeded the current word is closed by sending EOF through
 * o_base64conv, a newline is emitted, and a single space starts the
 * continuation line.  The conditions that separate the three cases are partly
 * on lines not present in this excerpt.
 */
4687 mime_prechar(nkf_char c2, nkf_char c1)
4689 if (mimeout_mode > 0){
4691 if (base64_count + mimeout_state.count/3*4> 73){
4692 (*o_base64conv)(EOF,0);
4693 oconv_newline(o_base64conv);
4694 (*o_base64conv)(0,SP);
4698 if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
4699 (*o_base64conv)(EOF,0);
4700 oconv_newline(o_base64conv);
4701 (*o_base64conv)(0,SP);
4707 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4708 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4709 open_mime(output_mode);
4710 (*o_base64conv)(EOF,0);
4711 oconv_newline(o_base64conv);
4712 (*o_base64conv)(0,SP);
4731 switch(mimeout_mode) {
4736 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
4742 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
4747 if (mimeout_mode > 0) {
4748 if (mimeout_f!=FIXED_MIME) {
4750 } else if (mimeout_mode != 'Q')
/*
 * mimeout_addchar - encode one byte c according to mimeout_mode and write the
 * result with o_mputc.
 *
 * Q mode: a non-alphanumeric byte is written as two hexadecimal digits (the
 * escape prefix and the other Q-mode cases are on lines not present in this
 * excerpt).
 * Base64: a three-step state machine.  The first byte yields one symbol and is
 * kept in nkf_state->mimeout_state, the second yields one symbol from the two
 * leftover bits plus its high nibble, and the third yields two symbols.
 */
4756 mimeout_addchar(nkf_char c)
4758 switch(mimeout_mode) {
4763 } else if(!nkf_isalnum(c)) {
4765 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4766 (*o_mputc)(bin2hex((c&0xf)));
4774 nkf_state->mimeout_state=c;
4775 (*o_mputc)(basis_64[c>>2]); /* top six bits of byte 1 */
4780 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4781 nkf_state->mimeout_state=c;
4786 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4787 (*o_mputc)(basis_64[c & 0x3F]); /* low six bits of byte 3 */
/*
 * mime_putc - MIME header encoder entry point for one output byte c
 * (c == EOF flushes).
 *
 * With mimeout_f == FIXED_MIME the function only wraps lines once base64_count
 * exceeds 71.  Otherwise characters are collected in mimeout_state.buf so the
 * function can decide, word by word, whether text is written raw with o_mputc,
 * encoded with mimeout_addchar, or preceded by a new encoded-word header from
 * open_mime.  The decision depends on mimeout_mode (not positive: outside an
 * encoded word; Q; B and its continuation states) and on whether c is ASCII
 * in an ASCII, ISO-8859-1 or UTF-8 output mode.
 *
 * NOTE(review): many statements of this function are not present in this
 * excerpt, so the inline comments below are limited to what the visible lines
 * show.
 */
4799 mime_putc(nkf_char c)
4804 if (mimeout_f == FIXED_MIME){
4805 if (mimeout_mode == 'Q'){
4806 if (base64_count > 71){
4807 if (c!=CR && c!=LF) {
4809 put_newline(o_mputc);
4814 if (base64_count > 71){
4816 put_newline(o_mputc);
4819 if (c == EOF) { /* c==EOF */
4823 if (c != EOF) { /* c!=EOF */
4829 /* mimeout_f != FIXED_MIME */
4831 if (c == EOF) { /* c==EOF */
4832 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4833 j = mimeout_state.count;
4834 mimeout_state.count = 0;
4836 if (mimeout_mode > 0) {
4837 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4839 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4842 mimeout_addchar(mimeout_state.buf[i]);
4846 mimeout_addchar(mimeout_state.buf[i]);
4850 mimeout_addchar(mimeout_state.buf[i]);
4856 mimeout_addchar(mimeout_state.buf[i]);
4862 if (mimeout_state.count > 0){
4863 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4868 if (mimeout_mode=='Q') {
4869 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4870 if (c == CR || c == LF) {
4875 } else if (c <= SP) {
4877 if (base64_count > 70) {
4878 put_newline(o_mputc);
4881 if (!nkf_isblank(c)) {
4886 if (base64_count > 70) {
4888 put_newline(o_mputc);
4891 open_mime(output_mode);
4893 if (!nkf_noescape_mime(c)) {
4906 if (mimeout_mode <= 0) {
4907 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
4908 output_mode == UTF_8)) {
4909 if (nkf_isspace(c)) {
4911 if (mimeout_mode == -1) {
4914 if (c==CR || c==LF) {
4916 open_mime(output_mode);
4922 for (i=0;i<mimeout_state.count;i++) {
4923 (*o_mputc)(mimeout_state.buf[i]);
4924 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4935 mimeout_state.buf[0] = (char)c;
4936 mimeout_state.count = 1;
4938 if (base64_count > 1
4939 && base64_count + mimeout_state.count > 76
4940 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
4941 static const char *str = "boundary=\"";
4942 static int len = 10;
4945 for (; i < mimeout_state.count - len; ++i) {
4946 if (!strncmp(mimeout_state.buf+i, str, len)) {
4952 if (i == 0 || i == mimeout_state.count - len) {
4953 put_newline(o_mputc);
4955 if (!nkf_isspace(mimeout_state.buf[0])){
4962 for (j = 0; j <= i; ++j) {
4963 (*o_mputc)(mimeout_state.buf[j]);
4965 put_newline(o_mputc);
4967 for (; j <= mimeout_state.count; ++j) {
4968 mimeout_state.buf[j - i] = mimeout_state.buf[j];
4970 mimeout_state.count -= i;
4973 mimeout_state.buf[mimeout_state.count++] = (char)c;
4974 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4975 open_mime(output_mode);
4980 if (lastchar==CR || lastchar == LF){
4981 for (i=0;i<mimeout_state.count;i++) {
4982 (*o_mputc)(mimeout_state.buf[i]);
4985 mimeout_state.count = 0;
4988 for (i=0;i<mimeout_state.count-1;i++) {
4989 (*o_mputc)(mimeout_state.buf[i]);
4992 mimeout_state.buf[0] = SP;
4993 mimeout_state.count = 1;
4995 open_mime(output_mode);
4998 /* mimeout_mode == 'B', 1, 2 */
4999 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5000 output_mode == UTF_8)) {
5001 if (lastchar == CR || lastchar == LF){
5002 if (nkf_isblank(c)) {
5003 for (i=0;i<mimeout_state.count;i++) {
5004 mimeout_addchar(mimeout_state.buf[i]);
5006 mimeout_state.count = 0;
5009 for (i=0;i<mimeout_state.count;i++) {
5010 (*o_mputc)(mimeout_state.buf[i]);
5013 mimeout_state.count = 0;
5015 mimeout_state.buf[mimeout_state.count++] = (char)c;
5018 if (nkf_isspace(c)) {
5019 for (i=0;i<mimeout_state.count;i++) {
5020 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5022 for (i=0;i<mimeout_state.count;i++) {
5023 (*o_mputc)(mimeout_state.buf[i]);
5026 mimeout_state.count = 0;
5029 mimeout_state.buf[mimeout_state.count++] = (char)c;
5030 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5032 for (i=0;i<mimeout_state.count;i++) {
5033 (*o_mputc)(mimeout_state.buf[i]);
5036 mimeout_state.count = 0;
5040 if (mimeout_state.count>0 && SP<c && c!='=') {
5041 mimeout_state.buf[mimeout_state.count++] = (char)c;
5042 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5043 j = mimeout_state.count;
5044 mimeout_state.count = 0;
5046 mimeout_addchar(mimeout_state.buf[i]);
5053 if (mimeout_state.count>0) {
5054 j = mimeout_state.count;
5055 mimeout_state.count = 0;
5057 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5059 mimeout_addchar(mimeout_state.buf[i]);
5065 (*o_mputc)(mimeout_state.buf[i]);
5067 open_mime(output_mode);
/*
 * base64_conv - output filter stage installed when MIME encoding is enabled:
 * runs the line-length guard for (c2, c1) and then passes the character on to
 * o_base64conv unchanged.
 */
5074 base64_conv(nkf_char c2, nkf_char c1)
5076 mime_prechar(c2, c1);
5077 (*o_base64conv)(c2,c1);
/*
 * nkf_iconv_t - state of an iconv(3) based converter: the conversion
 * descriptor plus an input and an output staging buffer with their sizes.
 * (The cd and input_buffer members used by the functions below are declared
 * on lines not present in this excerpt.)
 */
5081 typedef struct nkf_iconv_t {
5084 size_t input_buffer_size;
5085 char *output_buffer;
5086 size_t output_buffer_size;
/*
 * nkf_iconv_new - create a converter from fromcode to tocode: allocates an
 * IOBUF_SIZE input buffer and a buffer twice that size for output, then opens
 * the iconv descriptor and reports a failure with perror.
 *
 * NOTE(review): this code does not look compilable as written.
 *  - converter is declared as a struct value but is used with the arrow
 *    operator throughout.
 *  - The unsupported-conversion message passes the int result of fprintf to
 *    perror, and fprintf itself is called without a stream argument.
 * Confirm whether this section is ever built before relying on it.
 */
5090 nkf_iconv_new(char *tocode, char *fromcode)
5092 nkf_iconv_t converter;
5094 converter->input_buffer_size = IOBUF_SIZE;
5095 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5096 converter->output_buffer_size = IOBUF_SIZE * 2;
5097 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5098 converter->cd = iconv_open(tocode, fromcode);
5099 if (converter->cd == (iconv_t)-1)
5103 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5106 perror("can't iconv_open");
/*
 * nkf_iconv_convert - read characters with i_getc into the input buffer, run
 * iconv() on them, write the converted bytes with o_putc, and on failure
 * either keep the unconverted tail for the next round, double the output
 * buffer, or report the error with perror.
 *
 * NOTE(review): several visible lines look wrong and should be checked against
 * the intended behaviour:
 *  - the stream parameter is named input, but the read loop uses f;
 *  - the read loop breaks while the buffer still has room (the comparison
 *    looks inverted), so at most one character is read per round;
 *  - after iconv() output_length is the space left, not the bytes produced,
 *    and output_buffer has been advanced, so the output loop does not walk the
 *    converted bytes;
 *  - the realloc call reads converter->outbuf, which is not a member shown in
 *    the struct (the member is output_buffer).
 */
5112 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5114 size_t invalid = (size_t)0;
5115 char *input_buffer = converter->input_buffer;
5116 size_t input_length = (size_t)0;
5117 char *output_buffer = converter->output_buffer;
5118 size_t output_length = converter->output_buffer_size;
5123 while ((c = (*i_getc)(f)) != EOF) {
5124 input_buffer[input_length++] = c;
5125 if (input_length < converter->input_buffer_size) break;
5129 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5130 while (output_length-- > 0) {
5131 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5133 if (ret == (size_t) - 1) {
5136 if (input_buffer != converter->input_buffer)
5137 memmove(converter->input_buffer, input_buffer, input_length);
5140 converter->output_buffer_size *= 2;
5141 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5142 if (output_buffer == NULL) {
5143 perror("can't realloc");
5146 converter->output_buffer = output_buffer;
5149 perror("can't iconv");
/*
 * nkf_iconv_close - release both staging buffers and close the iconv
 * descriptor.
 *
 * NOTE(review): the parameter is named convert but the body uses converter,
 * and the members freed here (inbuf, outbuf) do not match the member names
 * used elsewhere (input_buffer, output_buffer).
 */
5162 nkf_iconv_close(nkf_iconv_t *convert)
5164 nkf_xfree(converter->inbuf);
5165 nkf_xfree(converter->outbuf);
5166 iconv_close(converter->cd);
5175 struct input_code *p = input_code_list;
5187 mime_f = MIME_DECODE_DEFAULT;
5188 mime_decode_f = FALSE;
5193 x0201_f = NKF_UNSPECIFIED;
5194 iso2022jp_f = FALSE;
5195 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5196 ms_ucs_map_f = UCS_MAP_ASCII;
5198 #ifdef UTF8_INPUT_ENABLE
5199 no_cp932ext_f = FALSE;
5200 no_best_fit_chars_f = FALSE;
5201 encode_fallback = NULL;
5202 unicode_subchar = '?';
5203 input_endian = ENDIAN_BIG;
5205 #ifdef UTF8_OUTPUT_ENABLE
5206 output_bom_f = FALSE;
5207 output_endian = ENDIAN_BIG;
5209 #ifdef UNICODE_NORMALIZATION
5225 #ifdef SHIFTJIS_CP932
5235 for (i = 0; i < 256; i++){
5236 prefix_table[i] = 0;
5240 mimeout_state.count = 0;
5245 fold_preserve_f = FALSE;
5248 kanji_intro = DEFAULT_J;
5249 ascii_intro = DEFAULT_R;
5250 fold_margin = FOLD_MARGIN;
5251 o_zconv = no_connection;
5252 o_fconv = no_connection;
5253 o_eol_conv = no_connection;
5254 o_rot_conv = no_connection;
5255 o_hira_conv = no_connection;
5256 o_base64conv = no_connection;
5257 o_iso2022jp_check_conv = no_connection;
5260 i_ungetc = std_ungetc;
5262 i_bungetc = std_ungetc;
5265 i_mungetc = std_ungetc;
5266 i_mgetc_buf = std_getc;
5267 i_mungetc_buf = std_ungetc;
5268 output_mode = ASCII;
5270 mime_decode_mode = FALSE;
5276 z_prev2=0,z_prev1=0;
5278 iconv_for_check = 0;
5280 input_codename = NULL;
5281 input_encoding = NULL;
5282 output_encoding = NULL;
/*
 * module_connection - resolve the encodings and wire up the filter chains.
 *
 * Output side: oconv starts as the encoder of the output encoding.  Each
 * enabled filter saves the current oconv in its own o_* hook and then replaces
 * oconv with itself, so the filter installed last (z_conv) is the first to
 * see a character.
 * Input side: the same scheme is applied to i_getc/i_ungetc, starting from the
 * standard stream functions and stacking the decoding layers on top.
 * Finally the input converter is set: the one belonging to an explicitly given
 * input encoding, otherwise e_iconv as the visible default.
 *
 * The caller checks the return value for a negative result; the statements
 * that produce it, and the option tests guarding several filters, are on lines
 * not present in this excerpt.
 */
5290 module_connection(void)
5292 if (input_encoding) set_input_encoding(input_encoding);
5293 if (!output_encoding) {
5294 output_encoding = nkf_default_encoding();
5296 if (!output_encoding) {
5297 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5300 set_output_encoding(output_encoding);
5301 oconv = nkf_enc_to_oconv(output_encoding);
5303 if (nkf_enc_unicode_p(output_encoding))
5304 output_mode = UTF_8;
5306 if (x0201_f == NKF_UNSPECIFIED) {
5307 x0201_f = X0201_DEFAULT;
5310 /* replace continuation module, from output side */
5312 /* output redirection */
5314 if (noout_f || guess_f){
5321 if (mimeout_f == TRUE) {
5322 o_base64conv = oconv; oconv = base64_conv;
5324 /* base64_count = 0; */
5327 if (eolmode_f || guess_f) {
5328 o_eol_conv = oconv; oconv = eol_conv;
5331 o_rot_conv = oconv; oconv = rot_conv;
5334 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5337 o_hira_conv = oconv; oconv = hira_conv;
5340 o_fconv = oconv; oconv = fold_conv;
5343 if (alpha_f || x0201_f) {
5344 o_zconv = oconv; oconv = z_conv;
5348 i_ungetc = std_ungetc;
5349 /* input redirection */
5352 i_cgetc = i_getc; i_getc = cap_getc;
5353 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5356 i_ugetc = i_getc; i_getc = url_getc;
5357 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5360 #ifdef NUMCHAR_OPTION
5362 i_ngetc = i_getc; i_getc = numchar_getc;
5363 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5366 #ifdef UNICODE_NORMALIZATION
5368 i_nfc_getc = i_getc; i_getc = nfc_getc;
5369 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5372 if (mime_f && mimebuf_f==FIXED_MIME) {
5373 i_mgetc = i_getc; i_getc = mime_getc;
5374 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5377 i_bgetc = i_getc; i_getc = broken_getc;
5378 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5380 if (input_encoding) {
5381 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5383 set_iconv(FALSE, e_iconv);
5387 struct input_code *p = input_code_list;
5396 Conversion main loop. Code detection only.
5399 #if !defined(PERL_XS) && !defined(WIN32DLL)
5406 module_connection();
5407 while ((c = (*i_getc)(f)) != EOF)
/*
 * Loop-control shorthands and the input-mode setter used by the
 * conversion loop in kanji_convert().
 *
 * SKIP and MORE each expand to two statements (an assignment to c2
 * followed by continue), so they must only be used inside a braced block;
 * after an unbraced if, only the assignment would be conditional.
 */
5414 #define NEXT continue /* no output, get next */
5415 #define SKIP c2=0;continue /* clear the pending byte, no output, get next */
5416 #define MORE c2=c1;continue /* need one more byte */
5417 #define SEND ; /* output c1 and c2, get next */
5418 #define LAST break /* end of loop, go closing */
5419 #define set_input_mode(mode) do { \
5420 input_mode = mode; \
5422 set_input_codename("ISO-2022-JP"); \
5423 debug("ISO-2022-JP"); \
/*
 * kanji_convert - main conversion loop.
 *
 * f: input stream; every byte is fetched with (*i_getc)(f).
 *
 * Calls module_connection() first and reports "no output encoding given"
 * on stderr when it returns a negative value.  Input is then consumed by
 * one of three loops:
 *   - iconv == w_iconv32: four bytes at a time via nkf_iconv_utf_32();
 *   - iconv == w_iconv16: two bytes at a time via nkf_iconv_utf_16(),
 *     with two more bytes fetched when it answers -2;
 *   - otherwise: a byte-at-a-time state machine that tracks input_mode
 *     (switched by ESC sequences), handles MIME starts, and hands
 *     characters to (*iconv) / (*oconv).
 * After the loop (*iconv)(EOF, 0, 0) is called, and if no input code name
 * has been set, the input_code_list entry with the lowest score supplies
 * it.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
5427 kanji_convert(FILE *f)
5429 nkf_char c1=0, c2=0, c3=0, c4=0;
5430 int shift_mode = 0; /* 0, 1, 2, 3 */
5432 int is_8bit = FALSE;
5434 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5439 output_mode = ASCII;
5441 if (module_connection() < 0) {
5442 #if !defined(PERL_XS) && !defined(WIN32DLL)
5443 fprintf(stderr, "no output encoding given\n");
5449 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: read four bytes per unit; the loop ends at the first EOF */
5450 if(iconv == w_iconv32){
5451 while ((c1 = (*i_getc)(f)) != EOF &&
5452 (c2 = (*i_getc)(f)) != EOF &&
5453 (c3 = (*i_getc)(f)) != EOF &&
5454 (c4 = (*i_getc)(f)) != EOF) {
5455 nkf_iconv_utf_32(c1, c2, c3, c4);
/*
 * UTF-16 input: read two bytes per unit.  A result of -2 asks for two
 * more bytes before converting.
 * NOTE(review): presumably the surrogate-pair case; confirm in
 * nkf_iconv_utf_16.
 */
5459 else if (iconv == w_iconv16) {
5460 while ((c1 = (*i_getc)(f)) != EOF &&
5461 (c2 = (*i_getc)(f)) != EOF) {
5462 if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
5463 (c3 = (*i_getc)(f)) != EOF &&
5464 (c4 = (*i_getc)(f)) != EOF) {
5465 nkf_iconv_utf_16(c1, c2, c3, c4);
/* every other encoding: byte-at-a-time state machine */
5472 while ((c1 = (*i_getc)(f)) != EOF) {
5473 #ifdef INPUT_CODE_FIX
5474 if (!input_encoding)
5480 /* in case of 8th bit is on */
5481 if (!estab_f&&!mime_decode_mode) {
5482 /* in case of not established yet */
5483 /* It is still ambiguous */
5484 if (h_conv(f, c2, c1)==EOF) {
5492 /* in case of already established */
5494 /* ignore bogus code */
5502 /* 2nd byte of 7 bit code or SJIS */
5506 else if (nkf_char_unicode_p(c1)) {
5512 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5515 }else if (input_codename && input_codename[0] == 'I' &&
5516 0xA1 <= c1 && c1 <= 0xDF) {
5517 /* JIS X 0201 Katakana in 8bit JIS */
5518 c2 = JIS_X_0201_1976_K;
5521 } else if (c1 > DEL) {
5523 if (!estab_f && !iso8859_f) {
5524 /* not established yet */
5526 } else { /* estab_f==TRUE */
5532 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5533 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5535 c2 = JIS_X_0201_1976_K;
5540 /* already established */
5544 } else if (SP < c1 && c1 < DEL) {
5545 /* in case of Roman characters */
5547 /* output 1 shifted byte */
5551 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5552 /* output 1 shifted byte */
5553 c2 = JIS_X_0201_1976_K;
5556 /* looks like bogus code */
5559 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5560 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5561 /* in case of Kanji shifted */
5563 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5564 /* Check MIME code */
5565 if ((c1 = (*i_getc)(f)) == EOF) {
5568 } else if (c1 == '?') {
5569 /* =? is mime conversion start sequence */
5570 if(mime_f == STRICT_MIME) {
5571 /* check in real detail */
5572 if (mime_begin_strict(f) == EOF)
5575 } else if (mime_begin(f) == EOF)
5584 /* normal ASCII code */
/* SI, SO and ESC are only acted on while is_8bit is clear or during MIME decoding */
5587 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5590 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5593 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5594 if ((c1 = (*i_getc)(f)) == EOF) {
5598 else if (c1 == '&') {
5600 if ((c1 = (*i_getc)(f)) == EOF) {
5606 else if (c1 == '$') {
5608 if ((c1 = (*i_getc)(f)) == EOF) {
5609 /* don't send bogus code
5611 (*oconv)(0, '$'); */
5613 } else if (c1 == '@' || c1 == 'B') {
5615 set_input_mode(JIS_X_0208);
5617 } else if (c1 == '(') {
5619 if ((c1 = (*i_getc)(f)) == EOF) {
5620 /* don't send bogus code
5626 } else if (c1 == '@'|| c1 == 'B') {
5628 set_input_mode(JIS_X_0208);
5631 } else if (c1 == 'D'){
5632 set_input_mode(JIS_X_0212);
5634 #endif /* X0212_ENABLE */
5635 } else if (c1 == 'O' || c1 == 'Q'){
5636 set_input_mode(JIS_X_0213_1);
5638 } else if (c1 == 'P'){
5639 set_input_mode(JIS_X_0213_2);
5642 /* could be some special code */
5649 } else if (broken_f&0x2) {
5650 /* accept any ESC-(-x as broken code ... */
5651 input_mode = JIS_X_0208;
5660 } else if (c1 == '(') {
5662 if ((c1 = (*i_getc)(f)) == EOF) {
5663 /* don't send bogus code
5665 (*oconv)(0, '('); */
5668 else if (c1 == 'I') {
5669 /* JIS X 0201 Katakana */
5670 set_input_mode(JIS_X_0201_1976_K);
5673 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5674 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5675 set_input_mode(ASCII);
5678 else if (broken_f&0x2) {
5679 set_input_mode(ASCII);
5688 else if (c1 == '.') {
5690 if ((c1 = (*i_getc)(f)) == EOF) {
5693 else if (c1 == 'A') {
5704 else if (c1 == 'N') {
5707 if (g2 == ISO_8859_1) {
5722 } else if (c1 == ESC && iconv == s_iconv) {
5723 /* ESC in Shift_JIS */
5724 if ((c1 = (*i_getc)(f)) == EOF) {
5727 } else if (c1 == '$') {
5729 if ((c1 = (*i_getc)(f)) == EOF) {
5731 } else if (('E' <= c1 && c1 <= 'G') ||
5732 ('O' <= c1 && c1 <= 'Q')) {
/*
 * The table is indexed by c1 % 7.  With c1 limited to 'E'..'G' and
 * 'O'..'Q' by the test above, the selected slots are 6, 0, 1, 2, 3, 4;
 * slot 5 is never chosen on this path.
 */
5740 static const nkf_char jphone_emoji_first_table[7] =
5741 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5742 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5743 if ((c1 = (*i_getc)(f)) == EOF) LAST;
/* each following byte in SP..'z' is emitted as c1 + c3 */
5744 while (SP <= c1 && c1 <= 'z') {
5745 (*oconv)(0, c1 + c3);
5746 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5761 } else if (c1 == LF || c1 == CR) {
5763 input_mode = ASCII; set_iconv(FALSE, 0);
5765 } else if (mime_decode_f && !mime_decode_mode){
5767 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5775 } else { /* if (c1 == CR)*/
5776 if ((c1=(*i_getc)(f))!=EOF) {
5780 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5800 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5803 if ((c3 = (*i_getc)(f)) != EOF) {
5806 if ((c4 = (*i_getc)(f)) != EOF) {
5808 (*iconv)(c2, c1, c3|c4);
5813 /* 3 bytes EUC or UTF-8 */
5814 if ((c3 = (*i_getc)(f)) != EOF) {
5816 (*iconv)(c2, c1, c3);
5824 0x7F <= c2 && c2 <= 0x92 &&
5825 0x21 <= c1 && c1 <= 0x7E) {
/* rows 0x7F..0x92 of 94 cells each are mapped linearly onto values starting at 0xE000 */
5827 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5830 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5834 (*oconv)(PREFIX_EUCG3 | c2, c1);
5836 #endif /* X0212_ENABLE */
5838 (*oconv)(PREFIX_EUCG3 | c2, c1);
5841 (*oconv)(input_mode, c1); /* other special case */
5847 /* goto next_word */
/* end of input: signal EOF to the input converter */
5852 (*iconv)(EOF, 0, 0);
5853 if (!input_codename)
/* no code name set yet: pick the input_code_list entry with the smallest score */
5856 struct input_code *p = input_code_list;
5857 struct input_code *result = p;
5859 if (p->score < result->score) result = p;
5862 set_input_codename(result->name);
5864 debug(result->name);
5872 * int options(unsigned char *cp)
/*
 * options - parse one option string and update the global conversion
 * settings.
 *
 * cp: option text.  Everything up to and including the first '-' is
 *     skipped, then the remaining characters are dispatched one by one.
 *
 * "--name" and "--name=value" forms are looked up in long_option[].  An
 * entry with a non-empty alias is re-parsed through that alias string;
 * the remaining long options are handled by the strcmp() chain below.
 * Single-letter options may consume following characters as modifiers
 * (for example digits after -Z, -B, -f).
 *
 * Unknown options are reported on stderr except in PERL_XS / WIN32DLL
 * builds.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
5879 options(unsigned char *cp)
5883 unsigned char *cp_back = NULL;
/* skip up to and including the first '-' */
5888 while(*cp && *cp++!='-');
5889 while (*cp || cp_back) {
5897 case '-': /* literal options */
5898 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
5902 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
5903 p = (unsigned char *)long_option[i].name;
/* advance over the common prefix, stopping at '=' in the option name */
5904 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5905 if (*p == cp[j] || cp[j] == SP){
5912 #if !defined(PERL_XS) && !defined(WIN32DLL)
5913 fprintf(stderr, "unknown long option: --%s\n", cp);
/* move cp to the end of this word (NUL or SP) */
5917 while(*cp && *cp != SP && cp++);
5918 if (long_option[i].alias[0]){
5920 cp = (unsigned char *)long_option[i].alias;
5923 if (strcmp(long_option[i].name, "help") == 0){
5928 if (strcmp(long_option[i].name, "ic=") == 0){
5929 enc = nkf_enc_find((char *)p);
5931 input_encoding = enc;
5934 if (strcmp(long_option[i].name, "oc=") == 0){
5935 enc = nkf_enc_find((char *)p);
5936 /* if (enc <= 0) continue; */
5938 output_encoding = enc;
5941 if (strcmp(long_option[i].name, "guess=") == 0){
5942 if (p[0] == '0' || p[0] == '1') {
5950 if (strcmp(long_option[i].name, "overwrite") == 0){
5953 preserve_time_f = TRUE;
5956 if (strcmp(long_option[i].name, "overwrite=") == 0){
5959 preserve_time_f = TRUE;
5961 backup_suffix = (char *)p;
5964 if (strcmp(long_option[i].name, "in-place") == 0){
5967 preserve_time_f = FALSE;
5970 if (strcmp(long_option[i].name, "in-place=") == 0){
5973 preserve_time_f = FALSE;
5975 backup_suffix = (char *)p;
5980 if (strcmp(long_option[i].name, "cap-input") == 0){
5984 if (strcmp(long_option[i].name, "url-input") == 0){
5989 #ifdef NUMCHAR_OPTION
5990 if (strcmp(long_option[i].name, "numchar-input") == 0){
5996 if (strcmp(long_option[i].name, "no-output") == 0){
6000 if (strcmp(long_option[i].name, "debug") == 0){
6005 if (strcmp(long_option[i].name, "cp932") == 0){
6006 #ifdef SHIFTJIS_CP932
6010 #ifdef UTF8_OUTPUT_ENABLE
6011 ms_ucs_map_f = UCS_MAP_CP932;
6015 if (strcmp(long_option[i].name, "no-cp932") == 0){
6016 #ifdef SHIFTJIS_CP932
6020 #ifdef UTF8_OUTPUT_ENABLE
6021 ms_ucs_map_f = UCS_MAP_ASCII;
6025 #ifdef SHIFTJIS_CP932
6026 if (strcmp(long_option[i].name, "cp932inv") == 0){
6033 if (strcmp(long_option[i].name, "x0212") == 0){
6040 if (strcmp(long_option[i].name, "exec-in") == 0){
6044 if (strcmp(long_option[i].name, "exec-out") == 0){
6049 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6050 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6051 no_cp932ext_f = TRUE;
6054 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6055 no_best_fit_chars_f = TRUE;
6058 if (strcmp(long_option[i].name, "fb-skip") == 0){
6059 encode_fallback = NULL;
6062 if (strcmp(long_option[i].name, "fb-html") == 0){
6063 encode_fallback = encode_fallback_html;
6066 if (strcmp(long_option[i].name, "fb-xml") == 0){
6067 encode_fallback = encode_fallback_xml;
6070 if (strcmp(long_option[i].name, "fb-java") == 0){
6071 encode_fallback = encode_fallback_java;
6074 if (strcmp(long_option[i].name, "fb-perl") == 0){
6075 encode_fallback = encode_fallback_perl;
6078 if (strcmp(long_option[i].name, "fb-subchar") == 0){
6079 encode_fallback = encode_fallback_subchar;
/*
 * --fb-subchar=N: parse N as decimal, hexadecimal or octal.
 * NOTE(review): i and j are reused below (as the digit index and as
 * w16e_conv outputs), so long_option[i] is no longer the matched entry
 * afterwards; confirm this branch leaves the long-option handling before
 * i is used as an index again.
 */
6082 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6083 encode_fallback = encode_fallback_subchar;
6084 unicode_subchar = 0;
6086 /* decimal number */
6087 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6088 unicode_subchar *= 10;
6089 unicode_subchar += hex2bin(p[i]);
6091 }else if(p[1] == 'x' || p[1] == 'X'){
6092 /* hexadecimal number */
6093 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6094 unicode_subchar <<= 4;
6095 unicode_subchar |= hex2bin(p[i]);
6099 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6100 unicode_subchar *= 8;
6101 unicode_subchar += hex2bin(p[i]);
6104 w16e_conv(unicode_subchar, &i, &j);
6105 unicode_subchar = i<<8 | j;
6109 #ifdef UTF8_OUTPUT_ENABLE
6110 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6111 ms_ucs_map_f = UCS_MAP_MS;
6115 #ifdef UNICODE_NORMALIZATION
6116 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
/*
 * --prefix=XYZ...: p[0] is stored in prefix_table for every following
 * graphic character p[1], p[2], ...  (i is reused as the index here too.)
 */
6121 if (strcmp(long_option[i].name, "prefix=") == 0){
6122 if (nkf_isgraph(p[0])){
6123 for (i = 1; nkf_isgraph(p[i]); i++){
6124 prefix_table[p[i]] = p[0];
6129 #if !defined(PERL_XS) && !defined(WIN32DLL)
6130 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6135 case 'b': /* buffered mode */
6138 case 'u': /* non-buffered mode */
6141 case 't': /* transparent mode */
6146 } else if (*cp=='2') {
6150 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6158 case 'j': /* JIS output */
6160 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6162 case 'e': /* AT&T EUC output */
6163 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6165 case 's': /* SJIS output */
6166 output_encoding = nkf_enc_from_index(SHIFT_JIS);
6168 case 'l': /* ISO8859 Latin-1 support, no conversion */
6169 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6170 input_encoding = nkf_enc_from_index(ISO_8859_1);
6172 case 'i': /* Kanji IN ESC-$-@/B */
6173 if (*cp=='@'||*cp=='B')
6174 kanji_intro = *cp++;
6176 case 'o': /* ASCII IN ESC-(-J/B/H */
6177 /* ESC ( H was used in initial JUNET messages */
6178 if (*cp=='J'||*cp=='B'||*cp=='H')
6179 ascii_intro = *cp++;
6183 bit:1 katakana->hiragana
6184 bit:2 hiragana->katakana
/* the digit's numeric value is OR-ed into hira_f as-is */
6186 if ('9'>= *cp && *cp>='0')
6187 hira_f |= (*cp++ -'0');
6194 #if defined(MSDOS) || defined(__OS2__)
6201 show_configuration();
6209 #ifdef UTF8_OUTPUT_ENABLE
6210 case 'w': /* UTF-{8,16,32} output */
6215 output_encoding = nkf_enc_from_index(UTF_8N);
6217 output_bom_f = TRUE;
6218 output_encoding = nkf_enc_from_index(UTF_8_BOM);
6222 if ('1'== cp[0] && '6'==cp[1]) {
6225 } else if ('3'== cp[0] && '2'==cp[1]) {
6229 output_encoding = nkf_enc_from_index(UTF_8);
6234 output_endian = ENDIAN_LITTLE;
6235 output_bom_f = TRUE;
6236 } else if (cp[0] == 'B') {
6238 output_bom_f = TRUE;
6241 output_bom_f = FALSE;
/* choose the concrete UTF-16/32 variant from the endianness (without BOM, then with BOM) */
6243 enc_idx = enc_idx == UTF_16
6244 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6245 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6247 enc_idx = enc_idx == UTF_16
6248 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6249 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6251 output_encoding = nkf_enc_from_index(enc_idx);
6255 #ifdef UTF8_INPUT_ENABLE
6256 case 'W': /* UTF input */
6259 input_encoding = nkf_enc_from_index(UTF_8);
6262 if ('1'== cp[0] && '6'==cp[1]) {
6264 input_endian = ENDIAN_BIG;
6266 } else if ('3'== cp[0] && '2'==cp[1]) {
6268 input_endian = ENDIAN_BIG;
6271 input_encoding = nkf_enc_from_index(UTF_8);
6276 input_endian = ENDIAN_LITTLE;
6277 } else if (cp[0] == 'B') {
6279 input_endian = ENDIAN_BIG;
6281 enc_idx = (enc_idx == UTF_16
6282 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6283 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6284 input_encoding = nkf_enc_from_index(enc_idx);
6288 /* Input code assumption */
6289 case 'J': /* ISO-2022-JP input */
6290 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6292 case 'E': /* EUC-JP input */
6293 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6295 case 'S': /* Shift_JIS input */
6296 input_encoding = nkf_enc_from_index(SHIFT_JIS);
6298 case 'Z': /* Convert X0208 alphabet to ASCII */
6300 bit:0 Convert JIS X 0208 Alphabet to ASCII
6301 bit:1 Convert Kankaku to one space
6302 bit:2 Convert Kankaku to two spaces
6303 bit:3 Convert HTML Entity
6304 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
/* each digit 0..4 sets the bit of that number in alpha_f; several digits may follow */
6306 while ('0'<= *cp && *cp <='4') {
6307 alpha_f |= 1 << (*cp++ - '0');
6311 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6312 x0201_f = FALSE; /* No X0201->X0208 conversion */
6314 ESC-(-I in JIS, EUC, MS Kanji
6315 SI/SO in JIS, EUC, MS Kanji
6316 SS2 in EUC, JIS, not in MS Kanji
6317 MS Kanji (0xa0-0xdf)
6319 ESC-(-I in JIS (0x20-0x5f)
6320 SS2 in EUC (0xa0-0xdf)
6321 0xa0-0xd in MS Kanji (0xa0-0xdf)
6324 case 'X': /* Convert X0201 kana to X0208 */
6327 case 'F': /* preserve new lines */
6328 fold_preserve_f = TRUE;
/* fall through: -F continues into the -f folding setup */
6329 case 'f': /* folding -f60 or -f */
6332 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6334 fold_len += *cp++ - '0';
/* a width outside 1..BUFSIZ-1 is replaced by DEFAULT_FOLD */
6336 if (!(0<fold_len && fold_len<BUFSIZ))
6337 fold_len = DEFAULT_FOLD;
6341 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6343 fold_margin += *cp++ - '0';
6347 case 'm': /* MIME support */
6348 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6349 if (*cp=='B'||*cp=='Q') {
6350 mime_decode_mode = *cp++;
6351 mimebuf_f = FIXED_MIME;
6352 } else if (*cp=='N') {
6353 mime_f = TRUE; cp++;
6354 } else if (*cp=='S') {
6355 mime_f = STRICT_MIME; cp++;
6356 } else if (*cp=='0') {
6357 mime_decode_f = FALSE;
6358 mime_f = FALSE; cp++;
6360 mime_f = STRICT_MIME;
6363 case 'M': /* MIME output */
6366 mimeout_f = FIXED_MIME; cp++;
6367 } else if (*cp=='Q') {
6369 mimeout_f = FIXED_MIME; cp++;
6374 case 'B': /* Broken JIS support */
6376 bit:1 allow any x on ESC-(-x or ESC-$-x
6377 bit:2 reset to ascii on NL
/* digit n sets bit n of broken_f */
6379 if ('9'>= *cp && *cp>='0')
6380 broken_f |= 1<<(*cp++ -'0');
6385 case 'O':/* for Output file */
6389 case 'c':/* add cr code */
6392 case 'd':/* delete cr code */
6395 case 'I': /* ISO-2022-JP output */
6398 case 'L': /* line mode */
6399 if (*cp=='u') { /* unix */
6400 eolmode_f = LF; cp++;
6401 } else if (*cp=='m') { /* mac */
6402 eolmode_f = CR; cp++;
6403 } else if (*cp=='w') { /* windows */
6404 eolmode_f = CRLF; cp++;
6405 } else if (*cp=='0') { /* no conversion */
6406 eolmode_f = 0; cp++;
6411 if ('2' <= *cp && *cp <= '9') {
6414 } else if (*cp == '0' || *cp == '1') {
6423 /* multiple options in one string are allowed for the Perl module */
6424 while(*cp && *cp++!='-');
6427 #if !defined(PERL_XS) && !defined(WIN32DLL)
6428 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6430 /* bogus option but ignored */
6438 #include "nkf32dll.c"
6439 #elif defined(PERL_XS)
6440 #else /* WIN32DLL */
/*
 * main - command-line entry point (built only when neither WIN32DLL nor
 * PERL_XS is defined).
 *
 * Leading arguments that start with '-' are consumed as options.  The
 * visible code then either converts stdin with kanji_convert(stdin) or
 * opens each remaining argument with fopen() and converts it.
 *
 * When file_out_f is TRUE the output is redirected to a temporary file
 * created next to the original (mkstemp() or open() with O_EXCL,
 * depending on the platform branch).  Afterwards the original file's mode
 * is copied onto it with stat() and chmod(), its timestamps are copied
 * with utime() when preserve_time_f is set, the original is renamed to a
 * backup name where one is requested, and the temporary file is renamed
 * over the original.
 *
 * A file that cannot be opened sets is_argument_error.
 * NOTE(review): the exit status derived from it is not visible in this
 * excerpt.
 */
6442 main(int argc, char **argv)
6447 char *outfname = NULL;
6450 #ifdef EASYWIN /*Easy Win */
6451 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6453 #ifdef DEFAULT_CODE_LOCALE
6454 setlocale(LC_CTYPE, "");
/* consume every leading argument that begins with '-' */
6458 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6459 cp = (unsigned char *)*argv;
6464 if (pipe(fds) < 0 || (pid = fork()) < 0){
6475 execvp(argv[1], &argv[1]);
/* save selected flags, then restore them below */
6492 int debug_f_back = debug_f;
6495 int exec_f_back = exec_f;
6498 int x0212_f_back = x0212_f;
6500 int x0213_f_back = x0213_f;
6501 int guess_f_back = guess_f;
6503 guess_f = guess_f_back;
6506 debug_f = debug_f_back;
6509 exec_f = exec_f_back;
6511 x0212_f = x0212_f_back;
6512 x0213_f = x0213_f_back;
6515 if (binmode_f == TRUE)
6516 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6517 if (freopen("","wb",stdout) == NULL)
6524 setbuf(stdout, (char *) NULL);
6526 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6529 if (binmode_f == TRUE)
6530 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6531 if (freopen("","rb",stdin) == NULL) return (-1);
6535 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6539 kanji_convert(stdin);
6540 if (guess_f) print_guessed_code(NULL);
6544 int is_argument_error = FALSE;
/* reset per-file detection state */
6546 input_codename = NULL;
6549 iconv_for_check = 0;
6551 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6553 is_argument_error = TRUE;
6561 /* reopen file for stdout */
6562 if (file_out_f == TRUE) {
/* build the temporary output name from the original file name */
6565 outfname = nkf_xmalloc(strlen(origfname)
6566 + strlen(".nkftmpXXXXXX")
6568 strcpy(outfname, origfname);
/* scan backwards for the last '/' or '\\' path separator */
6572 for (i = strlen(outfname); i; --i){
6573 if (outfname[i - 1] == '/'
6574 || outfname[i - 1] == '\\'){
6580 strcat(outfname, "ntXXXXXX");
6582 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6583 S_IREAD | S_IWRITE);
6585 strcat(outfname, ".nkftmpXXXXXX");
6586 fd = mkstemp(outfname);
/* keep a duplicate of stdout in fd_backup, then point stdout at the temporary file */
6589 || (fd_backup = dup(fileno(stdout))) < 0
6590 || dup2(fd, fileno(stdout)) < 0
6601 outfname = "nkf.out";
6604 if(freopen(outfname, "w", stdout) == NULL) {
6608 if (binmode_f == TRUE) {
6609 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6610 if (freopen("","wb",stdout) == NULL)
6617 if (binmode_f == TRUE)
6618 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6619 if (freopen("","rb",fin) == NULL)
6624 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6628 char *filename = NULL;
/* the file name is passed to print_guessed_code only when nfiles > 1 */
6630 if (nfiles > 1) filename = origfname;
6631 if (guess_f) print_guessed_code(filename);
6637 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
/* put the saved descriptor back on stdout */
6645 if (dup2(fd_backup, fileno(stdout)) < 0){
6648 if (stat(origfname, &sb)) {
6649 fprintf(stderr, "Can't stat %s\n", origfname);
6651 /* restore the permissions */
6652 if (chmod(outfname, sb.st_mode)) {
6653 fprintf(stderr, "Can't set permission %s\n", outfname);
6656 /* restore the timestamp */
6657 if(preserve_time_f){
6658 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6659 tb[0] = tb[1] = sb.st_mtime;
6660 if (utime(outfname, tb)) {
6661 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6664 tb.actime = sb.st_atime;
6665 tb.modtime = sb.st_mtime;
6666 if (utime(outfname, &tb)) {
6667 fprintf(stderr, "Can't set timestamp %s\n", outfname);
/* move the original aside under its backup name */
6672 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6674 unlink(backup_filename);
6676 if (rename(origfname, backup_filename)) {
6677 perror(backup_filename);
6678 fprintf(stderr, "Can't rename %s to %s\n",
6679 origfname, backup_filename);
6681 nkf_xfree(backup_filename);
6684 if (unlink(origfname)){
/* finally move the converted temporary file into place */
6689 if (rename(outfname, origfname)) {
6691 fprintf(stderr, "Can't rename %s to %s\n",
6692 outfname, origfname);
6694 nkf_xfree(outfname);
6699 if (is_argument_error)
6702 #ifdef EASYWIN /*Easy Win */
6703 if (file_out_f == FALSE)
6704 scanf("%d",&end_check);
6707 #else /* for Other OS */
6708 if (file_out_f == TRUE)
6710 #endif /*Easy Win */
6713 #endif /* WIN32DLL */