2 * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3 * Copyright (c) 1996-2010, The nkf Project.
5 * This software is provided 'as-is', without any express or implied
6 * warranty. In no event will the authors be held liable for any damages
7 * arising from the use of this software.
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software
15 * in a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
18 * 2. Altered source versions must be plainly marked as such, and must not be
19 * misrepresented as being the original software.
21 * 3. This notice may not be removed or altered from any source distribution.
23 #define NKF_VERSION "2.1.3"
24 #define NKF_RELEASE_DATE "2012-09-13"
26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27 "Copyright (C) 1996-2012, The nkf Project."
38 # define INCL_DOSERRORS
44 /* state of output_mode and input_mode
123 NKF_ENCODING_TABLE_SIZE,
124 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128 JIS_X_0208 = 0x1168, /* @B */
129 JIS_X_0212 = 0x1159, /* D */
130 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131 JIS_X_0213_2 = 0x1229, /* P */
132 JIS_X_0213_1 = 0x1233 /* Q */
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
149 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
150 void (*oconv)(nkf_char c2, nkf_char c1);
151 } nkf_native_encoding;
153 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
154 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
155 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
156 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
157 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
158 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
159 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
164 const nkf_native_encoding *base_encoding;
167 nkf_encoding nkf_encoding_table[] = {
168 {ASCII, "US-ASCII", &NkfEncodingASCII},
169 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179 {CP10001, "CP10001", &NkfEncodingShift_JIS},
180 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182 {CP51932, "CP51932", &NkfEncodingEUC_JP},
183 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203 {BINARY, "BINARY", &NkfEncodingASCII},
210 } encoding_name_to_id_table[] = {
215 {"ISO-2022-JP", ISO_2022_JP},
216 {"ISO2022JP-CP932", CP50220},
217 {"CP50220", CP50220},
218 {"CP50221", CP50221},
219 {"CSISO2022JP", CP50221},
220 {"CP50222", CP50222},
221 {"ISO-2022-JP-1", ISO_2022_JP_1},
222 {"ISO-2022-JP-3", ISO_2022_JP_3},
223 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224 {"SHIFT_JIS", SHIFT_JIS},
226 {"MS_Kanji", SHIFT_JIS},
228 {"WINDOWS-31J", WINDOWS_31J},
229 {"CSWINDOWS31J", WINDOWS_31J},
230 {"CP932", WINDOWS_31J},
231 {"MS932", WINDOWS_31J},
232 {"CP10001", CP10001},
235 {"EUCJP-NKF", EUCJP_NKF},
236 {"CP51932", CP51932},
237 {"EUC-JP-MS", EUCJP_MS},
238 {"EUCJP-MS", EUCJP_MS},
239 {"EUCJPMS", EUCJP_MS},
240 {"EUC-JP-ASCII", EUCJP_ASCII},
241 {"EUCJP-ASCII", EUCJP_ASCII},
242 {"SHIFT_JISX0213", SHIFT_JISX0213},
243 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
244 {"EUC-JISX0213", EUC_JISX0213},
245 {"EUC-JIS-2004", EUC_JIS_2004},
248 {"UTF-8-BOM", UTF_8_BOM},
249 {"UTF8-MAC", UTF8_MAC},
250 {"UTF-8-MAC", UTF8_MAC},
252 {"UTF-16BE", UTF_16BE},
253 {"UTF-16BE-BOM", UTF_16BE_BOM},
254 {"UTF-16LE", UTF_16LE},
255 {"UTF-16LE-BOM", UTF_16LE_BOM},
257 {"UTF-32BE", UTF_32BE},
258 {"UTF-32BE-BOM", UTF_32BE_BOM},
259 {"UTF-32LE", UTF_32LE},
260 {"UTF-32LE-BOM", UTF_32LE_BOM},
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
/*
 * Locale-independent character classification and hex helpers.
 *
 * Every use of a macro parameter is parenthesized so that expression
 * arguments (e.g. `a | b` or `x ? y : z`) expand with the intended
 * precedence.  Parameters are still evaluated more than once, so never
 * pass an expression with side effects (such as `*p++`).
 *
 * SP, TAB, CR, LF, DEL, SS3 and the CP932_TABLE_* bounds are defined
 * elsewhere.
 */
#define is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; yields 0 for any non-hex input. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hex digit for the low 4 bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the high byte of c2 (taken as unsigned short) equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/*
 * True for CR, LF, and for characters strictly between SP and DEL other
 * than '?', '=', '_', '(', ')', '.' and 0x22 (double quote).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
304 #define HOLD_SIZE 1024
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
308 #define IOBUF_SIZE 16384
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
319 /* MIME preprocessor */
321 #ifdef EASYWIN /*Easy Win */
322 extern POINT _BufferSize;
331 void (*status_func)(struct input_code *, nkf_char);
332 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
337 static nkf_encoding *input_encoding = NULL;
338 static nkf_encoding *output_encoding = NULL;
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
342 * 0: Shift_JIS, eucJP-ascii
347 #define UCS_MAP_ASCII 0
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
351 static int ms_ucs_map_f = UCS_MAP_ASCII;
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static int no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
357 static int no_best_fit_chars_f = FALSE;
358 static int input_endian = ENDIAN_BIG;
359 static int input_bom_f = FALSE;
360 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
361 static void (*encode_fallback)(nkf_char c) = NULL;
362 static void w_status(struct input_code *, nkf_char);
364 #ifdef UTF8_OUTPUT_ENABLE
365 static int output_bom_f = FALSE;
366 static int output_endian = ENDIAN_BIG;
369 static void std_putc(nkf_char c);
370 static nkf_char std_getc(FILE *f);
371 static nkf_char std_ungetc(nkf_char c,FILE *f);
373 static nkf_char broken_getc(FILE *f);
374 static nkf_char broken_ungetc(nkf_char c,FILE *f);
376 static nkf_char mime_getc(FILE *f);
378 static void mime_putc(nkf_char c);
382 #if !defined(PERL_XS) && !defined(WIN32DLL)
383 static unsigned char stdibuf[IOBUF_SIZE];
384 static unsigned char stdobuf[IOBUF_SIZE];
387 #define NKF_UNSPECIFIED (-TRUE)
/* Global conversion-option flags and their initial values. */
390 static int unbuf_f = FALSE;
391 static int estab_f = FALSE;
392 static int nop_f = FALSE;
393 static int binmode_f = TRUE; /* binary mode */
394 static int rot_f = FALSE; /* rot14/43 mode -- NOTE(review): probably means ROT13/47; confirm */
395 static int hira_f = FALSE; /* hiragana/katakana conversion (henkan) */
396 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
397 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
398 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
399 static int mimebuf_f = FALSE; /* MIME buffered input */
400 static int broken_f = FALSE; /* convert ESC-less broken JIS */
401 static int iso8859_f = FALSE; /* ISO8859 through */
402 static int mimeout_f = FALSE; /* base64 mode */
403 static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201; set_input_encoding()/set_output_encoding() turn NKF_UNSPECIFIED into FALSE for some encodings */
404 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
406 #ifdef UNICODE_NORMALIZATION
407 static int nfc_f = FALSE;
408 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
409 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
413 static int cap_f = FALSE;
414 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
415 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
417 static int url_f = FALSE;
418 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
419 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
/*
 * Tagging scheme for nkf_char values.
 *
 * CLASS_MASK selects the top byte, which holds a class tag; CLASS_UNICODE
 * is the tag used for Unicode code points, whose value lives in the low
 * 24 bits (VALUE_MASK).  PREFIX_EUCG3 is OR-ed into a value by
 * nkf_char_euc3_new().
 *
 * Macro parameters are parenthesized so that expression arguments such as
 * `tag | value` are masked as a whole rather than only their last operand.
 * NKF_INT32_C is defined elsewhere.
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/* True when the class byte of c is exactly CLASS_UNICODE. */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the 24-bit value of c is at most U+FFFF. */
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* True when the 24-bit value of c is at most U+10FFFF. */
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
434 #ifdef NUMCHAR_OPTION
435 static int numchar_f = FALSE;
436 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
437 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
441 static int noout_f = FALSE;
442 static void no_putc(nkf_char c);
443 static int debug_f = FALSE;
444 static void debug(const char *str);
445 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
448 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
449 static void set_input_codename(const char *codename);
452 static int exec_f = 0;
455 #ifdef SHIFTJIS_CP932
456 /* invert IBM extended characters to others */
457 static int cp51932_f = FALSE;
459 /* invert NEC-selected IBM extended characters to IBM extended characters */
460 static int cp932inv_f = TRUE;
462 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
463 #endif /* SHIFTJIS_CP932 */
465 static int x0212_f = FALSE;
466 static int x0213_f = FALSE;
468 static unsigned char prefix_table[256];
470 static void e_status(struct input_code *, nkf_char);
471 static void s_status(struct input_code *, nkf_char);
473 struct input_code input_code_list[] = {
474 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
475 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
476 #ifdef UTF8_INPUT_ENABLE
477 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
478 {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
479 {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
481 {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
484 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
485 static int base64_count = 0;
487 /* X0208 -> ASCII converter */
490 static int f_line = 0; /* chars in line */
491 static int f_prev = 0;
492 static int fold_preserve_f = FALSE; /* preserve new lines */
493 static int fold_f = FALSE;
494 static int fold_len = 0;
497 static unsigned char kanji_intro = DEFAULT_J;
498 static unsigned char ascii_intro = DEFAULT_R;
502 #define FOLD_MARGIN 10
503 #define DEFAULT_FOLD 60
505 static int fold_margin = FOLD_MARGIN;
507 /* process default */
510 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
512 fprintf(stderr,"nkf internal module connection failure.\n");
518 no_connection(nkf_char c2, nkf_char c1)
520 no_connection2(c2,c1,0);
523 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
524 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
526 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
527 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
528 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
534 /* static redirections */
536 static void (*o_putc)(nkf_char c) = std_putc;
538 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
539 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
541 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
542 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
544 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
546 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
547 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
549 /* for strict mime */
550 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
551 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
554 static int output_mode = ASCII; /* output kanji mode */
555 static int input_mode = ASCII; /* input kanji mode */
556 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
558 /* X0201 / X0208 conversion tables */
560 /* X0201 kana conversion table */
562 static const unsigned char cv[]= {
563 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
564 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
565 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
566 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
567 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
568 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
569 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
570 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
571 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
572 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
573 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
574 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
575 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
576 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
577 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
578 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
582 /* X0201 kana conversion table for dakuten */
584 static const unsigned char dv[]= {
585 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
590 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
591 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
592 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
593 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
594 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
596 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603 /* X0201 kana conversion table for han-dakuten */
605 static const unsigned char ev[]= {
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
617 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
625 /* X0208 kigou conversion table */
626 /* 0x8140 - 0x819e */
627 static const unsigned char fv[] = {
629 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
630 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
631 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
632 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
633 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
634 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
635 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
636 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
637 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
638 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
639 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
645 static int option_mode = 0;
646 static int file_out_f = FALSE;
648 static int overwrite_f = FALSE;
649 static int preserve_time_f = FALSE;
650 static int backup_f = FALSE;
651 static char *backup_suffix = "";
654 static int eolmode_f = 0; /* CR, LF, CRLF */
655 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
656 static nkf_char prev_cr = 0; /* CR or 0 */
657 #ifdef EASYWIN /*Easy Win */
658 static int end_check;
662 nkf_xmalloc(size_t size)
666 if (size == 0) size = 1;
670 perror("can't malloc");
678 nkf_xrealloc(void *ptr, size_t size)
680 if (size == 0) size = 1;
682 ptr = realloc(ptr, size);
684 perror("can't realloc");
691 #define nkf_xfree(ptr) free(ptr)
694 nkf_str_caseeql(const char *src, const char *target)
697 for (i = 0; src[i] && target[i]; i++) {
698 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
700 if (src[i] || target[i]) return FALSE;
705 nkf_enc_from_index(int idx)
707 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
710 return &nkf_encoding_table[idx];
714 nkf_enc_find_index(const char *name)
717 if (name[0] == 'X' && *(name+1) == '-') name += 2;
718 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
719 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
720 return encoding_name_to_id_table[i].id;
727 nkf_enc_find(const char *name)
730 idx = nkf_enc_find_index(name);
731 if (idx < 0) return 0;
732 return nkf_enc_from_index(idx);
/* Field accessors for an encoding descriptor pointer `enc`. */
735 #define nkf_enc_name(enc) (enc)->name
736 #define nkf_enc_to_index(enc) (enc)->id
737 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
/* Input/output converter function pointers of enc's base encoding. */
738 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
739 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when enc's base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
740 #define nkf_enc_asciicompat(enc) (\
741 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
742 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when enc's base encoding is one of the UTF-8/UTF-16/UTF-32 families. */
743 #define nkf_enc_unicode_p(enc) (\
744 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
745 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
746 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when enc's id is CP50220, CP50221 or CP50222. */
747 #define nkf_enc_cp5022x_p(enc) (\
748 nkf_enc_to_index(enc) == CP50220 ||\
749 nkf_enc_to_index(enc) == CP50221 ||\
750 nkf_enc_to_index(enc) == CP50222)
752 #ifdef DEFAULT_CODE_LOCALE
756 #ifdef HAVE_LANGINFO_H
757 return nl_langinfo(CODESET);
758 #elif defined(__WIN32__)
760 sprintf(buf, "CP%d", GetACP());
762 #elif defined(__OS2__)
763 # if defined(INT_IS_SHORT)
769 ULONG ulCP[1], ulncp;
770 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
771 if (ulCP[0] == 932 || ulCP[0] == 943)
772 strcpy(buf, "Shift_JIS");
774 sprintf(buf, "CP%lu", ulCP[0]);
782 nkf_locale_encoding()
784 nkf_encoding *enc = 0;
785 const char *encname = nkf_locale_charmap();
787 enc = nkf_enc_find(encname);
790 #endif /* DEFAULT_CODE_LOCALE */
795 return &nkf_encoding_table[UTF_8];
799 nkf_default_encoding()
801 nkf_encoding *enc = 0;
802 #ifdef DEFAULT_CODE_LOCALE
803 enc = nkf_locale_encoding();
804 #elif defined(DEFAULT_ENCIDX)
805 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
807 if (!enc) enc = nkf_utf8_encoding();
818 nkf_buf_new(int length)
820 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
821 buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
829 nkf_buf_dispose(nkf_buf_t *buf)
836 #define nkf_buf_length(buf) ((buf)->len)
837 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
/*
 * Return the element stored at position `index` of buf.
 * nkf_buf_push() stores at ptr[len] and then increments len, so only
 * positions 0 .. len-1 hold pushed values; index == len is out of range.
 */
840 nkf_buf_at(nkf_buf_t *buf, int index)
842 assert(0 <= index && index < buf->len);
843 return buf->ptr[index];
847 nkf_buf_clear(nkf_buf_t *buf)
853 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
855 if (buf->capa <= buf->len) {
858 buf->ptr[buf->len++] = c;
862 nkf_buf_pop(nkf_buf_t *buf)
864 assert(!nkf_buf_empty_p(buf));
865 return buf->ptr[--buf->len];
868 /* Normalization Form C */
871 #define fprintf dllprintf
877 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
884 "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
885 #ifdef UTF8_OUTPUT_ENABLE
886 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
887 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
890 #ifdef UTF8_INPUT_ENABLE
891 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
892 " UTF option is -W[8,[16,32][B,L]]\n"
/* upper-case J/S/E select the INPUT encoding (cf. the J/S/E/W line) */
894 " J/S/E Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
898 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
899 " M[BQ] MIME encode [B:base64 Q:quoted]\n"
900 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
903 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
904 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
905 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
906 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
909 " O Output to File (DEFAULT 'nkf.out')\n"
910 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
913 " --ic=<encoding> Specify the input encoding\n"
914 " --oc=<encoding> Specify the output encoding\n"
915 " --hiragana --katakana Hiragana/Katakana Conversion\n"
916 " --katakana-hiragana Converts each other\n"
920 " --{cap, url}-input Convert hex after ':' or '%%'\n"
922 #ifdef NUMCHAR_OPTION
923 " --numchar-input Convert Unicode Character Reference\n"
925 #ifdef UTF8_INPUT_ENABLE
926 " --fb-{skip, html, xml, perl, java, subchar}\n"
927 " Specify unassigned character's replacement\n"
932 " --in-place[=SUF] Overwrite original files\n"
933 " --overwrite[=SUF] Preserve timestamp of original files\n"
935 " -g --guess Guess the input code\n"
936 " -v --version Print the version\n"
937 " --help/-V Print this help / configuration\n"
943 show_configuration(void)
946 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
947 " Compile-time options:\n"
948 " Compiled at: " __DATE__ " " __TIME__ "\n"
951 " Default output encoding: "
952 #ifdef DEFAULT_CODE_LOCALE
953 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
954 #elif defined(DEFAULT_ENCIDX)
955 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
961 " Default output end of line: "
962 #if DEFAULT_NEWLINE == CR
964 #elif DEFAULT_NEWLINE == CRLF
970 " Decode MIME encoded string: "
971 #if MIME_DECODE_DEFAULT
977 " Convert JIS X 0201 Katakana: "
984 " --help, --version output: "
985 #if HELP_OUTPUT_HELP_OUTPUT
996 get_backup_filename(const char *suffix, const char *filename)
998 char *backup_filename;
999 int asterisk_count = 0;
1001 int filename_length = strlen(filename);
1003 for(i = 0; suffix[i]; i++){
1004 if(suffix[i] == '*') asterisk_count++;
1008 backup_filename = nkf_xmalloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1009 for(i = 0, j = 0; suffix[i];){
1010 if(suffix[i] == '*'){
1011 backup_filename[j] = '\0';
1012 strncat(backup_filename, filename, filename_length);
1014 j += filename_length;
1016 backup_filename[j++] = suffix[i++];
1019 backup_filename[j] = '\0';
1021 j = filename_length + strlen(suffix);
1022 backup_filename = nkf_xmalloc(j + 1);
1023 strcpy(backup_filename, filename);
1024 strcat(backup_filename, suffix);
1025 backup_filename[j] = '\0';
1027 return backup_filename;
1031 #ifdef UTF8_INPUT_ENABLE
1033 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1038 if(c >= NKF_INT32_C(1)<<shift){
1040 (*f)(0, bin2hex(c>>shift));
1051 encode_fallback_html(nkf_char c)
1056 if(c >= NKF_INT32_C(1000000))
1057 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1058 if(c >= NKF_INT32_C(100000))
1059 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1061 (*oconv)(0, 0x30+(c/10000 )%10);
1063 (*oconv)(0, 0x30+(c/1000 )%10);
1065 (*oconv)(0, 0x30+(c/100 )%10);
1067 (*oconv)(0, 0x30+(c/10 )%10);
1069 (*oconv)(0, 0x30+ c %10);
1075 encode_fallback_xml(nkf_char c)
1080 nkf_each_char_to_hex(oconv, c);
1086 encode_fallback_java(nkf_char c)
1090 if(!nkf_char_unicode_bmp_p(c)){
1094 (*oconv)(0, bin2hex(c>>20));
1095 (*oconv)(0, bin2hex(c>>16));
1099 (*oconv)(0, bin2hex(c>>12));
1100 (*oconv)(0, bin2hex(c>> 8));
1101 (*oconv)(0, bin2hex(c>> 4));
1102 (*oconv)(0, bin2hex(c ));
1107 encode_fallback_perl(nkf_char c)
1112 nkf_each_char_to_hex(oconv, c);
1118 encode_fallback_subchar(nkf_char c)
1120 c = unicode_subchar;
1121 (*oconv)((c>>8)&0xFF, c&0xFF);
1126 static const struct {
1150 {"katakana-hiragana","h3"},
1158 #ifdef UTF8_OUTPUT_ENABLE
1168 {"fb-subchar=", ""},
1170 #ifdef UTF8_INPUT_ENABLE
1171 {"utf8-input", "W"},
1172 {"utf16-input", "W16"},
1173 {"no-cp932ext", ""},
1174 {"no-best-fit-chars",""},
1176 #ifdef UNICODE_NORMALIZATION
1177 {"utf8mac-input", ""},
1189 #ifdef NUMCHAR_OPTION
1190 {"numchar-input", ""},
1196 #ifdef SHIFTJIS_CP932
1207 set_input_encoding(nkf_encoding *enc)
1209 switch (nkf_enc_to_index(enc)) {
1215 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1217 #ifdef SHIFTJIS_CP932
1220 #ifdef UTF8_OUTPUT_ENABLE
1221 ms_ucs_map_f = UCS_MAP_CP932;
1231 case ISO_2022_JP_2004:
1238 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1239 #ifdef SHIFTJIS_CP932
1242 #ifdef UTF8_OUTPUT_ENABLE
1243 ms_ucs_map_f = UCS_MAP_CP932;
1248 #ifdef SHIFTJIS_CP932
1251 #ifdef UTF8_OUTPUT_ENABLE
1252 ms_ucs_map_f = UCS_MAP_CP10001;
1260 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1261 #ifdef SHIFTJIS_CP932
1264 #ifdef UTF8_OUTPUT_ENABLE
1265 ms_ucs_map_f = UCS_MAP_CP932;
1269 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1270 #ifdef SHIFTJIS_CP932
1273 #ifdef UTF8_OUTPUT_ENABLE
1274 ms_ucs_map_f = UCS_MAP_MS;
1278 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1279 #ifdef SHIFTJIS_CP932
1282 #ifdef UTF8_OUTPUT_ENABLE
1283 ms_ucs_map_f = UCS_MAP_ASCII;
1286 case SHIFT_JISX0213:
1287 case SHIFT_JIS_2004:
1289 #ifdef SHIFTJIS_CP932
1296 #ifdef SHIFTJIS_CP932
1300 #ifdef UTF8_INPUT_ENABLE
1301 #ifdef UNICODE_NORMALIZATION
1309 input_endian = ENDIAN_BIG;
1313 input_endian = ENDIAN_LITTLE;
1318 input_endian = ENDIAN_BIG;
1322 input_endian = ENDIAN_LITTLE;
1329 set_output_encoding(nkf_encoding *enc)
1331 switch (nkf_enc_to_index(enc)) {
1333 #ifdef SHIFTJIS_CP932
1334 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1336 #ifdef UTF8_OUTPUT_ENABLE
1337 ms_ucs_map_f = UCS_MAP_CP932;
1341 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1342 #ifdef SHIFTJIS_CP932
1343 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1345 #ifdef UTF8_OUTPUT_ENABLE
1346 ms_ucs_map_f = UCS_MAP_CP932;
1350 #ifdef SHIFTJIS_CP932
1351 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1356 #ifdef SHIFTJIS_CP932
1357 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1363 #ifdef SHIFTJIS_CP932
1364 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1370 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1371 #ifdef UTF8_OUTPUT_ENABLE
1372 ms_ucs_map_f = UCS_MAP_CP932;
1376 #ifdef UTF8_OUTPUT_ENABLE
1377 ms_ucs_map_f = UCS_MAP_CP10001;
1382 #ifdef SHIFTJIS_CP932
1383 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1385 #ifdef UTF8_OUTPUT_ENABLE
1386 ms_ucs_map_f = UCS_MAP_ASCII;
1391 #ifdef SHIFTJIS_CP932
1392 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1394 #ifdef UTF8_OUTPUT_ENABLE
1395 ms_ucs_map_f = UCS_MAP_ASCII;
1399 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1400 #ifdef SHIFTJIS_CP932
1401 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1403 #ifdef UTF8_OUTPUT_ENABLE
1404 ms_ucs_map_f = UCS_MAP_CP932;
1408 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1410 #ifdef UTF8_OUTPUT_ENABLE
1411 ms_ucs_map_f = UCS_MAP_MS;
1415 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1417 #ifdef UTF8_OUTPUT_ENABLE
1418 ms_ucs_map_f = UCS_MAP_ASCII;
1421 case SHIFT_JISX0213:
1422 case SHIFT_JIS_2004:
1424 #ifdef SHIFTJIS_CP932
1425 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1432 #ifdef SHIFTJIS_CP932
1433 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1436 #ifdef UTF8_OUTPUT_ENABLE
1438 output_bom_f = TRUE;
1442 output_bom_f = TRUE;
1445 output_endian = ENDIAN_LITTLE;
1446 output_bom_f = FALSE;
1449 output_endian = ENDIAN_LITTLE;
1450 output_bom_f = TRUE;
1454 output_bom_f = TRUE;
1457 output_endian = ENDIAN_LITTLE;
1458 output_bom_f = FALSE;
1461 output_endian = ENDIAN_LITTLE;
1462 output_bom_f = TRUE;
1468 static struct input_code*
1469 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1472 struct input_code *p = input_code_list;
1474 if (iconv_func == p->iconv_func){
1484 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1486 #ifdef INPUT_CODE_FIX
1487 if (f || !input_encoding)
1494 #ifdef INPUT_CODE_FIX
1495 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1501 if (estab_f && iconv_for_check != iconv){
1502 struct input_code *p = find_inputcode_byfunc(iconv);
1504 set_input_codename(p->name);
1507 iconv_for_check = iconv;
1514 x0212_shift(nkf_char c)
1519 if (0x75 <= c && c <= 0x7f){
1520 ret = c + (0x109 - 0x75);
1523 if (0x75 <= c && c <= 0x7f){
1524 ret = c + (0x113 - 0x75);
1532 x0212_unshift(nkf_char c)
1535 if (0x7f <= c && c <= 0x88){
1536 ret = c + (0x75 - 0x7f);
1537 }else if (0x89 <= c && c <= 0x92){
1538 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1542 #endif /* X0212_ENABLE */
/*
 * e2s_conv: converts the two-byte code (c2, c1) and stores the resulting
 * byte pair through p2 / p1.  Either output pointer may be null; every
 * store is guarded.
 * Returns 1 when c2 is still above 0x7F at the final range check.
 * Callers in this file treat a return value of 0 as success.
 * The name suggests EUC -> Shift_JIS; presumably that is the direction,
 * confirm against the full file.
 * NOTE(review): the guards around the ndx-based branches (which flags
 * enable them) are not among the visible lines.
 */
1545 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
/* rows 0x21..0x2F: lead byte computed directly from the row index */
1551 if((0x21 <= ndx && ndx <= 0x2F)){
1552 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
/* odd rows use the low trail-byte half (skipping 0x7f), even rows the high half */
1553 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/* rows 0x6E..0x7E: same trail-byte rule, different lead-byte base */
1555 }else if(0x6E <= ndx && ndx <= 0x7E){
1556 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1557 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/* otherwise look the code up in the x0212_shiftjis table, indexed by row then column */
1563 else if(nkf_isgraph(ndx)){
1565 const unsigned short *ptr;
1566 ptr = x0212_shiftjis[ndx - 0x21];
1568 val = ptr[(c1 & 0x7f) - 0x21];
1577 c2 = x0212_shift(c2);
1579 #endif /* X0212_ENABLE */
/* anything that is not a 7-bit row at this point cannot be converted */
1581 if(0x7F < c2) return 1;
/* generic formula: two rows share one lead byte; base differs above row 0x5e */
1582 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1583 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/*
 * s2e_conv: converts the byte pair (c2, c1) and produces a row/column
 * pair; the parameters p2 / p1 are the output locations.
 * Returns 1 when c1 is above 0xFC.  One caller in this file (s_iconv)
 * propagates any non-zero return as an error.
 * The name suggests Shift_JIS -> EUC; presumably that is the direction.
 * Visible steps:
 *   - with SHIFTJIS_CP932: table lookups in shiftjis_cp932 / cp932inv
 *   - when x0213_f is clear and c2 is an IBM-extension lead byte:
 *     lookup in shiftjis_x0212, result tagged with PREFIX_EUCG3
 *   - when x0213_f is set and c2 >= 0xF0: row derived arithmetically or
 *     from shift_jisx0213_s1a3_table, tagged with PREFIX_EUCG3
 *   - otherwise the generic row/column arithmetic using SJ0162 / SJ6394
 * NOTE(review): the stores through p2 / p1 and the success return are
 * not among the visible lines.
 */
1588 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1590 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
/* indexed by (c2 - 0xF0) and by whether c1 is in the upper half (0x9E < c1) */
1593 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1594 if (0xFC < c1) return 1;
1595 #ifdef SHIFTJIS_CP932
1596 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1597 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1604 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1605 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1611 #endif /* SHIFTJIS_CP932 */
1613 if (!x0213_f && is_ibmext_in_sjis(c2)){
1614 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
/* high byte of the table value becomes the row, tagged with the G3 prefix */
1617 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1630 if(x0213_f && c2 >= 0xF0){
1631 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1632 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1633 }else{ /* 78<=k<=94 */
1634 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1635 if (0x9E < c1) c2++;
1638 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1639 #define SJ6394 0x0161 /* 63 - 94 ku offset */
/* each lead byte covers two rows: double it and subtract the range offset */
1640 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
/* trail bytes above 0x9E belong to the second row of the pair */
1641 if (0x9E < c1) c2++;
1644 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1651 c2 = x0212_unshift(c2);
1658 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * nkf_unicode_to_utf8: encodes the code point val as UTF-8 and stores
 * the bytes through p1..p4.
 * Visible branches:
 *   val < 0x800                    -> 2 bytes (110xxxxx 10xxxxxx)
 *   nkf_char_unicode_bmp_p(val)    -> 3 bytes (1110xxxx 10xxxxxx 10xxxxxx)
 *   nkf_char_unicode_value_p(val)  -> 4 bytes (11110xxx followed by three 10xxxxxx)
 * NOTE(review): the single-byte branch, the values written to unused
 * outputs and the fallback branch are not among the visible lines.
 * Callers in this file emit p2..p4 only when non-zero, so unused
 * outputs are presumably set to 0 -- confirm.
 */
1660 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1668 }else if (val < 0x800){
1669 *p1 = 0xc0 | (val >> 6);
1670 *p2 = 0x80 | (val & 0x3f);
1673 } else if (nkf_char_unicode_bmp_p(val)) {
1674 *p1 = 0xe0 | (val >> 12);
1675 *p2 = 0x80 | ((val >> 6) & 0x3f);
1676 *p3 = 0x80 | ( val & 0x3f);
1678 } else if (nkf_char_unicode_value_p(val)) {
1679 *p1 = 0xf0 | (val >> 18);
1680 *p2 = 0x80 | ((val >> 12) & 0x3f);
1681 *p3 = 0x80 | ((val >> 6) & 0x3f);
1682 *p4 = 0x80 | ( val & 0x3f);
/*
 * nkf_utf8_to_unicode: decodes one UTF-8 sequence given as its bytes
 * c1 (lead byte) .. c4 and returns the code point.
 * The sequence length is chosen from the lead byte c1 alone:
 *   0xC2..0xDF -> 2 bytes, 0xE0..0xEF -> 3 bytes, 0xF0..0xF4 -> 4 bytes.
 * Trail bytes (0x80..0xBF) and 0xC0/0xC1, which could only start an
 * overlong form, are not valid lead bytes.
 * NOTE(review): the single-byte branch, the statements that fold in the
 * last trail byte of each form and the error returns are on lines not
 * visible here.
 */
1692 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/* 0xC2 and 0xC3 are valid two-byte lead bytes (U+0080..U+00FF); only values up to 0xC1 are rejected */
1699 else if (c1 <= 0xC1) {
1700 /* trail byte or invalid */
1703 else if (c1 <= 0xDF) {
1705 wc = (c1 & 0x1F) << 6;
1708 else if (c1 <= 0xEF) {
1710 wc = (c1 & 0x0F) << 12;
1711 wc |= (c2 & 0x3F) << 6;
/* the form is selected by the lead byte c1; lead bytes above 0xF4 would exceed U+10FFFF */
1714 else if (c1 <= 0xF4) {
1716 wc = (c1 & 0x0F) << 18;
1717 wc |= (c2 & 0x3F) << 12;
1718 wc |= (c3 & 0x3F) << 6;
1728 #ifdef UTF8_INPUT_ENABLE
/*
 * unicode_to_jis_common2: two-level table lookup.
 *   pp     table of row pointers, psize its number of rows
 *   c1     selects the row, c0 selects the entry within the row
 *   p2, p1 output locations for the converted pair
 * Returns 1 (no conversion) when the table or the selected row is
 * missing, when an index is out of range, when the entry is 0, or --
 * with no_cp932ext_f set -- when the entry is one of the CP932 extension
 * codes named in the comments below.
 * NOTE(review): how c1 / c0 are reduced to indices before the range
 * checks, and the success path that stores through p2 / p1, are on
 * lines not visible here.
 */
1730 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1731 const unsigned short *const *pp, nkf_char psize,
1732 nkf_char *p2, nkf_char *p1)
1735 const unsigned short *p;
1738 if (pp == 0) return 1;
1741 if (c1 < 0 || psize <= c1) return 1;
1743 if (p == 0) return 1;
1746 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
/* a zero table entry means "no mapping" */
1748 if (val == 0) return 1;
1749 if (no_cp932ext_f && (
1750 (val>>8) == 0x2D || /* NEC special characters */
1751 val > NKF_INT32_C(0xF300) /* IBM extended characters */
/* a row value of SO is rewritten to the JIS X 0201 kana marker */
1759 if (c2 == SO) c2 = JIS_X_0201_1976_K;
/*
 * unicode_to_jis_common: converts a UTF-8 sequence given as bytes
 * (c2, c1, c0), lead byte first, and stores the result through p2 / p1.
 * Visible structure:
 *   - c2 < 0xe0: two-byte form.  When no_best_fit_chars_f is set,
 *     selected characters are rejected (return 1) using the
 *     no_best_fit_chars_table_* arrays, indexed by the low six bits of
 *     the trail byte; which table applies depends on ms_ucs_map_f and
 *     cp932inv_f.  Otherwise a table chosen by ms_ucs_map_f is searched
 *     through unicode_to_jis_common2().
 *   - next branch: three-byte form, with individual best-fit exclusions
 *     and a table chosen by ms_ucs_map_f, indexed by c2 - 0xE0.
 *   - with SHIFTJIS_CP932, a successful G3 result is passed through
 *     e2s_conv() and then s2e_conv() when cp932inv_f is clear.
 * NOTE(review): the three-byte branch is guarded by "c0 < 0xF0" although
 * the preceding branch tests the lead byte c2 and the branch indexes
 * ppp[c2 - 0xE0]; this looks like it was meant to be c2.  Callers
 * visible in this file only pass lead bytes up to 0xEF -- confirm before
 * changing.
 */
1767 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1769 const unsigned short *const *pp;
1770 const unsigned short *const *const *ppp;
/* each table has 64 entries, one per trail-byte value 0x80..0xBF; non-zero entries cause a return of 1 */
1771 static const char no_best_fit_chars_table_C2[] =
1772 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1773 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1774 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1775 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1776 static const char no_best_fit_chars_table_C2_ms[] =
1777 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1778 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1779 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1780 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1781 static const char no_best_fit_chars_table_932_C2[] =
1782 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1783 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1784 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1785 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1786 static const char no_best_fit_chars_table_932_C3[] =
1787 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1788 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1789 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1790 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
/* two-byte sequences */
1796 }else if(c2 < 0xe0){
1797 if(no_best_fit_chars_f){
1798 if(ms_ucs_map_f == UCS_MAP_CP932){
1801 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1804 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1807 }else if(!cp932inv_f){
1810 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1813 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1816 }else if(ms_ucs_map_f == UCS_MAP_MS){
1817 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1818 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1836 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1837 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1838 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1840 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
/* three-byte sequences -- see the review note in the header about c0 vs c2 */
1841 }else if(c0 < 0xF0){
1842 if(no_best_fit_chars_f){
1843 if(ms_ucs_map_f == UCS_MAP_CP932){
1844 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1845 }else if(ms_ucs_map_f == UCS_MAP_MS){
1850 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1853 if(c0 == 0x92) return 1;
1858 if(c1 == 0x80 || c0 == 0x9C) return 1;
1861 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1866 if(c0 == 0x94) return 1;
1869 if(c0 == 0xBB) return 1;
1879 if(c0 == 0x95) return 1;
1882 if(c0 == 0xA5) return 1;
1889 if(c0 == 0x8D) return 1;
1892 if(c0 == 0x9E && !cp932inv_f) return 1;
1895 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1903 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1904 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1905 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
/* the second-level table is selected by the lead byte relative to 0xE0 */
1907 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1909 #ifdef SHIFTJIS_CP932
1910 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1912 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1913 s2e_conv(s2, s1, p2, p1);
1922 #ifdef UTF8_OUTPUT_ENABLE
/*
 * e2w_conv: looks up the value for the internal code (c2, c1) in the
 * euc_to_utf8 / x0212_to_utf8 tables.  The table row p is chosen in
 * three ways:
 *   - c2 == JIS_X_0201_1976_K: the single-byte table euc_to_utf8_1byte
 *   - is_eucg3(c2): a row of x0212_to_utf8_2bytes
 *   - otherwise: a row of one of the euc_to_utf8_2bytes* tables,
 *     selected by ms_ucs_map_f
 * Row and column indices are the 7-bit values minus 0x21 and are range
 * checked before use.
 * Callers in this file treat a return value of 0 as "no mapping".
 * NOTE(review): the special cases under UCS_MAP_CP10001 and for
 * 0x8F22/0x43, and the final lookup and return, are on lines not
 * visible here.
 */
1924 e2w_conv(nkf_char c2, nkf_char c1)
1926 const unsigned short *p;
1928 if (c2 == JIS_X_0201_1976_K) {
1929 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1937 p = euc_to_utf8_1byte;
1939 } else if (is_eucg3(c2)){
1940 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
/* strip the prefix bits and rebase the row to 0 */
1943 c2 = (c2&0x7f) - 0x21;
1944 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1945 p = x0212_to_utf8_2bytes[c2];
1951 c2 = (c2&0x7f) - 0x21;
1952 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1954 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1955 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1956 euc_to_utf8_2bytes_ms[c2];
/* column index within the selected row */
1961 c1 = (c1 & 0x7f) - 0x21;
1962 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
/*
 * w2e_conv: converts a UTF-8 sequence (c2 = lead byte, then c1, c0) and
 * stores the result through p2 / p1.
 * Lead bytes 0xc0..0xef are handed to unicode_to_jis_common().  With
 * NUMCHAR_OPTION a visible fallback stores the decoded code point,
 * wrapped by nkf_char_unicode_new(), through p1.
 * NOTE(review): the branch before 0xc0, the condition guarding the
 * NUMCHAR_OPTION fallback and the return statement are not visible.
 */
1969 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1976 }else if (0xc0 <= c2 && c2 <= 0xef) {
1977 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1978 #ifdef NUMCHAR_OPTION
/* keep the character as a raw Unicode value; the fourth byte is passed as 0 */
1981 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1989 #ifdef UTF8_INPUT_ENABLE
/*
 * w16e_conv: converts the Unicode code point val and stores the result
 * through p2 / p1.
 * For BMP values the code point is first encoded as UTF-8 and then
 * converted with unicode_to_jis_common().  Two visible paths instead
 * store the code point itself, wrapped by nkf_char_unicode_new(),
 * through p1.
 * NOTE(review): the first branch, the conditions selecting the two
 * wrapped-value paths and the return value are not visible here.
 */
1991 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1993 nkf_char c1, c2, c3, c4;
2000 else if (nkf_char_unicode_bmp_p(val)){
2001 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2002 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2005 *p1 = nkf_char_unicode_new(val);
2011 *p1 = nkf_char_unicode_new(val);
/*
 * e_iconv: input converter taking up to three bytes (c2, c1, c0).
 * Visible cases:
 *   - c2 is JIS_X_0201_1976_K or SS2: replaced by GETA1/GETA2 when
 *     iso2022jp_f is set and x0201_f is clear; otherwise c2 becomes
 *     JIS_X_0201_1976_K on a visible line.
 *   - c2 == 0x8f (three-byte form): rows 0xF5..0xFE are mapped to
 *     Unicode values starting at 0xE3AC unless cp51932_f or x0213_f is
 *     set; otherwise c2 is combined with the 7-bit value of c1.
 *   - EOF, 0, values below SP and ISO_8859_1 take a separate branch.
 *   - two-byte form: rows 0xF5..0xFE are mapped to Unicode values
 *     starting at 0xE000 when ms_ucs_map_f is set and cp51932_f is
 *     clear; with cp51932_f, rows 0x79..0x7c are passed through
 *     e2s_conv() and s2e_conv().
 * NOTE(review): the output call and return value are not visible.
 */
2018 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2020 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
2021 if (iso2022jp_f && !x0201_f) {
2022 c2 = GETA1; c1 = GETA2;
2024 c2 = JIS_X_0201_1976_K;
2028 }else if (c2 == 0x8f){
2032 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2033 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
/* 94 cells per row; this block starts at 0xE3AC */
2034 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2037 c2 = (c2 << 8) | (c1 & 0x7f);
2039 #ifdef SHIFTJIS_CP932
2042 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2043 s2e_conv(s2, s1, &c2, &c1);
2050 #endif /* SHIFTJIS_CP932 */
2052 #endif /* X0212_ENABLE */
2053 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2056 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2057 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
/* 94 cells per row; this block starts at 0xE000 */
2058 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2063 #ifdef SHIFTJIS_CP932
2064 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2066 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2067 s2e_conv(s2, s1, &c2, &c1);
2074 #endif /* SHIFTJIS_CP932 */
/*
 * s_iconv: input converter for byte pairs (c2, c1); c0 is not used on
 * the visible lines.
 * Visible cases:
 *   - c2 is JIS_X_0201_1976_K or in 0xA1..0xDF: replaced by GETA1/GETA2
 *     when iso2022jp_f is set and x0201_f is clear.
 *   - EOF, 0 and values below SP take a separate branch.
 *   - lead bytes 0xF0..0xF9 with trail 0x40..0xFC (when x0213_f is
 *     clear) are mapped to Unicode values starting at 0xE000, 188 cells
 *     per lead byte; trail byte 0x7F is rejected with a return of 0.
 *   - everything else goes through s2e_conv(); a non-zero result is
 *     returned to the caller.
 * NOTE(review): the output call and the normal return are not visible.
 */
2082 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2084 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2085 if (iso2022jp_f && !x0201_f) {
2086 c2 = GETA1; c1 = GETA2;
2090 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2092 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2094 if(c1 == 0x7F) return 0;
/* (0x7E < c1) closes the gap left by the unused trail byte 0x7F */
2095 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2098 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2099 if (ret) return ret;
/*
 * w_iconv: UTF-8 input converter.  c1 is the lead byte, c2 and c3 the
 * following values; a local c4 holds a fourth byte.
 * The lead byte (0xC0 and above) is classified through
 * w_iconv_utf8_1st_byte, and each class applies its own range check to
 * the second byte plus the generic 10xxxxxx check to later bytes.
 * Visible return values: 0 when the second byte is out of range for a
 * two-byte form, -1 / -2 when c3 is still 0 for a longer form.
 * After validation, four-byte sequences ((c1 & 0xf8) == 0xf0) are
 * decoded with nkf_utf8_to_unicode() and wrapped by
 * nkf_char_unicode_new(); other sequences go through w2e_conv().
 * NOTE(review): the case labels, the way c4 is obtained, the actions
 * taken when a range check fails and the final output call are on lines
 * not visible here.
 */
2106 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2108 nkf_char ret = 0, c4 = 0;
/* class of each lead byte, indexed by (c1 - 0xC0); visible rows cover 0xC0..0xFF */
2109 static const char w_iconv_utf8_1st_byte[] =
2111 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2112 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2113 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2114 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2121 if (c1 < 0 || 0xff < c1) {
2122 }else if (c1 == 0) { /* 0 : 1 byte*/
2124 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2127 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2129 if (c2 < 0x80 || 0xBF < c2) return 0;
2132 if (c3 == 0) return -1;
2133 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2138 if (c3 == 0) return -1;
2139 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2143 if (c3 == 0) return -1;
2144 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2148 if (c3 == 0) return -2;
2149 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2153 if (c3 == 0) return -2;
2154 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2158 if (c3 == 0) return -2;
2159 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2167 if (c1 == 0 || c1 == EOF){
2168 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2169 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2172 ret = w2e_conv(c1, c2, c3, &c1, &c2);
/* error code returned for code points that cannot be accepted */
2180 #define NKF_ICONV_INVALID_CODE_RANGE -13
/*
 * unicode_iconv: classifies the code point wc.
 *   - (wc >> 11) == 27, i.e. 0xD800..0xDFFF: surrogate, rejected with
 *     NKF_ICONV_INVALID_CODE_RANGE
 *   - wc < 0xFFFF: converted with w16e_conv(); a non-zero result is
 *     returned as is
 *   - wc < 0x10FFFF: kept as a raw Unicode value via
 *     nkf_char_unicode_new()
 *   - anything else: NKF_ICONV_INVALID_CODE_RANGE
 * NOTE(review): both upper bounds are exclusive, so 0xFFFF takes the
 * raw-value path and 0x10FFFF itself is rejected -- confirm intended.
 * NOTE(review): the first branch and the final output call are on lines
 * not visible here.
 */
2182 unicode_iconv(nkf_char wc)
2190 }else if ((wc>>11) == 27) {
2191 /* unpaired surrogate */
2192 return NKF_ICONV_INVALID_CODE_RANGE;
2193 }else if (wc < 0xFFFF) {
2194 ret = w16e_conv(wc, &c2, &c1);
2195 if (ret) return ret;
2196 }else if (wc < 0x10FFFF) {
2198 c1 = nkf_char_unicode_new(wc);
2200 return NKF_ICONV_INVALID_CODE_RANGE;
/* "need more input" codes; note that the expansions are not parenthesized */
2206 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
2207 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
/*
 * Combine a surrogate pair into a code point.  The constant equals
 * (0xD800 << 10) + 0xDC00 - 0x10000, so the surrogate bases are removed
 * and the 0x10000 offset is added in one step.
 */
2208 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
/*
 * nkf_iconv_utf_16: takes up to four bytes c1..c4 of UTF-16 input and
 * passes the resulting code point wc to unicode_iconv().
 * The byte order is chosen by input_endian.  When the high byte of the
 * first unit is 0xD8..0xDB (high surrogate), the second unit must have
 * a high byte of 0xDC..0xDF (low surrogate); otherwise
 * NKF_ICONV_NEED_TWO_MORE_BYTES is returned.
 * NOTE(review): the non-surrogate path and the handling of the
 * remaining input_endian values are on lines not visible here.
 */
2210 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2219 if (input_endian == ENDIAN_BIG) {
2220 if (0xD8 <= c1 && c1 <= 0xDB) {
2221 if (0xDC <= c3 && c3 <= 0xDF) {
2222 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2223 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
/* other byte order: the high byte of each unit is the second byte */
2228 if (0xD8 <= c2 && c2 <= 0xDB) {
2229 if (0xDC <= c4 && c4 <= 0xDF) {
2230 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2231 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2237 return (*unicode_iconv)(wc);
/*
 * w_iconv16: returns the constant 16.  As the existing comment notes,
 * the value only has to differ from the one returned by w_iconv32;
 * elsewhere in this file the function's address is compared against
 * the active converter to recognise UTF-16 input.
 */
2241 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2244 return 16; /* different from w_iconv32 */
/*
 * w_iconv32: counterpart of w_iconv16 for UTF-32; returns the
 * constant 32.
 */
2248 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
2251 return 32; /* different from w_iconv16 */
/*
 * nkf_iconv_utf_32: assembles a code point from the four bytes c1..c4
 * according to input_endian and passes it to unicode_iconv().
 * In each visible assignment only three of the four bytes contribute
 * (24 bits); the byte that would be the most significant is not used.
 * Returns NKF_ICONV_INVALID_CODE_RANGE on one visible path.
 * NOTE(review): the case labels are not visible.  The four assignments
 * presumably correspond to the big, little, 2143 and 3412 byte orders,
 * and the error return to an unknown input_endian value -- confirm.
 */
2255 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2264 switch(input_endian){
2266 wc = c2 << 16 | c3 << 8 | c4;
2269 wc = c3 << 16 | c2 << 8 | c1;
2272 wc = c1 << 16 | c4 << 8 | c3;
2275 wc = c4 << 16 | c1 << 8 | c2;
2278 return NKF_ICONV_INVALID_CODE_RANGE;
2281 return (*unicode_iconv)(wc);
/*
 * output_ascii_escape_sequence(mode): when the current output_mode is
 * neither ASCII nor ISO_8859_1, emits bytes ending with ascii_intro
 * through o_putc and then records mode as the new output_mode.
 * NOTE(review): the bytes emitted before ascii_intro and the end of the
 * do/while wrapper are on lines not visible here.
 */
2285 #define output_ascii_escape_sequence(mode) do { \
2286 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2289 (*o_putc)(ascii_intro); \
2290 output_mode = mode; \
/*
 * output_escape_sequence: switches the output to the character set
 * identified by mode.  Nothing is done on the path taken when
 * output_mode already equals mode (presumably an early return --
 * confirm).
 * NOTE(review): only the JIS_X_0201_1976_K case label and one
 * o_putc(kanji_intro) call are visible; the sequences emitted per mode
 * are on lines not shown.
 */
2295 output_escape_sequence(int mode)
2297 if (output_mode == mode)
2305 case JIS_X_0201_1976_K:
2313 (*o_putc)(kanji_intro);
/*
 * j_oconv: output converter for the internal pair (c2, c1); it selects
 * character sets with the output_*escape_sequence helpers.
 * Visible handling:
 *   - with NUMCHAR_OPTION, a raw Unicode value (c2 == 0 and
 *     nkf_char_unicode_p(c1)) is first converted with w16e_conv().  If
 *     it is still a raw value, 0xE000..0xE757 is mapped to rows starting
 *     at 0x7F (94 cells per row) when ms_ucs_map_f is set; otherwise
 *     encode_fallback, if set, is called.
 *   - EOF and the default single-byte case switch to ASCII.
 *   - ISO_8859_1 and JIS_X_0201_1976_K switch to their own modes.
 *   - G3 codes switch to JIS_X_0213_2 or JIS_X_0212 depending on
 *     x0213_f and emit the 7-bit row.
 *   - other pairs are range checked (rows up to 0x92 in one variant, up
 *     to 0x7e in the other) and dropped when out of range, then emitted
 *     under JIS_X_0213_1 or JIS_X_0208.
 * NOTE(review): the condition selecting the range-check variant and
 * most o_putc calls are on lines not visible here.
 */
2338 j_oconv(nkf_char c2, nkf_char c1)
2340 #ifdef NUMCHAR_OPTION
2341 if (c2 == 0 && nkf_char_unicode_p(c1)){
2342 w16e_conv(c1, &c2, &c1);
2343 if (c2 == 0 && nkf_char_unicode_p(c1)){
2344 c2 = c1 & VALUE_MASK;
2345 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2348 c2 = 0x7F + c1 / 94;
2349 c1 = 0x21 + c1 % 94;
2351 if (encode_fallback) (*encode_fallback)(c1);
2358 output_ascii_escape_sequence(ASCII);
2361 else if (c2 == EOF) {
2362 output_ascii_escape_sequence(ASCII);
2365 else if (c2 == ISO_8859_1) {
2366 output_ascii_escape_sequence(ISO_8859_1);
2369 else if (c2 == JIS_X_0201_1976_K) {
2370 output_escape_sequence(JIS_X_0201_1976_K);
2373 } else if (is_eucg3(c2)){
2374 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2375 (*o_putc)(c2 & 0x7f);
2380 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2381 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2382 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
/*
 * e_oconv: output converter for the internal pair (c2, c1); two-byte
 * characters are written with the high bit set on both bytes.
 * Visible handling:
 *   - a raw Unicode value is first converted with w16e_conv().  If it is
 *     still raw, 0xE000..0xE757 is mapped to user-defined rows when
 *     x0212_f is set; otherwise encode_fallback, if set, is called.
 *   - c2 == 0: output_mode becomes ASCII.
 *   - JIS_X_0201_1976_K: SS2 followed by c1 with the high bit set.
 *   - ISO_8859_1: c1 with the high bit set.
 *   - G3 codes: with SHIFTJIS_CP932 they are passed through e2s_conv()
 *     and s2e_conv() first.
 *   - other pairs: both bytes must satisfy nkf_isgraph(); otherwise the
 *     input converter is reset with set_iconv(FALSE, 0) and the
 *     character is dropped.
 * NOTE(review): the prefix byte written for G3 codes and the guards
 * around the duplicated o_putc pairs are on lines not visible here.
 */
2389 e_oconv(nkf_char c2, nkf_char c1)
2391 if (c2 == 0 && nkf_char_unicode_p(c1)){
2392 w16e_conv(c1, &c2, &c1);
2393 if (c2 == 0 && nkf_char_unicode_p(c1)){
2394 c2 = c1 & VALUE_MASK;
2395 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* the first ten rows stay in the main plane (0x75..); later rows get the 0x8F prefix value */
2399 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2400 c1 = 0x21 + c1 % 94;
2403 (*o_putc)((c2 & 0x7f) | 0x080);
2404 (*o_putc)(c1 | 0x080);
2406 (*o_putc)((c2 & 0x7f) | 0x080);
2407 (*o_putc)(c1 | 0x080);
2411 if (encode_fallback) (*encode_fallback)(c1);
2419 } else if (c2 == 0) {
2420 output_mode = ASCII;
2422 } else if (c2 == JIS_X_0201_1976_K) {
2423 output_mode = EUC_JP;
2424 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2425 } else if (c2 == ISO_8859_1) {
2426 output_mode = ISO_8859_1;
2427 (*o_putc)(c1 | 0x080);
2429 } else if (is_eucg3(c2)){
2430 output_mode = EUC_JP;
2431 #ifdef SHIFTJIS_CP932
2434 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2435 s2e_conv(s2, s1, &c2, &c1);
2440 output_mode = ASCII;
2442 }else if (is_eucg3(c2)){
2445 (*o_putc)((c2 & 0x7f) | 0x080);
2446 (*o_putc)(c1 | 0x080);
2449 (*o_putc)((c2 & 0x7f) | 0x080);
2450 (*o_putc)(c1 | 0x080);
2454 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2455 set_iconv(FALSE, 0);
2456 return; /* too late to rescue this char */
2458 output_mode = EUC_JP;
2459 (*o_putc)(c2 | 0x080);
2460 (*o_putc)(c1 | 0x080);
/*
 * s_oconv: output converter for the internal pair (c2, c1) that uses
 * e2s_conv() to obtain the output byte pair.
 * Visible handling:
 *   - with NUMCHAR_OPTION, a raw Unicode value is first converted with
 *     w16e_conv().  If it is still raw, 0xE000..0xE757 is mapped to lead
 *     bytes starting at 0xF0 (cp932inv_f set) or 0xEB, 188 cells per
 *     lead byte, when x0213_f is clear; otherwise encode_fallback, if
 *     set, is called.
 *   - c2 == 0: output_mode becomes ASCII.
 *   - JIS_X_0201_1976_K: output_mode becomes SHIFT_JIS.
 *   - ISO_8859_1: c1 with the high bit set.
 *   - G3 codes: converted in place with e2s_conv().
 *   - other pairs: both bytes must satisfy nkf_isprint(); otherwise the
 *     input converter is reset and the character is dropped.  With
 *     SHIFTJIS_CP932 a cp932inv table lookup may replace the pair.
 *   - prefix_table[c1], when non-zero, is written before the trail byte.
 * NOTE(review): most o_putc calls and the first half of the cp932inv
 * condition are on lines not visible here.
 */
2465 s_oconv(nkf_char c2, nkf_char c1)
2467 #ifdef NUMCHAR_OPTION
2468 if (c2 == 0 && nkf_char_unicode_p(c1)){
2469 w16e_conv(c1, &c2, &c1);
2470 if (c2 == 0 && nkf_char_unicode_p(c1)){
2471 c2 = c1 & VALUE_MASK;
2472 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2475 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
/* trail bytes start at 0x40 and skip 0x7f */
2477 c1 += 0x40 + (c1 > 0x3e);
2482 if(encode_fallback)(*encode_fallback)(c1);
2491 } else if (c2 == 0) {
2492 output_mode = ASCII;
2494 } else if (c2 == JIS_X_0201_1976_K) {
2495 output_mode = SHIFT_JIS;
2497 } else if (c2 == ISO_8859_1) {
2498 output_mode = ISO_8859_1;
2499 (*o_putc)(c1 | 0x080);
2501 } else if (is_eucg3(c2)){
2502 output_mode = SHIFT_JIS;
2503 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2509 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2510 set_iconv(FALSE, 0);
2511 return; /* too late to rescue this char */
2513 output_mode = SHIFT_JIS;
2514 e2s_conv(c2, c1, &c2, &c1);
2516 #ifdef SHIFTJIS_CP932
2518 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2519 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2525 #endif /* SHIFTJIS_CP932 */
2528 if (prefix_table[(unsigned char)c1]){
2529 (*o_putc)(prefix_table[(unsigned char)c1]);
2535 #ifdef UTF8_OUTPUT_ENABLE
/*
 * w_oconv: output converter that writes UTF-8.
 * output_bom_f is cleared on a visible line.  A raw Unicode value
 * (c2 == 0 and nkf_char_unicode_p(c1)) is masked with VALUE_MASK and
 * encoded directly; other pairs are first mapped with e2w_conv().  In
 * both paths bytes two to four are written only when non-zero.
 * NOTE(review): the condition guarding the output_bom_f reset and the
 * special cases before the two encoding paths (EOF, single bytes) are
 * on lines not visible here.
 */
2537 w_oconv(nkf_char c2, nkf_char c1)
2543 output_bom_f = FALSE;
2554 if (c2 == 0 && nkf_char_unicode_p(c1)){
2555 val = c1 & VALUE_MASK;
2556 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2558 if (c2) (*o_putc)(c2);
2559 if (c3) (*o_putc)(c3);
2560 if (c4) (*o_putc)(c4);
2567 val = e2w_conv(c2, c1);
2569 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2571 if (c2) (*o_putc)(c2);
2572 if (c3) (*o_putc)(c3);
2573 if (c4) (*o_putc)(c4);
/*
 * w_oconv16: output converter that writes UTF-16 in the byte order
 * given by output_endian.
 * output_bom_f is cleared on a visible line.  For a raw Unicode value:
 * BMP code points are split into high and low byte; larger values up to
 * UNICODE_MAX are written as a surrogate pair, each unit in the
 * selected byte order.  Other pairs are mapped with e2w_conv() first.
 * NOTE(review): the condition guarding the BOM handling, the masking of
 * c1 before the surrogate computation and the final o_putc calls for
 * single units are on lines not visible here.
 */
2579 w_oconv16(nkf_char c2, nkf_char c1)
2582 output_bom_f = FALSE;
2583 if (output_endian == ENDIAN_LITTLE){
2597 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2598 if (nkf_char_unicode_bmp_p(c1)) {
2599 c2 = (c1 >> 8) & 0xff;
2603 if (c1 <= UNICODE_MAX) {
/* 0xD7C0 is 0xD800 - (0x10000 >> 10): the 0x10000 offset is folded into the base */
2604 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2605 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2606 if (output_endian == ENDIAN_LITTLE){
2607 (*o_putc)(c2 & 0xff);
2608 (*o_putc)((c2 >> 8) & 0xff);
2609 (*o_putc)(c1 & 0xff);
2610 (*o_putc)((c1 >> 8) & 0xff);
2612 (*o_putc)((c2 >> 8) & 0xff);
2613 (*o_putc)(c2 & 0xff);
2614 (*o_putc)((c1 >> 8) & 0xff);
2615 (*o_putc)(c1 & 0xff);
2621 nkf_char val = e2w_conv(c2, c1);
2622 c2 = (val >> 8) & 0xff;
2627 if (output_endian == ENDIAN_LITTLE){
/*
 * w_oconv32: output converter that writes UTF-32 in the byte order
 * given by output_endian.
 * output_bom_f is cleared on a visible line.  ISO_8859_1 and raw
 * Unicode values have their own branches; other pairs are mapped with
 * e2w_conv().  The visible o_putc calls write the low three bytes of c1
 * in the selected order.
 * NOTE(review): the condition guarding the BOM handling, the contents
 * of the first two branches and the o_putc call for the most
 * significant byte are on lines not visible here.
 */
2637 w_oconv32(nkf_char c2, nkf_char c1)
2640 output_bom_f = FALSE;
2641 if (output_endian == ENDIAN_LITTLE){
2659 if (c2 == ISO_8859_1) {
2661 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2664 c1 = e2w_conv(c2, c1);
2667 if (output_endian == ENDIAN_LITTLE){
2668 (*o_putc)( c1 & 0xFF);
2669 (*o_putc)((c1 >> 8) & 0xFF);
2670 (*o_putc)((c1 >> 16) & 0xFF);
2674 (*o_putc)((c1 >> 16) & 0xFF);
2675 (*o_putc)((c1 >> 8) & 0xFF);
2676 (*o_putc)( c1 & 0xFF);
/*
 * Score bits, one per category.  Each constant is the previous one
 * shifted left by one, so the categories form a bit mask (0x01..0x80).
 * h_conv selects the candidate with the lowest accumulated score, so a
 * higher bit is a heavier penalty.
 */
2681 #define SCORE_L2 (1) /* Kanji Level 2 */
2682 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2683 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2684 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2685 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2686 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* Undefined Characters */
2687 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2688 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
/* score assigned by status_reset() */
2690 #define SCORE_INIT (SCORE_iMIME)
/* penalties for first bytes whose bits 4..6 equal 0x20, indexed by the low nibble (see code_score) */
2692 static const nkf_char score_table_A0[] = {
2695 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2696 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
/* penalties for first bytes whose bits 4..6 equal 0x70, indexed by the low nibble (see code_score) */
2699 static const nkf_char score_table_F0[] = {
2700 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2701 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2702 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2703 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/*
 * set_code_score: ORs the given score bits into ptr->score.
 * NOTE(review): any guard on ptr is on a line not visible here.
 */
2707 set_code_score(struct input_code *ptr, nkf_char score)
2710 ptr->score |= score;
/*
 * clr_code_score: clears the given score bits in ptr->score.
 * NOTE(review): any guard on ptr is on a line not visible here.
 */
2715 clr_code_score(struct input_code *ptr, nkf_char score)
2718 ptr->score &= ~score;
/*
 * code_score: adds penalty bits to ptr->score based on the character
 * held in ptr->buf (buf[0] is the first byte; buf[1] is read only when
 * UTF8_OUTPUT_ENABLE is defined).
 * Visible rules, tested in this order:
 *   - an error condition           -> SCORE_ERROR
 *   - first byte SS2               -> SCORE_KANA
 *   - first byte 0x8f              -> SCORE_X0212
 *   - no e2w_conv() mapping        -> SCORE_NO_EXIST (UTF8_OUTPUT_ENABLE)
 *   - (c2 & 0x70) == 0x20          -> score_table_A0[low nibble]
 *   - (c2 & 0x70) == 0x70          -> score_table_F0[low nibble]
 *   - (c2 & 0x70) >= 0x50          -> SCORE_L2
 * NOTE(review): the condition of the first (error) branch is on a line
 * not visible here.
 */
2723 code_score(struct input_code *ptr)
2725 nkf_char c2 = ptr->buf[0];
2726 #ifdef UTF8_OUTPUT_ENABLE
2727 nkf_char c1 = ptr->buf[1];
2730 set_code_score(ptr, SCORE_ERROR);
2731 }else if (c2 == SS2){
2732 set_code_score(ptr, SCORE_KANA);
2733 }else if (c2 == 0x8f){
2734 set_code_score(ptr, SCORE_X0212);
2735 #ifdef UTF8_OUTPUT_ENABLE
2736 }else if (!e2w_conv(c2, c1)){
2737 set_code_score(ptr, SCORE_NO_EXIST);
2739 }else if ((c2 & 0x70) == 0x20){
2740 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2741 }else if ((c2 & 0x70) == 0x70){
2742 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2743 }else if ((c2 & 0x70) >= 0x50){
2744 set_code_score(ptr, SCORE_L2);
/*
 * status_disable: marks the candidate ptr as ruled out.  On the visible
 * line, if the candidate's converter is the active one (iconv), the
 * active converter is reset with set_iconv(FALSE, 0).
 * NOTE(review): the fields updated on ptr are on lines not visible.
 */
2749 status_disable(struct input_code *ptr)
2754 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/*
 * status_push_ch: appends c to ptr->buf and advances ptr->index.
 * No bounds check is made on the visible line; callers must keep the
 * number of pushed bytes within the size of buf.
 */
2758 status_push_ch(struct input_code *ptr, nkf_char c)
2760 ptr->buf[ptr->index++] = c;
/* status_clear: body not visible in this view. */
2764 status_clear(struct input_code *ptr)
/* status_reset: sets ptr->score back to SCORE_INIT (other steps not visible). */
2771 status_reset(struct input_code *ptr)
2774 ptr->score = SCORE_INIT;
/* status_reinit: sets ptr->_file_stat to 0 (other steps not visible). */
2778 status_reinit(struct input_code *ptr)
2781 ptr->_file_stat = 0;
/*
 * status_check: acts only when c is at most DEL and the input code is
 * already established (estab_f); the action itself is not visible.
 */
2785 status_check(struct input_code *ptr, nkf_char c)
2787 if (c <= DEL && estab_f){
/*
 * s_status: per-byte state machine that tracks whether the input could
 * be this candidate's encoding (presumably Shift_JIS, from the name and
 * the byte ranges).
 * First-byte classification visible here:
 *   - 0xa1..0xdf: stored as SS2 followed by the byte (single-byte kana)
 *   - 0x81..0x9f and 0xe0..0xea: lead byte of a two-byte character
 *   - 0xed..0xee: lead byte, handled by a separate state
 *   - IBM-extension lead bytes (SHIFTJIS_CP932) and 0xf0..0xfc
 *     (X0212_ENABLE): lead bytes with their own states
 *   - anything else disables the candidate
 * Second-byte states accept 0x40..0x7e and 0x80..0xfc, convert the pair
 * in place with s2e_conv(), and in two of the three states add
 * SCORE_CP932; any other byte disables the candidate.
 * NOTE(review): the state numbers, the switch structure and the calls
 * that score and clear the buffer are on lines not visible here.
 */
2793 s_status(struct input_code *ptr, nkf_char c)
2797 status_check(ptr, c);
2802 }else if (nkf_char_unicode_p(c)){
2804 }else if (0xa1 <= c && c <= 0xdf){
2805 status_push_ch(ptr, SS2);
2806 status_push_ch(ptr, c);
2809 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2811 status_push_ch(ptr, c);
2812 }else if (0xed <= c && c <= 0xee){
2814 status_push_ch(ptr, c);
2815 #ifdef SHIFTJIS_CP932
2816 }else if (is_ibmext_in_sjis(c)){
2818 status_push_ch(ptr, c);
2819 #endif /* SHIFTJIS_CP932 */
2821 }else if (0xf0 <= c && c <= 0xfc){
2823 status_push_ch(ptr, c);
2824 #endif /* X0212_ENABLE */
2826 status_disable(ptr);
2830 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2831 status_push_ch(ptr, c);
2832 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2836 status_disable(ptr);
2840 #ifdef SHIFTJIS_CP932
2841 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2842 status_push_ch(ptr, c);
2843 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2844 set_code_score(ptr, SCORE_CP932);
2849 #endif /* SHIFTJIS_CP932 */
2850 status_disable(ptr);
2853 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2854 status_push_ch(ptr, c);
2855 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2856 set_code_score(ptr, SCORE_CP932);
2859 status_disable(ptr);
/*
 * e_status: per-byte state machine that tracks whether the input could
 * be this candidate's encoding (presumably EUC-JP, from the name and
 * the byte ranges).
 * First-byte classification visible here:
 *   - SS2 or 0xa1..0xfe: start of a two-byte character
 *   - 0x8f (X0212_ENABLE): start of a three-byte character
 *   - anything else disables the candidate
 * Following bytes must be in 0xa1..0xfe; any other byte disables the
 * candidate.
 * NOTE(review): the state numbers, the switch structure and the calls
 * that score and clear the buffer are on lines not visible here.
 */
2866 e_status(struct input_code *ptr, nkf_char c)
2870 status_check(ptr, c);
2875 }else if (nkf_char_unicode_p(c)){
2877 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2879 status_push_ch(ptr, c);
2881 }else if (0x8f == c){
2883 status_push_ch(ptr, c);
2884 #endif /* X0212_ENABLE */
2886 status_disable(ptr);
2890 if (0xa1 <= c && c <= 0xfe){
2891 status_push_ch(ptr, c);
2895 status_disable(ptr);
2900 if (0xa1 <= c && c <= 0xfe){
2902 status_push_ch(ptr, c);
2904 status_disable(ptr);
2906 #endif /* X0212_ENABLE */
2910 #ifdef UTF8_INPUT_ENABLE
/*
 * w_status: per-byte state machine that tracks whether the input could
 * be UTF-8.
 * First-byte classification visible here: 0xc0..0xdf, 0xe0..0xef and
 * 0xf0..0xf4 each start a multi-byte sequence; anything else reaching
 * the final branch disables the candidate.
 * Following bytes must be trail bytes (0x80..0xbf).  In one state, once
 * more bytes than ptr->stat have been collected, the sequence is checked
 * for the byte-order mark EF BB BF and converted in place with
 * w2e_conv().  In another state, bytes are collected only while
 * ptr->index is below ptr->stat.
 * NOTE(review): the state numbers, how the bom flag is used and the
 * scoring calls are on lines not visible here.
 */
2912 w_status(struct input_code *ptr, nkf_char c)
2916 status_check(ptr, c);
2921 }else if (nkf_char_unicode_p(c)){
2923 }else if (0xc0 <= c && c <= 0xdf){
2925 status_push_ch(ptr, c);
2926 }else if (0xe0 <= c && c <= 0xef){
2928 status_push_ch(ptr, c);
2929 }else if (0xf0 <= c && c <= 0xf4){
2931 status_push_ch(ptr, c);
2933 status_disable(ptr);
2938 if (0x80 <= c && c <= 0xbf){
2939 status_push_ch(ptr, c);
2940 if (ptr->index > ptr->stat){
2941 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2942 && ptr->buf[2] == 0xbf);
2943 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2944 &ptr->buf[0], &ptr->buf[1]);
2951 status_disable(ptr);
2955 if (0x80 <= c && c <= 0xbf){
2956 if (ptr->index < ptr->stat){
2957 status_push_ch(ptr, c);
2962 status_disable(ptr);
/*
 * code_status: feeds the byte c to the status function of the entries
 * in input_code_list.  When a single result has been selected and the
 * input code is not yet established (estab_f clear), that candidate's
 * converter is installed with set_iconv(TRUE, ...).  Otherwise, for
 * c <= DEL, the list is walked again from its head.
 * NOTE(review): how result and action_flag are updated, and what the
 * second walk does, are on lines not visible here.
 */
2970 code_status(nkf_char c)
2972 int action_flag = 1;
2973 struct input_code *result = 0;
2974 struct input_code *p = input_code_list;
2976 if (!p->status_func) {
2980 if (!p->status_func)
2982 (p->status_func)(p, c);
2985 }else if(p->stat == 0){
2996 if (result && !estab_f){
2997 set_iconv(TRUE, result->iconv_func);
2998 }else if (c <= DEL){
2999 struct input_code *ptr = input_code_list;
3009 nkf_buf_t *std_gc_buf;
3010 nkf_char broken_state;
3011 nkf_buf_t *broken_buf;
3012 nkf_char mimeout_state;
3016 static nkf_state_t *nkf_state = NULL;
3018 #define STD_GC_BUFSIZE (256)
/*
 * nkf_state_init: prepares the global nkf_state.
 * One path clears the three existing buffers (std_gc_buf, broken_buf,
 * nfc_buf); the other allocates nkf_state with nkf_xmalloc() and
 * creates the buffers with capacities STD_GC_BUFSIZE, 3 and 9.  In both
 * cases broken_state and mimeout_state are reset to 0.
 * NOTE(review): the condition choosing between the two paths is on a
 * line not visible here (presumably whether nkf_state is already
 * allocated -- confirm).
 */
3021 nkf_state_init(void)
3024 nkf_buf_clear(nkf_state->std_gc_buf);
3025 nkf_buf_clear(nkf_state->broken_buf);
3026 nkf_buf_clear(nkf_state->nfc_buf);
3029 nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3030 nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3031 nkf_state->broken_buf = nkf_buf_new(3);
3032 nkf_state->nfc_buf = nkf_buf_new(9);
3034 nkf_state->broken_state = 0;
3035 nkf_state->mimeout_state = 0;
3042 if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3043 return nkf_buf_pop(nkf_state->std_gc_buf);
3050 std_ungetc(nkf_char c, FILE *f)
3052 nkf_buf_push(nkf_state->std_gc_buf, c);
3058 std_putc(nkf_char c)
/* look-ahead buffer filled by h_conv; hold_count is the number of bytes stored */
3065 static nkf_char hold_buf[HOLD_SIZE*2];
3066 static int hold_count = 0;
/*
 * push_hold_buf: appends c2 to hold_buf.
 * Returns EOF when the store makes the buffer full (the byte is still
 * stored), otherwise the new number of stored bytes.
 * NOTE(review): the statement executed when the buffer is already full
 * on entry is on a line not visible here.
 */
3068 push_hold_buf(nkf_char c2)
3070 if (hold_count >= HOLD_SIZE*2)
3072 hold_buf[hold_count++] = c2;
3073 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
/*
 * h_conv: buffers input bytes in hold_buf until the input code is
 * established or the buffer is full, then replays the buffered bytes
 * through the selected converter.
 * Visible steps:
 *   - bytes are read with i_getc and stored with push_hold_buf() until
 *     EOF, a full buffer or estab_f
 *   - the candidate with the lowest score among those that have a
 *     status_func is installed with set_iconv(TRUE, ...)
 *   - the buffered bytes are replayed; the converter's return value
 *     decides whether more bytes are needed, taken from the buffer
 *     first and then from the input stream
 * NOTE(review): the case labels of the switch on the converter's return
 * value and the final return are on lines not visible here.
 */
3077 h_conv(FILE *f, nkf_char c1, nkf_char c2)
3083 /** it must NOT be in the kanji shift sequence */
3084 /** it must NOT be written in JIS7 */
3085 /** and it must be after 2 byte 8bit code */
3091 while ((c2 = (*i_getc)(f)) != EOF) {
3097 if (push_hold_buf(c2) == EOF || estab_f) {
3103 struct input_code *p = input_code_list;
3104 struct input_code *result = p;
3109 if (p->status_func && p->score < result->score) {
3114 set_iconv(TRUE, result->iconv_func);
3119 ** 1) EOF is detected, or
3120 ** 2) Code is established, or
3121 ** 3) Buffer is FULL (but last word is pushed)
3123 ** in 1) and 3) cases, we continue to use
3124 ** Kanji codes by oconv and leave estab_f unchanged.
3129 while (hold_index < hold_count){
3130 c1 = hold_buf[hold_index++];
3131 if (nkf_char_unicode_p(c1)) {
3135 else if (c1 <= DEL){
3138 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3139 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3142 if (hold_index < hold_count){
3143 c2 = hold_buf[hold_index++];
3153 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3156 if (hold_index < hold_count){
3157 c3 = hold_buf[hold_index++];
3158 } else if ((c3 = (*i_getc)(f)) == EOF) {
3163 if (hold_index < hold_count){
3164 c4 = hold_buf[hold_index++];
3165 } else if ((c4 = (*i_getc)(f)) == EOF) {
/* third and fourth byte are packed into the single third argument */
3170 (*iconv)(c1, c2, (c3<<8)|c4);
3173 /* 3 bytes EUC or UTF-8 */
3174 if (hold_index < hold_count){
3175 c3 = hold_buf[hold_index++];
3176 } else if ((c3 = (*i_getc)(f)) == EOF) {
3182 (*iconv)(c1, c2, c3);
3185 if (c3 == EOF) break;
3191 * Check and Ignore BOM
3197 switch(c2 = (*i_getc)(f)){
3199 if((c2 = (*i_getc)(f)) == 0x00){
3200 if((c2 = (*i_getc)(f)) == 0xFE){
3201 if((c2 = (*i_getc)(f)) == 0xFF){
3202 if(!input_encoding){
3203 set_iconv(TRUE, w_iconv32);
3205 if (iconv == w_iconv32) {
3207 input_endian = ENDIAN_BIG;
3210 (*i_ungetc)(0xFF,f);
3211 }else (*i_ungetc)(c2,f);
3212 (*i_ungetc)(0xFE,f);
3213 }else if(c2 == 0xFF){
3214 if((c2 = (*i_getc)(f)) == 0xFE){
3215 if(!input_encoding){
3216 set_iconv(TRUE, w_iconv32);
3218 if (iconv == w_iconv32) {
3219 input_endian = ENDIAN_2143;
3222 (*i_ungetc)(0xFF,f);
3223 }else (*i_ungetc)(c2,f);
3224 (*i_ungetc)(0xFF,f);
3225 }else (*i_ungetc)(c2,f);
3226 (*i_ungetc)(0x00,f);
3227 }else (*i_ungetc)(c2,f);
3228 (*i_ungetc)(0x00,f);
3231 if((c2 = (*i_getc)(f)) == 0xBB){
3232 if((c2 = (*i_getc)(f)) == 0xBF){
3233 if(!input_encoding){
3234 set_iconv(TRUE, w_iconv);
3236 if (iconv == w_iconv) {
3240 (*i_ungetc)(0xBF,f);
3241 }else (*i_ungetc)(c2,f);
3242 (*i_ungetc)(0xBB,f);
3243 }else (*i_ungetc)(c2,f);
3244 (*i_ungetc)(0xEF,f);
3247 if((c2 = (*i_getc)(f)) == 0xFF){
3248 if((c2 = (*i_getc)(f)) == 0x00){
3249 if((c2 = (*i_getc)(f)) == 0x00){
3250 if(!input_encoding){
3251 set_iconv(TRUE, w_iconv32);
3253 if (iconv == w_iconv32) {
3254 input_endian = ENDIAN_3412;
3257 (*i_ungetc)(0x00,f);
3258 }else (*i_ungetc)(c2,f);
3259 (*i_ungetc)(0x00,f);
3260 }else (*i_ungetc)(c2,f);
3261 if(!input_encoding){
3262 set_iconv(TRUE, w_iconv16);
3264 if (iconv == w_iconv16) {
3265 input_endian = ENDIAN_BIG;
3269 (*i_ungetc)(0xFF,f);
3270 }else (*i_ungetc)(c2,f);
3271 (*i_ungetc)(0xFE,f);
3274 if((c2 = (*i_getc)(f)) == 0xFE){
3275 if((c2 = (*i_getc)(f)) == 0x00){
3276 if((c2 = (*i_getc)(f)) == 0x00){
3277 if(!input_encoding){
3278 set_iconv(TRUE, w_iconv32);
3280 if (iconv == w_iconv32) {
3281 input_endian = ENDIAN_LITTLE;
3285 (*i_ungetc)(0x00,f);
3286 }else (*i_ungetc)(c2,f);
3287 (*i_ungetc)(0x00,f);
3288 }else (*i_ungetc)(c2,f);
3289 if(!input_encoding){
3290 set_iconv(TRUE, w_iconv16);
3292 if (iconv == w_iconv16) {
3293 input_endian = ENDIAN_LITTLE;
3297 (*i_ungetc)(0xFE,f);
3298 }else (*i_ungetc)(c2,f);
3299 (*i_ungetc)(0xFF,f);
/*
 * broken_getc: input filter that looks for the two-character tails of
 * JIS escape sequences ("$@", "$B", "(J", "(B") arriving without the
 * preceding ESC.
 * Bytes pushed back into nkf_state->broken_buf are returned first.
 * A '$' seen in ASCII or JIS X 0201 mode, or a '(' seen in JIS X 0208
 * or JIS X 0201 mode, is examined together with the following byte
 * unless the previous byte was ESC; when the pair matches, both bytes
 * are pushed into broken_buf.  nkf_state->broken_state remembers the
 * last byte seen.
 * NOTE(review): the reads of c and c1, the value returned when a pair
 * matches (presumably ESC, confirm) and the non-matching paths are on
 * lines not visible here.
 */
3308 broken_getc(FILE *f)
3312 if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3313 return nkf_buf_pop(nkf_state->broken_buf);
3316 if (c=='$' && nkf_state->broken_state != ESC
3317 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3319 nkf_state->broken_state = 0;
3320 if (c1=='@'|| c1=='B') {
/* pushed in reverse order so that c is popped before c1 */
3321 nkf_buf_push(nkf_state->broken_buf, c1);
3322 nkf_buf_push(nkf_state->broken_buf, c);
3328 } else if (c=='(' && nkf_state->broken_state != ESC
3329 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3331 nkf_state->broken_state = 0;
3332 if (c1=='J'|| c1=='B') {
3333 nkf_buf_push(nkf_state->broken_buf, c1);
3334 nkf_buf_push(nkf_state->broken_buf, c);
3341 nkf_state->broken_state = c;
/*
 * broken_ungetc: pushes c back into nkf_state->broken_buf, but only
 * while fewer than two bytes are pending there; otherwise the byte is
 * not stored.  The FILE argument is not used on the visible lines.
 * NOTE(review): the return value is on a line not visible here.
 */
3347 broken_ungetc(nkf_char c, FILE *f)
3349 if (nkf_buf_length(nkf_state->broken_buf) < 2)
3350 nkf_buf_push(nkf_state->broken_buf, c);
/*
 * eol_conv: end-of-line filter placed in front of o_eol_conv.
 * Detection (only when guess_f is set and input_eol is not yet EOF):
 * input_eol records the line-ending style seen so far (CR, LF or CRLF)
 * and is set to EOF as soon as two different styles are observed.
 * Conversion: a pending CR (prev_cr) or an LF is replaced by the
 * configured line ending -- CR is written unless eolmode_f is LF, LF is
 * written unless eolmode_f is CR.  A CR is held back in prev_cr; every
 * other character except LF is passed through unchanged.
 * NOTE(review): where prev_cr is cleared, and the branch between the
 * two detection cases, are on lines not visible here.
 */
3355 eol_conv(nkf_char c2, nkf_char c1)
3357 if (guess_f && input_eol != EOF) {
3358 if (c2 == 0 && c1 == LF) {
3359 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3360 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3361 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3363 else if (!input_eol) input_eol = CR;
3364 else if (input_eol != CR) input_eol = EOF;
3366 if (prev_cr || (c2 == 0 && c1 == LF)) {
3368 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3369 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3371 if (c2 == 0 && c1 == CR) prev_cr = CR;
3372 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3376 put_newline(void (*func)(nkf_char))
3378 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3393 oconv_newline(void (*func)(nkf_char, nkf_char))
3395 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3410 Return value of fold_conv()
3412 LF add newline and output char
3413 CR add newline and output nothing
3416 1 (or else) normal output
3418 fold state in prev (previous character)
3420 >0x80 Japanese (X0208/X0201)
3425 This fold algorithm does not preserve leading spaces in a line.
3426 This is the main difference from fmt.
/*
 * char_size(c2,c1) evaluates to 2 when c2 is non-zero and to 1 otherwise
 * (c1 is not used by the macro).
 *
 * fold_conv(c2, c1) classifies the incoming character into fold_state,
 * updating f_prev and f_line against fold_len and fold_margin, and then
 * dispatches on fold_state in the final switch, where
 * oconv_newline(o_fconv) is called.
 * NOTE(review): the embedded line numbers show that parts of this
 * function are absent from this listing; the comments describe only the
 * statements that are visible.
 */
3429 #define char_size(c2,c1) (c2?2:1)
3432 fold_conv(nkf_char c2, nkf_char c1)
3435 nkf_char fold_state;
3437 if (c1== CR && !fold_preserve_f) {
3438 fold_state=0; /* ignore cr */
3439 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3441 fold_state=0; /* ignore cr */
3442 } else if (c1== BS) {
3443 if (f_line>0) f_line--;
3445 } else if (c2==EOF && f_line != 0) { /* close open last line */
3447 } else if ((c1==LF && !fold_preserve_f)
3448 || ((c1==CR||(c1==LF&&f_prev!=CR))
3449 && fold_preserve_f)) {
3451 if (fold_preserve_f) {
3455 } else if ((f_prev == c1 && !fold_preserve_f)
3456 || (f_prev == LF && fold_preserve_f)
3457 ) { /* duplicate newline */
3460 fold_state = LF; /* output two newlines */
3466 if (f_prev&0x80) { /* Japanese? */
3468 fold_state = 0; /* ignore given single newline */
3469 } else if (f_prev==SP) {
3473 if (++f_line<=fold_len)
3477 fold_state = CR; /* fold and output nothing */
3481 } else if (c1=='\f') {
3484 fold_state = LF; /* output newline and clear */
3485 } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3486 /* X0208 kankaku (space) or ascii space */
3488 fold_state = 0; /* remove duplicate spaces */
3491 if (++f_line<=fold_len)
3492 fold_state = SP; /* output ASCII space only */
3494 f_prev = SP; f_line = 0;
3495 fold_state = CR; /* fold and output nothing */
3499 prev0 = f_prev; /* we still need this one... , but almost done */
3501 if (c2 || c2 == JIS_X_0201_1976_K)
3502 f_prev |= 0x80; /* this is Japanese */
3503 f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
3504 if (f_line<=fold_len) { /* normal case */
3507 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3508 f_line = char_size(c2,c1);
3509 fold_state = LF; /* We can't wait, do fold now */
3510 } else if (c2 == JIS_X_0201_1976_K) {
3511 /* simple kinsoku rules: fold_state 1 means no folding */
/* NOTE(review): the comments in this chain name the JIS X 0201 byte that is
   tested.  The original comments were ISO-2022-JP characters, and three of
   them (for 0xA4, 0xA3 and 0xA1) showed a different character - confirm. */
3512 if (c1==(0xde&0x7f)) fold_state = 1; /* 0xDE: halfwidth voiced sound mark */
3513 else if (c1==(0xdf&0x7f)) fold_state = 1; /* 0xDF: halfwidth semi-voiced sound mark */
3514 else if (c1==(0xa4&0x7f)) fold_state = 1; /* 0xA4: halfwidth ideographic comma */
3515 else if (c1==(0xa3&0x7f)) fold_state = 1; /* 0xA3: halfwidth right corner bracket */
3516 else if (c1==(0xa1&0x7f)) fold_state = 1; /* 0xA1: halfwidth ideographic full stop */
3517 else if (c1==(0xb0&0x7f)) fold_state = 1; /* 0xB0: halfwidth prolonged sound mark */
3518 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3520 fold_state = LF;/* add one new f_line before this character */
3523 fold_state = LF;/* add one new f_line before this character */
3526 /* kinsoku point in ASCII */
3527 if ( c1==')'|| /* { [ ( */
3538 /* just after special */
3539 } else if (!is_alnum(prev0)) {
3540 f_line = char_size(c2,c1);
3542 } else if ((prev0==SP) || /* ignored new f_line */
3543 (prev0==LF)|| /* ignored new f_line */
3544 (prev0&0x80)) { /* X0208 - ASCII */
3545 f_line = char_size(c2,c1);
3546 fold_state = LF;/* add one new f_line before this character */
3548 fold_state = 1; /* default no fold in ASCII */
/* The original comments in the chain below were single ISO-2022-JP
   characters; each is given here as its JIS X 0208 code and name. */
3552 if (c1=='"') fold_state = 1; /* JIS X 0208 0x2122: ideographic comma */
3553 else if (c1=='#') fold_state = 1; /* JIS X 0208 0x2123: ideographic full stop */
3554 else if (c1=='W') fold_state = 1; /* JIS X 0208 0x2157: right corner bracket */
3555 else if (c1=='K') fold_state = 1; /* JIS X 0208 0x214B: fullwidth right parenthesis */
3556 else if (c1=='$') fold_state = 1; /* JIS X 0208 0x2124: fullwidth comma */
3557 else if (c1=='%') fold_state = 1; /* JIS X 0208 0x2125: fullwidth full stop */
3558 else if (c1=='\'') fold_state = 1; /* JIS X 0208 0x215C: fullwidth plus sign.  NOTE(review): the code tests byte 0x27, not 0x5C - confirm which was intended */
3559 else if (c1=='(') fold_state = 1; /* JIS X 0208 0x2128: fullwidth semicolon */
3560 else if (c1==')') fold_state = 1; /* JIS X 0208 0x2129: fullwidth question mark */
3561 else if (c1=='*') fold_state = 1; /* JIS X 0208 0x212A: fullwidth exclamation mark */
3562 else if (c1=='+') fold_state = 1; /* JIS X 0208 0x212B: voiced sound mark */
3563 else if (c1==',') fold_state = 1; /* JIS X 0208 0x212C: semi-voiced sound mark */
3564 /* default no fold in kinsoku */
3567 f_line = char_size(c2,c1);
3568 /* add one new f_line before this character */
3571 f_line = char_size(c2,c1);
3573 /* add one new f_line before this character */
3578 /* terminator process */
3579 switch(fold_state) {
3581 oconv_newline(o_fconv);
3587 oconv_newline(o_fconv);
/*
 * z_conv(c2, c1): width-conversion stage that forwards to o_zconv.
 * Visible behaviour:
 *  - a JIS X 0201 kana remembered in z_prev1/z_prev2 is combined with a
 *    following voiced (0xDE) or semi-voiced (0xDF) mark through the dv/ev
 *    tables, or emitted on its own through cv;
 *  - the alpha_f masks 1, 4 and 8 gate the JIS X 0208 alphabet/symbol
 *    handling and the entity output for c2 == 0;
 *  - for c2 == 0x25 the fullwidth_to_halfwidth[] entry indexed by c1-0x20
 *    supplies a JIS X 0201 byte in its high 8 bits, and its low 8 bits are
 *    passed to a second o_zconv call (presumably only when non-zero; the
 *    guarding line is not visible here).
 * NOTE(review): many lines of this function are absent from this listing.
 */
3598 static nkf_char z_prev2=0,z_prev1=0;
3601 z_conv(nkf_char c2, nkf_char c1)
3604 /* if (c2) c1 &= 0x7f; assertion */
3606 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3612 if (z_prev2 == JIS_X_0201_1976_K) {
3613 if (c2 == JIS_X_0201_1976_K) {
3614 if (c1 == (0xde&0x7f)) { /* dakuten (voiced sound mark) */
3616 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3618 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* handakuten (semi-voiced sound mark) */
3620 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3625 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3627 if (c2 == JIS_X_0201_1976_K) {
3628 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3629 /* wait for dakuten or handakuten */
3634 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3645 if (alpha_f&1 && c2 == 0x23) {
3646 /* JISX0208 Alphabet */
3648 } else if (c2 == 0x21) {
3649 /* JISX0208 Kigou (symbols) */
3654 } else if (alpha_f&4) {
3659 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3665 if (alpha_f&8 && c2 == 0) {
3667 const char *entity = 0;
/* NOTE(review): the four entity literals below look as if the character
   references &gt; &lt; &quot; &amp; were decoded by whatever produced this
   listing; the third one is not a valid string literal as written.
   Restore them from the upstream source before compiling. */
3669 case '>': entity = ">"; break;
3670 case '<': entity = "<"; break;
3671 case '\"': entity = """; break;
3672 case '&': entity = "&"; break;
3675 while (*entity) (*o_zconv)(0, *entity++);
3681 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3686 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3690 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3694 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3698 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3702 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3706 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3710 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3714 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3719 (*o_zconv)(JIS_X_0201_1976_K, c);
3722 } else if (c2 == 0x25) {
3723 /* JISX0208 Katakana */
3724 static const int fullwidth_to_halfwidth[] =
3726 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3727 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3728 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3729 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3730 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3731 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3732 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3733 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3734 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3735 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3736 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3737 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3739 if (fullwidth_to_halfwidth[c1-0x20]){
3740 c2 = fullwidth_to_halfwidth[c1-0x20];
3741 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3743 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
/*
 * rot13(c) and rot47(c): rotation macros.  The visible arms add 13 (resp.
 * 47) for characters up to M and m (resp. O) and subtract it for characters
 * up to Z and z (resp. ~).  The lower-bound arms and the closing lines of
 * both macros are not visible in this listing.
 */
3753 #define rot13(c) ( \
3755 (c <= 'M') ? (c + 13): \
3756 (c <= 'Z') ? (c - 13): \
3758 (c <= 'm') ? (c + 13): \
3759 (c <= 'z') ? (c - 13): \
3763 #define rot47(c) ( \
3765 ( c <= 'O') ? (c + 47) : \
3766 ( c <= '~') ? (c - 47) : \
/*
 * rot_conv(c2, c1): rotation stage.  Characters with c2 equal to 0,
 * JIS_X_0201_1976_K or ISO_8859_1 take the first branch (its body is not
 * visible here); the pair is passed on through o_rot_conv.
 */
3771 rot_conv(nkf_char c2, nkf_char c1)
3773 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3779 (*o_rot_conv)(c2,c1);
/*
 * hira_conv(c2, c1): presumably the hiragana/katakana conversion stage; it
 * feeds o_hira_conv.  Visible tests cover c1 in 0x21..0x73, c1 == 0x74 when
 * the output encoding is Unicode (c1 becomes U+3094), the pairs
 * 0x21/0x33-0x34 and 0x21/0x35-0x36, c2 == 0x24 with c1 in 0x21..0x73, and
 * U+3094 arriving with c2 == 0.
 * NOTE(review): the enclosing conditions and the assignments made in each
 * branch are not visible in this listing.
 */
3783 hira_conv(nkf_char c2, nkf_char c1)
3787 if (0x20 < c1 && c1 < 0x74) {
3789 (*o_hira_conv)(c2,c1);
3791 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3793 c1 = nkf_char_unicode_new(0x3094);
3794 (*o_hira_conv)(c2,c1);
3797 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3799 (*o_hira_conv)(c2,c1);
3804 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3807 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3809 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3813 (*o_hira_conv)(c2,c1);
/*
 * iso2022jp_check_conv(c2, c1): checks (c2, c1) against fixed ranges and
 * forwards to o_iso2022jp_check_conv.  Visible checks: c2 in 0x00..0x20
 * with c1 in 0x7f..0xff; c2 in 0x29..0x2f or 0x75..0x7e; and a scan of the
 * RANGE_NUM_MAX rows of range[][] for start <= c <= end.
 * NOTE(review): what is done on a match is not visible here.
 */
3818 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3820 #define RANGE_NUM_MAX 18
3821 static const nkf_char range[RANGE_NUM_MAX][2] = {
3842 nkf_char start, end, c;
3844 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3848 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3853 for (i = 0; i < RANGE_NUM_MAX; i++) {
3854 start = range[i][0];
3857 if (c >= start && c <= end) {
3862 (*o_iso2022jp_check_conv)(c2,c1);
/*
 * MIME encoded-word tables.  The arrays are parallel: entry j of
 * mime_pattern[] (the =?charset?method? prefix, with \075 being the equals
 * sign) goes with entry j of mime_priority_func[], mime_encode[] and
 * mime_encode_method[].  The ISO-2022-JP B pattern appears twice so that it
 * can pair with both JIS_X_0208 and JIS_X_0201_1976_K in mime_encode[].
 */
3866 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3868 static const unsigned char *mime_pattern[] = {
3869 (const unsigned char *)"\075?EUC-JP?B?",
3870 (const unsigned char *)"\075?SHIFT_JIS?B?",
3871 (const unsigned char *)"\075?ISO-8859-1?Q?",
3872 (const unsigned char *)"\075?ISO-8859-1?B?",
3873 (const unsigned char *)"\075?ISO-2022-JP?B?",
3874 (const unsigned char *)"\075?ISO-2022-JP?B?",
3875 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3876 #if defined(UTF8_INPUT_ENABLE)
3877 (const unsigned char *)"\075?UTF-8?B?",
3878 (const unsigned char *)"\075?UTF-8?Q?",
3880 (const unsigned char *)"\075?US-ASCII?Q?",
3885 /* markers used to raise the priority of the matching input code */
3886 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3887 e_iconv, s_iconv, 0, 0, 0, 0, 0,
3888 #if defined(UTF8_INPUT_ENABLE)
3894 static const nkf_char mime_encode[] = {
3895 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
3896 #if defined(UTF8_INPUT_ENABLE)
3903 static const nkf_char mime_encode_method[] = {
3904 'B', 'B','Q', 'B', 'B', 'B', 'Q',
3905 #if defined(UTF8_INPUT_ENABLE)
3913 /* MIME preprocessor fifo */
/*
 * mime_input_buf(n) indexes mime_input_state.buf with n masked by
 * MIME_BUF_MASK, so positions wrap around and MIME_BUF_SIZE has to remain a
 * power of two.
 */
3915 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3916 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3917 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3919 unsigned char buf[MIME_BUF_SIZE];
3921 unsigned int last; /* decoded */
3922 unsigned int input; /* undecoded */
/* iconv saved by mime_begin_strict and restored by unswitch_mime_getc */
3924 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
/* length of the recovery buffer and bound of the preamble scan loops */
3926 #define MAXRECOVER 20
/*
 * mime_input_buf_unshift(c): stores c (narrowed to unsigned char) in front
 * of the fifo by pre-decrementing mime_input_state.top.
 */
3929 mime_input_buf_unshift(nkf_char c)
3931 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
/*
 * mime_ungetc(c, f): ungetc hook that pushes c back into the fifo through
 * mime_input_buf_unshift; f is not used in the visible line.
 */
3935 mime_ungetc(nkf_char c, FILE *f)
3937 mime_input_buf_unshift(c);
/*
 * mime_ungetc_buf(c, f): returns c either to the underlying stream through
 * i_mungetc_buf or to the undecoded part of the fifo by pre-decrementing
 * mime_input_state.input.
 * NOTE(review): the condition selecting between the two is not visible
 * here; presumably mimebuf_f, as in mime_getc_buf - confirm.
 */
3942 mime_ungetc_buf(nkf_char c, FILE *f)
3945 (*i_mungetc_buf)(c,f);
3947 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
/*
 * mime_getc_buf(f): when mimebuf_f is set, reads from the underlying
 * stream through i_mgetc_buf; otherwise returns the next undecoded byte of
 * the fifo and advances mime_input_state.input.
 */
3952 mime_getc_buf(FILE *f)
3954 /* we don't keep eof of mime_input_buf, because it contains ?= as
3955 a terminator. It was checked in mime_integrity. */
3956 return ((mimebuf_f)?
3957 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
/*
 * switch_mime_getc(): installs mime_getc/mime_ungetc as i_getc/i_ungetc
 * unless mime_getc is already installed, saving the previous hooks in
 * i_mgetc/i_mungetc.  In STRICT_MIME mode those saved hooks are moved one
 * level down into i_mgetc_buf/i_mungetc_buf and replaced by
 * mime_getc_buf/mime_ungetc_buf.
 */
3961 switch_mime_getc(void)
3963 if (i_getc!=mime_getc) {
3964 i_mgetc = i_getc; i_getc = mime_getc;
3965 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3966 if(mime_f==STRICT_MIME) {
3967 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3968 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
/*
 * unswitch_mime_getc(): undoes switch_mime_getc.  In STRICT_MIME mode the
 * hooks saved in i_mgetc_buf/i_mungetc_buf are restored first; i_ungetc is
 * reset from i_mungetc, and an iconv saved in mime_iconv_back is
 * reinstalled with set_iconv and then cleared.
 */
3974 unswitch_mime_getc(void)
3976 if(mime_f==STRICT_MIME) {
3977 i_mgetc = i_mgetc_buf;
3978 i_mungetc = i_mungetc_buf;
3981 i_ungetc = i_mungetc;
3982 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3983 mime_iconv_back = NULL;
/*
 * mime_integrity(f, p): buffered pre-scan of an encoded word.  The pattern p
 * is copied into the fifo and q remembers the position just after it; input
 * is then read through i_getc until EOF or until the fifo is full.  When a
 * character equal to the equals sign arrives with d being a question mark,
 * mime_input_state.input is rewound to q so that decoding starts after the
 * header.  If the scan ends without that, the text stays undecoded: last is
 * set to input, mime_decode_mode becomes 1 and switch_mime_getc() installs
 * the buffered getc.
 * NOTE(review): d is presumably the previously read character; the line
 * that assigns it is not visible here.
 */
3987 mime_integrity(FILE *f, const unsigned char *p)
3991 /* In buffered mode, read until =? or NL or buffer full
3993 mime_input_state.input = mime_input_state.top;
3994 mime_input_state.last = mime_input_state.top;
3996 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3998 q = mime_input_state.input;
3999 while((c=(*i_getc)(f))!=EOF) {
4000 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
4001 break; /* buffer full */
4003 if (c=='=' && d=='?') {
4004 /* checked. skip header, start decode */
4005 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4006 /* mime_last_input = mime_input_state.input; */
4007 mime_input_state.input = q;
4011 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4013 /* Should we check length mod 4? */
4014 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4017 /* In case of Incomplete MIME, no MIME decode */
4018 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4019 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
4020 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
4021 switch_mime_getc(); /* anyway we need buffered getc */
/*
 * mime_begin_strict(f): entered after =? has been seen.  Further characters
 * are read through i_getc, saved in the recovery buffer r[] and compared,
 * upper-cased with nkf_toupper, against mime_pattern[j].  On a mismatch j
 * advances to the next pattern that agrees with the characters matched so
 * far; when no pattern is left the recovery buffer is output (those lines
 * are not visible here).  On success mime_decode_mode is taken from the
 * pattern (p[i-2]), the current iconv is saved in mime_iconv_back and
 * replaced by mime_priority_func[j], and for the B method mimebuf_f is set
 * from unbuf_f before mime_integrity(f, mime_pattern[j]) is returned.
 * NOTE(review): the conditions guarding the iconv switch and the
 * mime_integrity call are not visible in this listing.
 */
4026 mime_begin_strict(FILE *f)
4030 const unsigned char *p,*q;
4031 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4033 mime_decode_mode = FALSE;
4034 /* =? has been checked */
4036 p = mime_pattern[j];
4039 for(i=2;p[i]>SP;i++) { /* start at =? */
4040 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
4041 /* pattern fails, try next one */
4043 while (mime_pattern[++j]) {
4044 p = mime_pattern[j];
4045 for(k=2;k<i;k++) /* assume length(p) > i */
4046 if (p[k]!=q[k]) break;
4047 if (k==i && nkf_toupper(c1)==p[k]) break;
4049 p = mime_pattern[j];
4050 if (p) continue; /* found next one, continue */
4051 /* all fails, output from recovery buffer */
4059 mime_decode_mode = p[i-2];
4061 mime_iconv_back = iconv;
4062 set_iconv(FALSE, mime_priority_func[j]);
4063 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4065 if (mime_decode_mode=='B') {
4066 mimebuf_f = unbuf_f;
4068 /* do MIME integrity check */
4069 return mime_integrity(f,mime_pattern[j]);
/*
 * NOTE(review): the defining line of this function is not visible in this
 * listing; the comments inside refer to it as mime_begin - confirm.
 * Visible behaviour: the preamble is copied into the fifo at
 * mime_input_state.last while it is read through i_getc, with at most
 * MAXRECOVER characters examined.  A b/B or q/Q method letter selects
 * mime_decode_mode B or Q.  When no valid preamble is found,
 * mime_decode_mode is set to 1 so the saved text is re-read from the fifo;
 * otherwise last is reset to k, which discards the preamble.
 */
4083 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4084 /* re-read and convert again from mime_buffer. */
4086 /* =? has been checked */
4087 k = mime_input_state.last;
4088 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
4089 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4090 /* We accept any character type even if it is broken by new lines */
4091 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4092 if (c1==LF||c1==SP||c1==CR||
4093 c1=='-'||c1=='_'||is_alnum(c1)) continue;
4095 /* Failed. But this could be another MIME preamble */
4097 mime_input_state.last--;
4103 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4104 if (!(++i<MAXRECOVER) || c1==EOF) break;
4105 if (c1=='b'||c1=='B') {
4106 mime_decode_mode = 'B';
4107 } else if (c1=='q'||c1=='Q') {
4108 mime_decode_mode = 'Q';
4112 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4113 if (!(++i<MAXRECOVER) || c1==EOF) break;
4115 mime_decode_mode = FALSE;
4121 if (!mime_decode_mode) {
4122 /* false MIME preamble, restart from mime_buffer */
4123 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4124 /* Since we are in MIME mode until buffer becomes empty, */
4125 /* we never go into mime_begin again for a while. */
4128 /* discard mime preamble, and goto MIME mode */
4129 mime_input_state.last = k;
4130 /* do no MIME integrity check */
4131 return c1; /* used only for checking EOF */
/*
 * debug(str): writes str and a newline to stderr, substituting the text
 * NULL when str is a null pointer.
 * NOTE(review): any flag guarding the output is not visible here.
 */
4142 debug(const char *str)
4145 fprintf(stderr, "%s\n", str ? str : "NULL");
/*
 * set_input_codename(codename): records the detected input code name.  The
 * first call stores codename; a later call with a different name replaces
 * the stored value with the empty string.  The pointer is stored as is, not
 * copied.
 */
4151 set_input_codename(const char *codename)
4153 if (!input_codename) {
4154 input_codename = codename;
4155 } else if (strcmp(codename, input_codename) != 0) {
4156 input_codename = "";
/*
 * get_guessed_code(): finalises input_codename and returns it.
 *  - empty string            -> BINARY
 *  - never set               -> ASCII
 *  - Shift_JIS               -> CP932 when SCORE_DEPEND or SCORE_CP932 is set
 *  - EUC-JP                  -> EUCJP-MS when SCORE_X0212 is set, else
 *                               CP51932 when SCORE_DEPEND or SCORE_CP932 is set
 *  - ISO-2022-JP             -> CP50221 when SCORE_KANA is set, else
 *                               CP50220 when SCORE_DEPEND or SCORE_CP932 is set
 * The score bits are read from the input_code entry found for the current
 * iconv function.
 */
4161 get_guessed_code(void)
4163 if (input_codename && !*input_codename) {
4164 input_codename = "BINARY";
4166 struct input_code *p = find_inputcode_byfunc(iconv);
4167 if (!input_codename) {
4168 input_codename = "ASCII";
4169 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4170 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4171 input_codename = "CP932";
4172 } else if (strcmp(input_codename, "EUC-JP") == 0) {
4173 if (p->score & (SCORE_X0212))
4174 input_codename = "EUCJP-MS";
4175 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4176 input_codename = "CP51932";
4177 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4178 if (p->score & (SCORE_KANA))
4179 input_codename = "CP50221";
4180 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4181 input_codename = "CP50220";
4184 return input_codename;
/*
 * print_guessed_code(filename): prints the guessed input code to stdout,
 * prefixed by the file name and a colon when filename is not NULL.  Two
 * output forms are visible: the bare code name, and a four-field form that
 * appends the endianness (only when iconv is w_iconv16 or w_iconv32), a
 * BOM marker and the detected end-of-line style.
 * NOTE(review): the condition choosing between the two forms is not
 * visible in this listing.
 */
4187 #if !defined(PERL_XS) && !defined(WIN32DLL)
4189 print_guessed_code(char *filename)
4191 if (filename != NULL) printf("%s: ", filename);
4192 if (input_codename && !*input_codename) {
4195 input_codename = get_guessed_code();
4197 printf("%s\n", input_codename);
4199 printf("%s%s%s%s\n",
4201 iconv != w_iconv16 && iconv != w_iconv32 ? "" :
4202 input_endian == ENDIAN_LITTLE ? " LE" :
4203 input_endian == ENDIAN_BIG ? " BE" :
4205 input_bom_f ? " (BOM)" : "",
4206 input_eol == CR ? " (CR)" :
4207 input_eol == LF ? " (LF)" :
4208 input_eol == CRLF ? " (CRLF)" :
4209 input_eol == EOF ? " (MIXED NL)" :
/*
 * hex_getc(ch, f, g, u): shared decoder for escapes introduced by ch.  Two
 * following characters c2 and c3 are checked with nkf_isxdigit and, when
 * both are hex digits, combined into one byte as
 * (hex2bin(c2) << 4) | hex2bin(c3).
 * NOTE(review): g and u are presumably the getc and ungetc hooks used to
 * read ahead and to push characters back on failure; those lines are not
 * visible here.
 */
4219 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4221 nkf_char c1, c2, c3;
4227 if (!nkf_isxdigit(c2)){
4232 if (!nkf_isxdigit(c3)){
4237 return (hex2bin(c2) << 4) | hex2bin(c3);
/*
 * NOTE(review): the first line below is the body of a getter whose header
 * is not visible in this listing (module_connection installs cap_getc as
 * i_getc, so presumably that one).  It decodes colon-introduced hex escapes
 * through hex_getc using the i_cgetc/i_cungetc hooks.
 */
4243 return hex_getc(':', f, i_cgetc, i_cungetc);
/* cap_ungetc(c, f): forwards the pushed-back character to i_cungetc. */
4247 cap_ungetc(nkf_char c, FILE *f)
4249 return (*i_cungetc)(c, f);
/*
 * NOTE(review): the first line below is the body of a getter whose header
 * is not visible in this listing (module_connection installs url_getc as
 * i_getc, so presumably that one).  It decodes percent-introduced hex
 * escapes through hex_getc using the i_ugetc/i_uungetc hooks.
 */
4255 return hex_getc('%', f, i_ugetc, i_uungetc);
/* url_ungetc(c, f): forwards the pushed-back character to i_uungetc. */
4259 url_ungetc(nkf_char c, FILE *f)
4261 return (*i_uungetc)(c, f);
/*
 * numchar_getc(f): reads through the i_ngetc/i_nungetc hooks and parses a
 * number collected in buf[]: after x or X up to 7 hexadecimal digits,
 * otherwise up to 8 decimal digits.  The parsed value is returned wrapped by
 * nkf_char_unicode_new.
 * NOTE(review): the introducer and terminator checks and the fallback path
 * are not visible in this listing.
 */
4265 #ifdef NUMCHAR_OPTION
4267 numchar_getc(FILE *f)
4269 nkf_char (*g)(FILE *) = i_ngetc;
4270 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4281 if (buf[i] == 'x' || buf[i] == 'X'){
4282 for (j = 0; j < 7; j++){
4284 if (!nkf_isxdigit(buf[i])){
4291 c |= hex2bin(buf[i]);
4294 for (j = 0; j < 8; j++){
4298 if (!nkf_isdigit(buf[i])){
4305 c += hex2bin(buf[i]);
4311 return nkf_char_unicode_new(c);
/* numchar_ungetc(c, f): forwards the pushed-back character to i_nungetc. */
4321 numchar_ungetc(nkf_char c, FILE *f)
4323 return (*i_nungetc)(c, f);
/*
 * NOTE(review): the defining line of this function is not visible in this
 * listing; module_connection installs nfc_getc over i_nfc_getc, so
 * presumably this is nfc_getc.
 * Visible behaviour: a byte read through i_nfc_getc is returned unchanged
 * when it is EOF, above 0xFF or of the form 10xxxxxx.  Otherwise it is
 * pushed on nkf_state->nfc_buf and normalization_table[] is binary-searched
 * on the nfd byte sequence, pulling further bytes into the buffer as the
 * comparison needs them.  On a match the nfc bytes are pushed on the
 * buffer.  At the end all buffered bytes but one are handed back through
 * the unget hook and the remaining one is popped into c.
 */
4327 #ifdef UNICODE_NORMALIZATION
4332 nkf_char (*g)(FILE *f) = i_nfc_getc;
4333 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4334 nkf_buf_t *buf = nkf_state->nfc_buf;
4335 const unsigned char *array;
4336 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4337 nkf_char c = (*g)(f);
4339 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4341 nkf_buf_push(buf, c);
4343 while (lower <= upper) {
4344 int mid = (lower+upper) / 2;
4346 array = normalization_table[mid].nfd;
4347 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4348 if (len >= nkf_buf_length(buf)) {
4352 lower = 1, upper = 0;
4355 nkf_buf_push(buf, c);
4357 if (array[len] != nkf_buf_at(buf, len)) {
4358 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4359 else upper = mid - 1;
4366 array = normalization_table[mid].nfc;
4368 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4369 nkf_buf_push(buf, array[i]);
4373 } while (lower <= upper);
4375 while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4376 c = nkf_buf_pop(buf);
/* nfc_ungetc(c, f): forwards the pushed-back character to i_nfc_ungetc. */
4382 nfc_ungetc(nkf_char c, FILE *f)
4384 return (*i_nfc_ungetc)(c, f);
4386 #endif /* UNICODE_NORMALIZATION */
/*
 * base64decode(c): maps one Base64 character to its 6-bit value.  Besides
 * the standard alphabet, the underscore is accepted as 63 and the minus
 * sign as 62.  The character constants used as offsets stand for plain
 * numbers, as the inline comments spell out.
 * NOTE(review): the range tests for the letter branches and the return
 * statement are not visible in this listing.
 */
4390 base64decode(nkf_char c)
4395 i = c - 'A'; /* A..Z 0-25 */
4396 } else if (c == '_') {
4397 i = '?' /* 63 */ ; /* _ 63 */
4399 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4401 } else if (c > '/') {
4402 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4403 } else if (c == '+' || c == '-') {
4404 i = '>' /* 62 */ ; /* + and - 62 */
4406 i = '?' /* 63 */ ; /* / 63 */
/*
 * NOTE(review): the defining line of this function is not visible in this
 * listing; switch_mime_getc installs mime_getc as i_getc, and this body
 * matches that role - confirm.
 * Visible behaviour:
 *  - bytes already decoded into the fifo (top != last) are returned first;
 *  - with mime_decode_mode 1 or FALSE the MIME hooks are removed with
 *    unswitch_mime_getc and the read is delegated to i_getc;
 *  - in Q mode an underscore decodes to SP (unless FIXED_MIME), the
 *    question-mark/equals pair ends the encoded word, and an equals sign
 *    followed by two characters yields (hex2bin(c2)<<4) + hex2bin(c3);
 *  - in B mode four characters are read, skipping characters not above SP,
 *    and decoded into up to three bytes appended to the fifo;
 *  - after the end of an encoded word the following white space is
 *    collected in lwsp_buf, which is grown with nkf_xrealloc and released
 *    with nkf_xfree.
 * Much of the control flow is absent from this listing.
 */
4414 nkf_char c1, c2, c3, c4, cc;
4415 nkf_char t1, t2, t3, t4, mode, exit_mode;
4416 nkf_char lwsp_count;
4419 nkf_char lwsp_size = 128;
4421 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4422 return mime_input_buf(mime_input_state.top++);
4424 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4425 mime_decode_mode=FALSE;
4426 unswitch_mime_getc();
4427 return (*i_getc)(f);
4430 if (mimebuf_f == FIXED_MIME)
4431 exit_mode = mime_decode_mode;
4434 if (mime_decode_mode == 'Q') {
4435 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4437 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4438 if (c1<=SP || DEL<=c1) {
4439 mime_decode_mode = exit_mode; /* prepare for quit */
4442 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4446 mime_decode_mode = exit_mode; /* prepare for quit */
4447 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4448 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4449 /* end Q encoding */
4450 input_mode = exit_mode;
4452 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4453 while ((c1=(*i_getc)(f))!=EOF) {
4458 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4466 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4467 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4482 lwsp_buf[lwsp_count] = (unsigned char)c1;
4483 if (lwsp_count++>lwsp_size){
4485 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4486 lwsp_buf = lwsp_buf_new;
4492 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4494 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4495 i_ungetc(lwsp_buf[lwsp_count],f);
4498 nkf_xfree(lwsp_buf);
4501 if (c1=='='&&c2<SP) { /* this is soft wrap */
4502 while((c1 = (*i_mgetc)(f)) <=SP) {
4503 if (c1 == EOF) return (EOF);
4505 mime_decode_mode = 'Q'; /* still in MIME */
4506 goto restart_mime_q;
4509 mime_decode_mode = 'Q'; /* still in MIME */
4513 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4514 if (c2<=SP) return c2;
4515 mime_decode_mode = 'Q'; /* still in MIME */
4516 return ((hex2bin(c2)<<4) + hex2bin(c3));
4519 if (mime_decode_mode != 'B') {
4520 mime_decode_mode = FALSE;
4521 return (*i_mgetc)(f);
4525 /* Base64 encoding */
4527 MIME allows line break in the middle of
4528 Base64, but we are very pessimistic in decoding
4529 in unbuf mode because MIME encoded code may broken by
4530 less or editor's control sequence (such as ESC-[-K in unbuffered
4531 mode. ignore incomplete MIME.
4533 mode = mime_decode_mode;
4534 mime_decode_mode = exit_mode; /* prepare for quit */
4536 while ((c1 = (*i_mgetc)(f))<=SP) {
4541 if ((c2 = (*i_mgetc)(f))<=SP) {
4544 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4545 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4548 if ((c1 == '?') && (c2 == '=')) {
4551 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4552 while ((c1=(*i_getc)(f))!=EOF) {
4557 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4565 if ((c1=(*i_getc)(f))!=EOF) {
4569 } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4584 lwsp_buf[lwsp_count] = (unsigned char)c1;
4585 if (lwsp_count++>lwsp_size){
4587 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4588 lwsp_buf = lwsp_buf_new;
4594 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4596 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4597 i_ungetc(lwsp_buf[lwsp_count],f);
4600 nkf_xfree(lwsp_buf);
4604 if ((c3 = (*i_mgetc)(f))<=SP) {
4607 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4608 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4612 if ((c4 = (*i_mgetc)(f))<=SP) {
4615 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4616 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4620 mime_decode_mode = mode; /* still in MIME sigh... */
4622 /* BASE 64 decoding */
4624 t1 = 0x3f & base64decode(c1);
4625 t2 = 0x3f & base64decode(c2);
4626 t3 = 0x3f & base64decode(c3);
4627 t4 = 0x3f & base64decode(c4);
4628 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4630 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4631 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4633 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4634 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4636 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4641 return mime_input_buf(mime_input_state.top++);
/* basis_64[]: the 64-character Base64 output alphabet, indexed by 6-bit value. */
4644 static const char basis_64[] =
4645 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* MIMEOUT_BUF_LENGTH: capacity of the pending-output buffer below; the
   buffer has one extra byte. */
4647 #define MIMEOUT_BUF_LENGTH 74
4649 unsigned char buf[MIMEOUT_BUF_LENGTH+1];
4653 /*nkf_char mime_lastchar2, mime_lastchar1;*/
/*
 * open_mime(mode): starts an encoded word for output mode `mode`.  The
 * pattern whose mime_encode[] entry equals mode is selected, with
 * mime_pattern[0] as the default, and mimeout_mode is set from
 * mime_encode_method[] at the same index.  When base64_count exceeds 45 a
 * newline is emitted first, after an optional buffered blank.  Buffered
 * white space is written through o_mputc, and the remaining content of
 * mimeout_state.buf is replayed through mime_putc after the count has been
 * reset to 0.
 * NOTE(review): the lines that output the pattern itself are not visible
 * in this listing.
 */
4656 open_mime(nkf_char mode)
4658 const unsigned char *p;
4661 p = mime_pattern[0];
4662 for(i=0;mime_pattern[i];i++) {
4663 if (mode == mime_encode[i]) {
4664 p = mime_pattern[i];
4668 mimeout_mode = mime_encode_method[i];
4670 if (base64_count>45) {
4671 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4672 (*o_mputc)(mimeout_state.buf[i]);
4675 put_newline(o_mputc);
4678 if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
4682 for (;i<mimeout_state.count;i++) {
4683 if (nkf_isspace(mimeout_state.buf[i])) {
4684 (*o_mputc)(mimeout_state.buf[i]);
4694 j = mimeout_state.count;
4695 mimeout_state.count = 0;
4697 mime_putc(mimeout_state.buf[i]);
/*
 * mime_prechar(c2, c1): line-length guard run before a character is handed
 * to the MIME encoder.  The projected width is base64_count plus
 * mimeout_state.count/3*4.  When it exceeds the visible limits (73 and 66
 * while an encoded word is open, 60 otherwise) the current word is closed by
 * sending EOF to o_base64conv, a newline is written and a space starts the
 * continuation line.  In the last case mimeout_mode is first chosen as Q for
 * ASCII or ISO_8859_1 output and B otherwise, and open_mime is called.
 * NOTE(review): the conditions that select among the three cases are only
 * partly visible in this listing.
 */
4702 mime_prechar(nkf_char c2, nkf_char c1)
4704 if (mimeout_mode > 0){
4706 if (base64_count + mimeout_state.count/3*4> 73){
4707 (*o_base64conv)(EOF,0);
4708 oconv_newline(o_base64conv);
4709 (*o_base64conv)(0,SP);
4713 if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
4714 (*o_base64conv)(EOF,0);
4715 oconv_newline(o_base64conv);
4716 (*o_base64conv)(0,SP);
4722 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4723 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4724 open_mime(output_mode);
4725 (*o_base64conv)(EOF,0);
4726 oconv_newline(o_base64conv);
4727 (*o_base64conv)(0,SP);
/*
 * NOTE(review): the defining line of this function is not visible in this
 * listing; presumably it is the routine that closes an open encoded word.
 * Visible behaviour: depending on mimeout_mode, the bits still held in
 * nkf_state->mimeout_state are flushed as a final Base64 character (two
 * low bits shifted left by 4, or four low bits shifted left by 2).
 */
4746 switch(mimeout_mode) {
4751 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
4757 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
4762 if (mimeout_mode > 0) {
4763 if (mimeout_f!=FIXED_MIME) {
4765 } else if (mimeout_mode != 'Q')
/*
 * mimeout_addchar(c): encodes one byte according to mimeout_mode.
 *  - In the branch that tests nkf_isalnum, a non-alphanumeric byte is
 *    written as two hex digits produced by bin2hex (high nibble first).
 *  - In the Base64 branches the byte is combined with the bits kept in
 *    nkf_state->mimeout_state: the first byte emits its top 6 bits, the
 *    second emits 2 saved bits plus its top 4, the third emits 4 saved bits
 *    plus its top 2 and then its low 6 bits.
 * NOTE(review): the case labels and the mode transitions are not visible in
 * this listing.
 */
4771 mimeout_addchar(nkf_char c)
4773 switch(mimeout_mode) {
4778 } else if(!nkf_isalnum(c)) {
4780 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4781 (*o_mputc)(bin2hex((c&0xf)));
4789 nkf_state->mimeout_state=c;
4790 (*o_mputc)(basis_64[c>>2]);
4795 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4796 nkf_state->mimeout_state=c;
4801 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4802 (*o_mputc)(basis_64[c & 0x3F]);
/*
 * mime_putc(c): output-side MIME encoder entry point.  Visible structure:
 *  - FIXED_MIME: a newline is inserted once base64_count exceeds 71;
 *  - otherwise, on EOF the pending bytes in mimeout_state.buf are flushed,
 *    through mimeout_addchar while an encoded word is open;
 *  - in Q mode, ASCII-range input for ASCII or ISO_8859_1 output is handled
 *    with line breaks once base64_count exceeds 70;
 *  - with no encoded word open (mimeout_mode <= 0), ASCII-range input is
 *    buffered in mimeout_state.buf, lines are broken before column 76 with
 *    special care for the text boundary= followed by a double quote, and
 *    open_mime is called when the buffer overflows or non-ASCII input
 *    arrives;
 *  - with a B-mode word open, ASCII-range input is buffered and either
 *    encoded with mimeout_addchar or written raw through o_mputc.
 * NOTE(review): large parts of the control flow are absent from this
 * listing, so the summary above is deliberately coarse.
 */
4814 mime_putc(nkf_char c)
4819 if (mimeout_f == FIXED_MIME){
4820 if (mimeout_mode == 'Q'){
4821 if (base64_count > 71){
4822 if (c!=CR && c!=LF) {
4824 put_newline(o_mputc);
4829 if (base64_count > 71){
4831 put_newline(o_mputc);
4834 if (c == EOF) { /* c==EOF */
4838 if (c != EOF) { /* c==EOF */
4844 /* mimeout_f != FIXED_MIME */
4846 if (c == EOF) { /* c==EOF */
4847 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4848 j = mimeout_state.count;
4849 mimeout_state.count = 0;
4851 if (mimeout_mode > 0) {
4852 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4854 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4857 mimeout_addchar(mimeout_state.buf[i]);
4861 mimeout_addchar(mimeout_state.buf[i]);
4865 mimeout_addchar(mimeout_state.buf[i]);
4871 mimeout_addchar(mimeout_state.buf[i]);
4877 if (mimeout_state.count > 0){
4878 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4883 if (mimeout_mode=='Q') {
4884 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4885 if (c == CR || c == LF) {
4890 } else if (c <= SP) {
4892 if (base64_count > 70) {
4893 put_newline(o_mputc);
4896 if (!nkf_isblank(c)) {
4901 if (base64_count > 70) {
4903 put_newline(o_mputc);
4906 open_mime(output_mode);
4908 if (!nkf_noescape_mime(c)) {
4921 if (mimeout_mode <= 0) {
4922 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
4923 output_mode == UTF_8)) {
4924 if (nkf_isspace(c)) {
4926 if (mimeout_mode == -1) {
4929 if (c==CR || c==LF) {
4931 open_mime(output_mode);
4937 for (i=0;i<mimeout_state.count;i++) {
4938 (*o_mputc)(mimeout_state.buf[i]);
4939 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4950 mimeout_state.buf[0] = (char)c;
4951 mimeout_state.count = 1;
4953 if (base64_count > 1
4954 && base64_count + mimeout_state.count > 76
4955 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
4956 static const char *str = "boundary=\"";
4957 static int len = 10;
4960 for (; i < mimeout_state.count - len; ++i) {
4961 if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
4967 if (i == 0 || i == mimeout_state.count - len) {
4968 put_newline(o_mputc);
4970 if (!nkf_isspace(mimeout_state.buf[0])){
4977 for (j = 0; j <= i; ++j) {
4978 (*o_mputc)(mimeout_state.buf[j]);
4980 put_newline(o_mputc);
4982 for (; j <= mimeout_state.count; ++j) {
4983 mimeout_state.buf[j - i] = mimeout_state.buf[j];
4985 mimeout_state.count -= i;
4988 mimeout_state.buf[mimeout_state.count++] = (char)c;
4989 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4990 open_mime(output_mode);
4995 if (lastchar==CR || lastchar == LF){
4996 for (i=0;i<mimeout_state.count;i++) {
4997 (*o_mputc)(mimeout_state.buf[i]);
5000 mimeout_state.count = 0;
5003 for (i=0;i<mimeout_state.count-1;i++) {
5004 (*o_mputc)(mimeout_state.buf[i]);
5007 mimeout_state.buf[0] = SP;
5008 mimeout_state.count = 1;
5010 open_mime(output_mode);
5013 /* mimeout_mode == 'B', 1, 2 */
5014 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5015 output_mode == UTF_8)) {
5016 if (lastchar == CR || lastchar == LF){
5017 if (nkf_isblank(c)) {
5018 for (i=0;i<mimeout_state.count;i++) {
5019 mimeout_addchar(mimeout_state.buf[i]);
5021 mimeout_state.count = 0;
5024 for (i=0;i<mimeout_state.count;i++) {
5025 (*o_mputc)(mimeout_state.buf[i]);
5028 mimeout_state.count = 0;
5030 mimeout_state.buf[mimeout_state.count++] = (char)c;
5033 if (nkf_isspace(c)) {
5034 for (i=0;i<mimeout_state.count;i++) {
5035 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5037 for (i=0;i<mimeout_state.count;i++) {
5038 (*o_mputc)(mimeout_state.buf[i]);
5041 mimeout_state.count = 0;
5044 mimeout_state.buf[mimeout_state.count++] = (char)c;
5045 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5047 for (i=0;i<mimeout_state.count;i++) {
5048 (*o_mputc)(mimeout_state.buf[i]);
5051 mimeout_state.count = 0;
5055 if (mimeout_state.count>0 && SP<c && c!='=') {
5056 mimeout_state.buf[mimeout_state.count++] = (char)c;
5057 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5058 j = mimeout_state.count;
5059 mimeout_state.count = 0;
5061 mimeout_addchar(mimeout_state.buf[i]);
5068 if (mimeout_state.count>0) {
5069 j = mimeout_state.count;
5070 mimeout_state.count = 0;
5072 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5074 mimeout_addchar(mimeout_state.buf[i]);
5080 (*o_mputc)(mimeout_state.buf[i]);
5082 open_mime(output_mode);
/*
 * base64_conv(c2, c1): output stage placed in front of o_base64conv; it
 * runs the line-length guard mime_prechar and then forwards the character.
 */
5089 base64_conv(nkf_char c2, nkf_char c1)
5091 mime_prechar(c2, c1);
5092 (*o_base64conv)(c2,c1);
/*
 * nkf_iconv_t: state for the iconv-based converter.  The visible members
 * are the input buffer size, the output buffer and the output buffer size;
 * the functions below also use cd and input_buffer, whose declarations are
 * not visible in this listing.
 */
5096 typedef struct nkf_iconv_t {
5099 size_t input_buffer_size;
5100 char *output_buffer;
5101 size_t output_buffer_size;
/*
 * nkf_iconv_new(tocode, fromcode): allocates the input buffer (IOBUF_SIZE)
 * and the output buffer (twice that) with nkf_xmalloc and opens an iconv
 * descriptor for the fromcode-to-tocode conversion; a failed iconv_open is
 * reported with perror.
 * NOTE(review): as visible here the code does not look compilable:
 * converter is declared as nkf_iconv_t yet used with the arrow operator,
 * and perror is given the result of an fprintf call that has no stream
 * argument.  Presumably this section is compiled out - confirm before
 * enabling it.
 */
5105 nkf_iconv_new(char *tocode, char *fromcode)
5107 nkf_iconv_t converter;
5109 converter->input_buffer_size = IOBUF_SIZE;
5110 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5111 converter->output_buffer_size = IOBUF_SIZE * 2;
5112 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5113 converter->cd = iconv_open(tocode, fromcode);
5114 if (converter->cd == (iconv_t)-1)
5118 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5121 perror("can't iconv_open");
/*
 * nkf_iconv_convert(converter, input): fills the input buffer from i_getc,
 * runs iconv, writes the converted bytes through o_putc, and on error
 * either moves the unconsumed input to the front of the buffer or doubles
 * the output buffer with realloc.
 * NOTE(review): several visible details look inconsistent and should be
 * checked before this code is enabled:
 *  - the read loop uses f although the parameter is named input;
 *  - the loop breaks while input_length is still below the buffer size,
 *    which looks inverted;
 *  - the output loop decrements output_length in its condition and then
 *    indexes with output_buffer_size-output_length;
 *  - realloc is applied to converter->outbuf, while the struct member
 *    visible above is output_buffer.
 */
5127 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5129 size_t invalid = (size_t)0;
5130 char *input_buffer = converter->input_buffer;
5131 size_t input_length = (size_t)0;
5132 char *output_buffer = converter->output_buffer;
5133 size_t output_length = converter->output_buffer_size;
5138 while ((c = (*i_getc)(f)) != EOF) {
5139 input_buffer[input_length++] = c;
5140 if (input_length < converter->input_buffer_size) break;
5144 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5145 while (output_length-- > 0) {
5146 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5148 if (ret == (size_t) - 1) {
5151 if (input_buffer != converter->input_buffer)
5152 memmove(converter->input_buffer, input_buffer, input_length);
5155 converter->output_buffer_size *= 2;
5156 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5157 if (output_buffer == NULL) {
5158 perror("can't realloc");
5161 converter->output_buffer = output_buffer;
5164 perror("can't iconv");
/*
 * nkf_iconv_close(convert): releases both buffers with nkf_xfree and closes
 * the iconv descriptor.
 * NOTE(review): the parameter is named convert but the body uses converter,
 * and the members inbuf/outbuf differ from the input_buffer/output_buffer
 * names used by nkf_iconv_new - confirm against the struct definition.
 */
5177 nkf_iconv_close(nkf_iconv_t *convert)
5179 nkf_xfree(converter->inbuf);
5180 nkf_xfree(converter->outbuf);
5181 iconv_close(converter->cd);
/*
 * NOTE(review): the defining line of this function is not visible in this
 * listing; presumably it is the routine that restores the default state.
 * Visible behaviour: option flags are reset to their defaults, the 256
 * entries of prefix_table are cleared, the pending MIME output count is
 * zeroed, every o_* stage hook is pointed at no_connection, the visible
 * input hooks are pointed at std_getc/std_ungetc, output_mode returns to
 * ASCII, and input_codename, input_encoding and output_encoding are
 * cleared.
 */
5190 struct input_code *p = input_code_list;
5202 mime_f = MIME_DECODE_DEFAULT;
5203 mime_decode_f = FALSE;
5208 x0201_f = NKF_UNSPECIFIED;
5209 iso2022jp_f = FALSE;
5210 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5211 ms_ucs_map_f = UCS_MAP_ASCII;
5213 #ifdef UTF8_INPUT_ENABLE
5214 no_cp932ext_f = FALSE;
5215 no_best_fit_chars_f = FALSE;
5216 encode_fallback = NULL;
5217 unicode_subchar = '?';
5218 input_endian = ENDIAN_BIG;
5220 #ifdef UTF8_OUTPUT_ENABLE
5221 output_bom_f = FALSE;
5222 output_endian = ENDIAN_BIG;
5224 #ifdef UNICODE_NORMALIZATION
5240 #ifdef SHIFTJIS_CP932
5250 for (i = 0; i < 256; i++){
5251 prefix_table[i] = 0;
5255 mimeout_state.count = 0;
5260 fold_preserve_f = FALSE;
5263 kanji_intro = DEFAULT_J;
5264 ascii_intro = DEFAULT_R;
5265 fold_margin = FOLD_MARGIN;
5266 o_zconv = no_connection;
5267 o_fconv = no_connection;
5268 o_eol_conv = no_connection;
5269 o_rot_conv = no_connection;
5270 o_hira_conv = no_connection;
5271 o_base64conv = no_connection;
5272 o_iso2022jp_check_conv = no_connection;
5275 i_ungetc = std_ungetc;
5277 i_bungetc = std_ungetc;
5280 i_mungetc = std_ungetc;
5281 i_mgetc_buf = std_getc;
5282 i_mungetc_buf = std_ungetc;
5283 output_mode = ASCII;
5285 mime_decode_mode = FALSE;
5291 z_prev2=0,z_prev1=0;
5293 iconv_for_check = 0;
5295 input_codename = NULL;
5296 input_encoding = NULL;
5297 output_encoding = NULL;
/*
 * module_connection(): wires the conversion pipeline.
 *  - Encodings: input_encoding, when set, is applied with
 *    set_input_encoding; a missing output_encoding falls back to
 *    nkf_default_encoding() and, when only guessing or discarding output,
 *    to ISO_2022_JP.
 *  - Output side: oconv starts as the encoder of the output encoding and is
 *    wrapped by each stage in turn.  Every stage saves the current oconv in
 *    its own o_* hook and installs itself, so the stage connected last runs
 *    first.
 *  - Input side: i_getc/i_ungetc are wrapped the same way by the input
 *    filters, each saving the previous pair in its own hooks.
 *  - Finally iconv is set from input_encoding, or to e_iconv when none was
 *    given.
 * NOTE(review): most of the option tests guarding each stage are not
 * visible in this listing.
 */
5305 module_connection(void)
5307 if (input_encoding) set_input_encoding(input_encoding);
5308 if (!output_encoding) {
5309 output_encoding = nkf_default_encoding();
5311 if (!output_encoding) {
5312 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5315 set_output_encoding(output_encoding);
5316 oconv = nkf_enc_to_oconv(output_encoding);
5318 if (nkf_enc_unicode_p(output_encoding))
5319 output_mode = UTF_8;
5321 if (x0201_f == NKF_UNSPECIFIED) {
5322 x0201_f = X0201_DEFAULT;
5325 /* replace continuation module, from output side */
5327 /* output redirection */
5329 if (noout_f || guess_f){
5336 if (mimeout_f == TRUE) {
5337 o_base64conv = oconv; oconv = base64_conv;
5339 /* base64_count = 0; */
5342 if (eolmode_f || guess_f) {
5343 o_eol_conv = oconv; oconv = eol_conv;
5346 o_rot_conv = oconv; oconv = rot_conv;
5349 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5352 o_hira_conv = oconv; oconv = hira_conv;
5355 o_fconv = oconv; oconv = fold_conv;
5358 if (alpha_f || x0201_f) {
5359 o_zconv = oconv; oconv = z_conv;
5363 i_ungetc = std_ungetc;
5364 /* input redirection */
5367 i_cgetc = i_getc; i_getc = cap_getc;
5368 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5371 i_ugetc = i_getc; i_getc = url_getc;
5372 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5375 #ifdef NUMCHAR_OPTION
5377 i_ngetc = i_getc; i_getc = numchar_getc;
5378 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5381 #ifdef UNICODE_NORMALIZATION
5383 i_nfc_getc = i_getc; i_getc = nfc_getc;
5384 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5387 if (mime_f && mimebuf_f==FIXED_MIME) {
5388 i_mgetc = i_getc; i_getc = mime_getc;
5389 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5392 i_bgetc = i_getc; i_getc = broken_getc;
5393 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5395 if (input_encoding) {
5396 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5398 set_iconv(FALSE, e_iconv);
5402 struct input_code *p = input_code_list;
5411 Conversion main loop. Code detection only.
/* Fragment of a routine compiled only for builds that are neither PERL_XS
   nor WIN32DLL; its signature line is not part of this excerpt. */
5414 #if !defined(PERL_XS) && !defined(WIN32DLL)
/* wire up the processing chains before reading any input */
5421 module_connection();
/* read through the i_getc hook until EOF; the loop body is not visible here */
5422 while ((c = (*i_getc)(f)) != EOF)
/*
 * Loop-control shorthands for the conversion loop (LAST, for instance, is
 * used inside kanji_convert()).
 * NOTE(review): SKIP and MORE expand to two statements, so they are only
 * safe inside a braced block, never as the sole body of an unbraced if.
 */
5429 #define NEXT continue /* no output, get next */
5430 #define SKIP c2=0;continue /* clear the pending c2, no output, get next */
5431 #define MORE c2=c1;continue /* keep c1 as the pending c2; need one more byte */
5432 #define SEND (void)0 /* output c1 and c2, get next */
5433 #define LAST break /* end of loop, go closing */
/* set_input_mode: store the new input_mode; the visible lines also record
   and log the ISO-2022-JP codename (any guarding condition is not visible). */
5434 #define set_input_mode(mode) do { \
5435 input_mode = mode; \
5437 set_input_codename("ISO-2022-JP"); \
5438 debug("ISO-2022-JP"); \
/*
 * kanji_convert: read the stream f through the i_getc hook, decode it and
 * hand the characters to the output chain (oconv).
 *
 * Visible structure:
 *   - module_connection() is called first; a negative result is reported
 *     on stderr (message compiled out for PERL_XS and WIN32DLL builds).
 *   - iconv == w_iconv32: input is consumed four bytes at a time.
 *   - iconv == w_iconv16: input is consumed two bytes at a time, plus two
 *     more when nkf_iconv_utf_16() asks for them.
 *   - otherwise: a byte-wise loop handles 8-bit bytes, ESC, SI and SO
 *     designations, MIME starts and line ends before calling iconv or oconv.
 *   - after the loop iconv is called with EOF, and when no input codename
 *     was recorded the input_code_list entry with the lowest score is used.
 *
 * f: input stream; it is only ever passed to the i_getc hook in the lines
 *    visible here.
 */
5442 kanji_convert(FILE *f)
5444 nkf_char c1=0, c2=0, c3=0, c4=0;
5445 int shift_mode = 0; /* 0, 1, 2, 3 */
5447 int is_8bit = FALSE;
5449 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5454 output_mode = ASCII;
5456 if (module_connection() < 0) {
5457 #if !defined(PERL_XS) && !defined(WIN32DLL)
5458 fprintf(stderr, "no output encoding given\n");
5464 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: a trailing group of fewer than four bytes ends the loop unprocessed */
5465 if(iconv == w_iconv32){
5466 while ((c1 = (*i_getc)(f)) != EOF &&
5467 (c2 = (*i_getc)(f)) != EOF &&
5468 (c3 = (*i_getc)(f)) != EOF &&
5469 (c4 = (*i_getc)(f)) != EOF) {
5470 nkf_iconv_utf_32(c1, c2, c3, c4);
/* UTF-16 input: the first call is made with two bytes; two more are read only on request */
5474 else if (iconv == w_iconv16) {
5475 while ((c1 = (*i_getc)(f)) != EOF &&
5476 (c2 = (*i_getc)(f)) != EOF) {
5477 if (nkf_iconv_utf_16(c1, c2, 0, 0) == NKF_ICONV_NEED_TWO_MORE_BYTES &&
5478 (c3 = (*i_getc)(f)) != EOF &&
5479 (c4 = (*i_getc)(f)) != EOF) {
5480 nkf_iconv_utf_16(c1, c2, c3, c4);
/* byte-wise loop for every other input encoding */
5487 while ((c1 = (*i_getc)(f)) != EOF) {
5488 #ifdef INPUT_CODE_FIX
5489 if (!input_encoding)
/* the 8-bit threshold for a pending first byte is 0x92 for CP5022x input, DEL otherwise */
5494 if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
5495 /* in case of 8th bit is on */
5496 if (!estab_f&&!mime_decode_mode) {
5497 /* in case of not established yet */
5498 /* It is still ambiguous */
5499 if (h_conv(f, c2, c1)==EOF) {
5507 /* in case of already established */
5509 /* ignore bogus code */
5517 /* 2nd byte of 7 bit code or SJIS */
5521 else if (nkf_char_unicode_p(c1)) {
5527 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5530 }else if (input_codename && input_codename[0] == 'I' &&
5531 0xA1 <= c1 && c1 <= 0xDF) {
5532 /* JIS X 0201 Katakana in 8bit JIS */
5533 c2 = JIS_X_0201_1976_K;
5536 } else if (c1 > DEL) {
5538 if (!estab_f && !iso8859_f) {
5539 /* not established yet */
5541 } else { /* estab_f==TRUE */
5547 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5548 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5550 c2 = JIS_X_0201_1976_K;
5555 /* already established */
5559 } else if (SP < c1 && c1 < DEL) {
5560 /* in case of Roman characters */
5562 /* output 1 shifted byte */
5566 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5567 /* output 1 shifted byte */
5568 c2 = JIS_X_0201_1976_K;
5571 /* looks like bogus code */
5574 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5575 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5576 /* in case of Kanji shifted */
5578 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5579 /* Check MIME code */
5580 if ((c1 = (*i_getc)(f)) == EOF) {
5583 } else if (c1 == '?') {
5584 /* =? is mime conversion start sequence */
5585 if(mime_f == STRICT_MIME) {
5586 /* check in real detail */
5587 if (mime_begin_strict(f) == EOF)
5590 } else if (mime_begin(f) == EOF)
5599 /* normal ASCII code */
/* SI, SO and ESC are interpreted only while no 8-bit byte has been seen, or inside MIME decoding */
5602 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5605 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5608 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5609 if ((c1 = (*i_getc)(f)) == EOF) {
5613 else if (c1 == '&') {
5615 if ((c1 = (*i_getc)(f)) == EOF) {
/* ESC $ ... : designation of a multi-byte character set */
5621 else if (c1 == '$') {
5623 if ((c1 = (*i_getc)(f)) == EOF) {
5624 /* don't send bogus code
5626 (*oconv)(0, '$'); */
5628 } else if (c1 == '@' || c1 == 'B') {
5630 set_input_mode(JIS_X_0208);
5632 } else if (c1 == '(') {
5634 if ((c1 = (*i_getc)(f)) == EOF) {
5635 /* don't send bogus code
5641 } else if (c1 == '@'|| c1 == 'B') {
5643 set_input_mode(JIS_X_0208);
5646 } else if (c1 == 'D'){
5647 set_input_mode(JIS_X_0212);
5649 #endif /* X0212_ENABLE */
5650 } else if (c1 == 'O' || c1 == 'Q'){
5651 set_input_mode(JIS_X_0213_1);
5653 } else if (c1 == 'P'){
5654 set_input_mode(JIS_X_0213_2);
5657 /* could be some special code */
5664 } else if (broken_f&0x2) {
5665 /* accept any ESC-(-x as broken code ... */
5666 input_mode = JIS_X_0208;
/* ESC ( ... : designation of a single-byte character set */
5675 } else if (c1 == '(') {
5677 if ((c1 = (*i_getc)(f)) == EOF) {
5678 /* don't send bogus code
5680 (*oconv)(0, '('); */
5683 else if (c1 == 'I') {
5684 /* JIS X 0201 Katakana */
5685 set_input_mode(JIS_X_0201_1976_K);
5688 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5689 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5690 set_input_mode(ASCII);
5693 else if (broken_f&0x2) {
5694 set_input_mode(ASCII);
5703 else if (c1 == '.') {
5705 if ((c1 = (*i_getc)(f)) == EOF) {
5708 else if (c1 == 'A') {
5719 else if (c1 == 'N') {
5722 if (g2 == ISO_8859_1) {
5737 } else if (c1 == ESC && iconv == s_iconv) {
5738 /* ESC in Shift_JIS */
5739 if ((c1 = (*i_getc)(f)) == EOF) {
5742 } else if (c1 == '$') {
5744 if ((c1 = (*i_getc)(f)) == EOF) {
5746 } else if (('E' <= c1 && c1 <= 'G') ||
5747 ('O' <= c1 && c1 <= 'Q')) {
/* c1 is one of E, F, G, O, P, Q here, so c1 % 7 yields 6, 0, 1, 2, 3, 4; slot 5 is never selected */
5755 static const nkf_char jphone_emoji_first_table[7] =
5756 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5757 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5758 if ((c1 = (*i_getc)(f)) == EOF) LAST;
/* every following byte from SP through z is emitted as base value plus byte */
5759 while (SP <= c1 && c1 <= 'z') {
5760 (*oconv)(0, c1 + c3);
5761 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5776 } else if (c1 == LF || c1 == CR) {
5778 input_mode = ASCII; set_iconv(FALSE, 0);
5780 } else if (mime_decode_f && !mime_decode_mode){
5782 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5790 } else { /* if (c1 == CR)*/
5791 if ((c1=(*i_getc)(f))!=EOF) {
5795 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
/* hand the collected bytes to the active decoder; its result selects how many more bytes to read */
5815 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5818 if ((c3 = (*i_getc)(f)) != EOF) {
5821 if ((c4 = (*i_getc)(f)) != EOF) {
5823 (*iconv)(c2, c1, c3|c4);
5828 /* 3 bytes EUC or UTF-8 */
5829 if ((c3 = (*i_getc)(f)) != EOF) {
5831 (*iconv)(c2, c1, c3);
/* first bytes 0x7F-0x92 with second bytes 0x21-0x7E map linearly, 94 cells per row, onto values starting at 0xE000 */
5839 0x7F <= c2 && c2 <= 0x92 &&
5840 0x21 <= c1 && c1 <= 0x7E) {
5842 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5845 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5849 (*oconv)(PREFIX_EUCG3 | c2, c1);
5851 #endif /* X0212_ENABLE */
5853 (*oconv)(PREFIX_EUCG3 | c2, c1);
5856 (*oconv)(input_mode, c1); /* other special case */
5862 /* goto next_word */
/* end of input: notify the decoder, then fall back to the best-scoring candidate when no codename was recorded */
5867 (*iconv)(EOF, 0, 0);
5868 if (!input_codename)
5871 struct input_code *p = input_code_list;
5872 struct input_code *result = p;
/* a lower score wins */
5874 if (p->score < result->score) result = p;
5877 set_input_codename(result->name);
5879 debug(result->name);
5887 * int options(unsigned char *cp)
/*
 * options: parse one option string and update the global conversion
 * settings accordingly.
 *
 * cp: option text. Everything up to and including the first hyphen is
 *     skipped; each following character then selects a switch. A second
 *     hyphen introduces a long option, which is looked up in long_option[].
 *     An entry with a non-empty alias redirects parsing to the alias text
 *     (cp_back presumably remembers where to resume -- confirm).
 *
 * Unknown options are reported on stderr unless built for PERL_XS or
 * WIN32DLL. Return statements are not part of this excerpt.
 */
5894 options(unsigned char *cp)
5898 unsigned char *cp_back = NULL;
5903 while(*cp && *cp++!='-');
5904 while (*cp || cp_back) {
5912 case '-': /* literal options */
5913 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
/* match cp against each long option name; a name ending in = takes a value, p is left at the stop position */
5917 for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
5918 p = (unsigned char *)long_option[i].name;
5919 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5920 if (*p == cp[j] || cp[j] == SP){
5927 #if !defined(PERL_XS) && !defined(WIN32DLL)
5928 fprintf(stderr, "unknown long option: --%s\n", cp);
/* advance cp to the end of this long option (NUL or space) */
5932 while(*cp && *cp != SP && cp++);
5933 if (long_option[i].alias[0]){
5935 cp = (unsigned char *)long_option[i].alias;
5938 if (strcmp(long_option[i].name, "help") == 0){
5943 if (strcmp(long_option[i].name, "ic=") == 0){
5944 enc = nkf_enc_find((char *)p);
5946 input_encoding = enc;
5949 if (strcmp(long_option[i].name, "oc=") == 0){
5950 enc = nkf_enc_find((char *)p);
5951 /* if (enc <= 0) continue; */
5953 output_encoding = enc;
5956 if (strcmp(long_option[i].name, "guess=") == 0){
5957 if (p[0] == '0' || p[0] == '1') {
/* overwrite variants keep the original timestamp, in-place variants do not */
5965 if (strcmp(long_option[i].name, "overwrite") == 0){
5968 preserve_time_f = TRUE;
5971 if (strcmp(long_option[i].name, "overwrite=") == 0){
5974 preserve_time_f = TRUE;
5976 backup_suffix = (char *)p;
5979 if (strcmp(long_option[i].name, "in-place") == 0){
5982 preserve_time_f = FALSE;
5985 if (strcmp(long_option[i].name, "in-place=") == 0){
5988 preserve_time_f = FALSE;
5990 backup_suffix = (char *)p;
5995 if (strcmp(long_option[i].name, "cap-input") == 0){
5999 if (strcmp(long_option[i].name, "url-input") == 0){
6004 #ifdef NUMCHAR_OPTION
6005 if (strcmp(long_option[i].name, "numchar-input") == 0){
6011 if (strcmp(long_option[i].name, "no-output") == 0){
6015 if (strcmp(long_option[i].name, "debug") == 0){
6020 if (strcmp(long_option[i].name, "cp932") == 0){
6021 #ifdef SHIFTJIS_CP932
6025 #ifdef UTF8_OUTPUT_ENABLE
6026 ms_ucs_map_f = UCS_MAP_CP932;
6030 if (strcmp(long_option[i].name, "no-cp932") == 0){
6031 #ifdef SHIFTJIS_CP932
6035 #ifdef UTF8_OUTPUT_ENABLE
6036 ms_ucs_map_f = UCS_MAP_ASCII;
6040 #ifdef SHIFTJIS_CP932
6041 if (strcmp(long_option[i].name, "cp932inv") == 0){
6048 if (strcmp(long_option[i].name, "x0212") == 0){
6055 if (strcmp(long_option[i].name, "exec-in") == 0){
6059 if (strcmp(long_option[i].name, "exec-out") == 0){
6064 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6065 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6066 no_cp932ext_f = TRUE;
6069 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6070 no_best_fit_chars_f = TRUE;
/* fb-* options select the encode_fallback handler */
6073 if (strcmp(long_option[i].name, "fb-skip") == 0){
6074 encode_fallback = NULL;
6077 if (strcmp(long_option[i].name, "fb-html") == 0){
6078 encode_fallback = encode_fallback_html;
6081 if (strcmp(long_option[i].name, "fb-xml") == 0){
6082 encode_fallback = encode_fallback_xml;
6085 if (strcmp(long_option[i].name, "fb-java") == 0){
6086 encode_fallback = encode_fallback_java;
6089 if (strcmp(long_option[i].name, "fb-perl") == 0){
6090 encode_fallback = encode_fallback_perl;
6093 if (strcmp(long_option[i].name, "fb-subchar") == 0){
6094 encode_fallback = encode_fallback_subchar;
/* fb-subchar= takes the substitute character as a decimal, hexadecimal or octal number.
   NOTE(review): the digit loops and w16e_conv() below overwrite i, which is also the
   long_option index; any later use of long_option[i] in the same pass would read a
   different entry -- confirm that control leaves this pass right after. */
6097 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6098 encode_fallback = encode_fallback_subchar;
6099 unicode_subchar = 0;
6101 /* decimal number */
6102 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6103 unicode_subchar *= 10;
6104 unicode_subchar += hex2bin(p[i]);
6106 }else if(p[1] == 'x' || p[1] == 'X'){
6107 /* hexadecimal number */
6108 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6109 unicode_subchar <<= 4;
6110 unicode_subchar |= hex2bin(p[i]);
6114 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6115 unicode_subchar *= 8;
6116 unicode_subchar += hex2bin(p[i]);
6119 w16e_conv(unicode_subchar, &i, &j);
6120 unicode_subchar = i<<8 | j;
6124 #ifdef UTF8_OUTPUT_ENABLE
6125 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6126 ms_ucs_map_f = UCS_MAP_MS;
6130 #ifdef UNICODE_NORMALIZATION
6131 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
/* prefix=: p[0] is the prefix character; every following graphic character is mapped to it in prefix_table */
6136 if (strcmp(long_option[i].name, "prefix=") == 0){
6137 if (nkf_isgraph(p[0])){
6138 for (i = 1; nkf_isgraph(p[i]); i++){
6139 prefix_table[p[i]] = p[0];
6144 #if !defined(PERL_XS) && !defined(WIN32DLL)
6145 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6150 case 'b': /* buffered mode */
6153 case 'u': /* non-buffered mode */
6156 case 't': /* transparent mode */
6161 } else if (*cp=='2') {
6165 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6173 case 'j': /* JIS output */
6175 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6177 case 'e': /* AT&T EUC output */
6178 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6180 case 's': /* SJIS output */
6181 output_encoding = nkf_enc_from_index(SHIFT_JIS);
6183 case 'l': /* ISO8859 Latin-1 support, no conversion */
6184 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6185 input_encoding = nkf_enc_from_index(ISO_8859_1);
6187 case 'i': /* Kanji IN ESC-$-@/B */
6188 if (*cp=='@'||*cp=='B')
6189 kanji_intro = *cp++;
6191 case 'o': /* ASCII IN ESC-(-J/B/H */
6192 /* ESC ( H was used in initial JUNET messages */
6193 if (*cp=='J'||*cp=='B'||*cp=='H')
6194 ascii_intro = *cp++;
6198 bit:1 katakana->hiragana
6199 bit:2 hiragana->katakana
/* the digit value itself is OR-ed into hira_f (it is not used as a shift count here) */
6201 if ('9'>= *cp && *cp>='0')
6202 hira_f |= (*cp++ -'0');
6209 #if defined(MSDOS) || defined(__OS2__)
6216 show_configuration();
6224 #ifdef UTF8_OUTPUT_ENABLE
6225 case 'w': /* UTF-{8,16,32} output */
6230 output_encoding = nkf_enc_from_index(UTF_8N);
6232 output_bom_f = TRUE;
6233 output_encoding = nkf_enc_from_index(UTF_8_BOM);
6237 if ('1'== cp[0] && '6'==cp[1]) {
6240 } else if ('3'== cp[0] && '2'==cp[1]) {
6244 output_encoding = nkf_enc_from_index(UTF_8);
6249 output_endian = ENDIAN_LITTLE;
6250 output_bom_f = TRUE;
6251 } else if (cp[0] == 'B') {
6253 output_bom_f = TRUE;
6256 output_bom_f = FALSE;
/* narrow the generic UTF-16 or UTF-32 index to the endian-specific one, without and with BOM */
6258 enc_idx = enc_idx == UTF_16
6259 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6260 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6262 enc_idx = enc_idx == UTF_16
6263 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6264 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6266 output_encoding = nkf_enc_from_index(enc_idx);
6270 #ifdef UTF8_INPUT_ENABLE
6271 case 'W': /* UTF input */
6274 input_encoding = nkf_enc_from_index(UTF_8);
6277 if ('1'== cp[0] && '6'==cp[1]) {
6279 input_endian = ENDIAN_BIG;
6281 } else if ('3'== cp[0] && '2'==cp[1]) {
6283 input_endian = ENDIAN_BIG;
6286 input_encoding = nkf_enc_from_index(UTF_8);
6291 input_endian = ENDIAN_LITTLE;
6292 } else if (cp[0] == 'B') {
6294 input_endian = ENDIAN_BIG;
6296 enc_idx = (enc_idx == UTF_16
6297 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6298 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6299 input_encoding = nkf_enc_from_index(enc_idx);
6303 /* Input code assumption */
6304 case 'J': /* ISO-2022-JP input */
6305 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6307 case 'E': /* EUC-JP input */
6308 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6310 case 'S': /* Shift_JIS input */
6311 input_encoding = nkf_enc_from_index(SHIFT_JIS);
6313 case 'Z': /* Convert X0208 alphabet to ASCII */
6315 bit:0 Convert JIS X 0208 Alphabet to ASCII
6316 bit:1 Convert Kankaku to one space
6317 bit:2 Convert Kankaku to two spaces
6318 bit:3 Convert HTML Entity
6319 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
/* each digit 0-4 sets the bit of that number in alpha_f; several digits may follow */
6321 while ('0'<= *cp && *cp <='4') {
6322 alpha_f |= 1 << (*cp++ - '0');
6326 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6327 x0201_f = FALSE; /* No X0201->X0208 conversion */
6329 ESC-(-I in JIS, EUC, MS Kanji
6330 SI/SO in JIS, EUC, MS Kanji
6331 SS2 in EUC, JIS, not in MS Kanji
6332 MS Kanji (0xa0-0xdf)
6334 ESC-(-I in JIS (0x20-0x5f)
6335 SS2 in EUC (0xa0-0xdf)
6336 0xa0-0xd in MS Kanji (0xa0-0xdf)
6339 case 'X': /* Convert X0201 kana to X0208 */
6342 case 'F': /* preserve new lines */
6343 fold_preserve_f = TRUE;
/* the original line numbers are consecutive here, so case F runs straight on into case f */
6344 case 'f': /* folding -f60 or -f */
6347 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6349 fold_len += *cp++ - '0';
/* a fold length outside the open range 0..BUFSIZ falls back to DEFAULT_FOLD */
6351 if (!(0<fold_len && fold_len<BUFSIZ))
6352 fold_len = DEFAULT_FOLD;
6356 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6358 fold_margin += *cp++ - '0';
6362 case 'm': /* MIME support */
6363 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6364 if (*cp=='B'||*cp=='Q') {
6365 mime_decode_mode = *cp++;
6366 mimebuf_f = FIXED_MIME;
6367 } else if (*cp=='N') {
6368 mime_f = TRUE; cp++;
6369 } else if (*cp=='S') {
6370 mime_f = STRICT_MIME; cp++;
6371 } else if (*cp=='0') {
6372 mime_decode_f = FALSE;
6373 mime_f = FALSE; cp++;
6375 mime_f = STRICT_MIME;
6378 case 'M': /* MIME output */
6381 mimeout_f = FIXED_MIME; cp++;
6382 } else if (*cp=='Q') {
6384 mimeout_f = FIXED_MIME; cp++;
6389 case 'B': /* Broken JIS support */
6391 bit:1 allow any x on ESC-(-x or ESC-$-x
6392 bit:2 reset to ascii on NL
/* unlike the hiragana switch above, the digit selects a bit position in broken_f */
6394 if ('9'>= *cp && *cp>='0')
6395 broken_f |= 1<<(*cp++ -'0');
6400 case 'O':/* for Output file */
6404 case 'c':/* add cr code */
6407 case 'd':/* delete cr code */
6410 case 'I': /* ISO-2022-JP output */
6413 case 'L': /* line mode */
6414 if (*cp=='u') { /* unix */
6415 eolmode_f = LF; cp++;
6416 } else if (*cp=='m') { /* mac */
6417 eolmode_f = CR; cp++;
6418 } else if (*cp=='w') { /* windows */
6419 eolmode_f = CRLF; cp++;
6420 } else if (*cp=='0') { /* no conversion */
6421 eolmode_f = 0; cp++;
6426 if ('2' <= *cp && *cp <= '9') {
6429 } else if (*cp == '0' || *cp == '1') {
6438 /* multiple options in a string are allowed for the Perl module */
6439 while(*cp && *cp++!='-');
6442 #if !defined(PERL_XS) && !defined(WIN32DLL)
6443 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6445 /* bogus option but ignored */
6453 #include "nkf32dll.c"
6454 #elif defined(PERL_XS)
6455 #else /* WIN32DLL */
/*
 * main: command-line entry point. The surrounding preprocessor lines
 * suggest it is built only when neither WIN32DLL nor PERL_XS is defined
 * (the opening #if is not part of this excerpt).
 *
 * Visible flow:
 *   - leading arguments that start with a hyphen are taken as options;
 *   - with no file arguments, stdin is converted to stdout;
 *   - otherwise each file is opened in turn; when writing back to the
 *     file, output goes to a temporary file that is given the original
 *     permissions (and timestamps if requested) and then renamed over
 *     the original, optionally after the original is kept as a backup.
 *   - a file that cannot be opened sets is_argument_error, which is
 *     checked before returning.
 */
6457 main(int argc, char **argv)
6462 char *outfname = NULL;
6465 #ifdef EASYWIN /*Easy Win */
6466 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6468 #ifdef DEFAULT_CODE_LOCALE
6469 setlocale(LC_CTYPE, "");
/* consume leading option arguments */
6473 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6474 cp = (unsigned char *)*argv;
/* exec support: create a pipe and fork; execvp replaces the process image with the command in argv[1..] */
6479 if (pipe(fds) < 0 || (pid = fork()) < 0){
6490 execvp(argv[1], &argv[1]);
/* snapshot a few flags and put them back afterwards; the call made in between is not part of this excerpt */
6507 int debug_f_back = debug_f;
6510 int exec_f_back = exec_f;
6513 int x0212_f_back = x0212_f;
6515 int x0213_f_back = x0213_f;
6516 int guess_f_back = guess_f;
6518 guess_f = guess_f_back;
6521 debug_f = debug_f_back;
6524 exec_f = exec_f_back;
6526 x0212_f = x0212_f_back;
6527 x0213_f = x0213_f_back;
/* binary mode and buffering for stdout */
6530 if (binmode_f == TRUE)
6531 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6532 if (freopen("","wb",stdout) == NULL)
6539 setbuf(stdout, (char *) NULL);
6541 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
/* binary mode and buffering for stdin, then convert stdin */
6544 if (binmode_f == TRUE)
6545 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6546 if (freopen("","rb",stdin) == NULL) return (-1);
6550 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6554 kanji_convert(stdin);
6555 if (guess_f) print_guessed_code(NULL);
6559 int is_argument_error = FALSE;
/* per-file reset of detection state */
6561 input_codename = NULL;
6564 iconv_for_check = 0;
/* open the next file argument; failure is remembered and reported through the exit status */
6566 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6568 is_argument_error = TRUE;
6576 /* reopen file for stdout */
6577 if (file_out_f == TRUE) {
/* temporary name: the original name plus a template suffix; sized for the longer of the two suffixes used below */
6580 outfname = nkf_xmalloc(strlen(origfname)
6581 + strlen(".nkftmpXXXXXX")
6583 strcpy(outfname, origfname);
/* scan backwards for the last path separator (slash or backslash) */
6587 for (i = strlen(outfname); i; --i){
6588 if (outfname[i - 1] == '/'
6589 || outfname[i - 1] == '\\'){
6595 strcat(outfname, "ntXXXXXX");
/* O_EXCL makes the open fail if the name already exists */
6597 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6598 S_IREAD | S_IWRITE);
6600 strcat(outfname, ".nkftmpXXXXXX");
6601 fd = mkstemp(outfname);
/* keep a duplicate of the real stdout, then point stdout at the temporary file */
6604 || (fd_backup = dup(fileno(stdout))) < 0
6605 || dup2(fd, fileno(stdout)) < 0
6616 outfname = "nkf.out";
6619 if(freopen(outfname, "w", stdout) == NULL) {
6623 if (binmode_f == TRUE) {
6624 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6625 if (freopen("","wb",stdout) == NULL)
6632 if (binmode_f == TRUE)
6633 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6634 if (freopen("","rb",fin) == NULL)
6639 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6643 char *filename = NULL;
/* the file name is passed to the guess printer only when several files were given */
6645 if (nfiles > 1) filename = origfname;
6646 if (guess_f) print_guessed_code(filename);
6652 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
/* put the saved descriptor back as stdout */
6660 if (dup2(fd_backup, fileno(stdout)) < 0){
6663 if (stat(origfname, &sb)) {
6664 fprintf(stderr, "Can't stat %s\n", origfname);
6666 /* Restore the permissions: copy the mode of the original file onto the output file. */
6667 if (chmod(outfname, sb.st_mode)) {
6668 fprintf(stderr, "Can't set permission %s\n", outfname);
6671 /* Restore the timestamp: copy the times of the original file onto the output file. */
6672 if(preserve_time_f){
6673 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6674 tb[0] = tb[1] = sb.st_mtime;
6675 if (utime(outfname, tb)) {
6676 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6679 tb.actime = sb.st_atime;
6680 tb.modtime = sb.st_mtime;
6681 if (utime(outfname, &tb)) {
6682 fprintf(stderr, "Can't set timestamp %s\n", outfname);
/* backup requested: remove any stale backup, then move the original aside under the backup name */
6687 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6689 unlink(backup_filename);
6691 if (rename(origfname, backup_filename)) {
6692 perror(backup_filename);
6693 fprintf(stderr, "Can't rename %s to %s\n",
6694 origfname, backup_filename);
6696 nkf_xfree(backup_filename);
6699 if (unlink(origfname)){
/* move the converted temporary file into place under the original name */
6704 if (rename(outfname, origfname)) {
6706 fprintf(stderr, "Can't rename %s to %s\n",
6707 outfname, origfname);
6709 nkf_xfree(outfname);
6714 if (is_argument_error)
6717 #ifdef EASYWIN /*Easy Win */
6718 if (file_out_f == FALSE)
6719 scanf("%d",&end_check);
6722 #else /* for Other OS */
6723 if (file_out_f == TRUE)
6725 #endif /*Easy Win */
6728 #endif /* WIN32DLL */