1 /** Network Kanji Filter. (PDS Version)
2 ** -*- coding: ISO-2022-JP -*-
3 ************************************************************************
4 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
5 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
6 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
7 ** Copyright (C) 1996,1998
9 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
10 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
11 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
12 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
14 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
15 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
16 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
17 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
18 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
19 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
21 ** Everyone is permitted to do anything on this program
22 ** including copying, modifying, improving,
23 ** as long as you don't try to pretend that you wrote it.
24 ** i.e., the above copyright notice has to appear in all copies.
25 ** Binary distribution requires original version messages.
26 ** You don't have to ask before copying, redistribution or publishing.
27 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
28 ***********************************************************************/
30 /***********************************************************************
31 *
\e$B8=:_!"
\e(Bnkf
\e$B$O
\e(B SourceForge
\e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#
\e(B
32 * http://sourceforge.jp/projects/nkf/
33 ***********************************************************************/
34 #define NKF_VERSION "2.0.8"
35 #define NKF_RELEASE_DATE "2009-01-05"
37 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
38 "Copyright (C) 2002-2009 Kono, Furukawa, Naruse, mastodon"
49 # define INCL_DOSERRORS
55 /* state of output_mode and input_mode
134 NKF_ENCODING_TABLE_SIZE,
135 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
136 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
137 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
138 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
139 JIS_X_0208 = 0x1168, /* @B */
140 JIS_X_0212 = 0x1159, /* D */
141 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
142 JIS_X_0213_2 = 0x1229, /* P */
143 JIS_X_0213_1 = 0x1233 /* Q */
146 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
147 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
148 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
149 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
150 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
151 static void j_oconv(nkf_char c2, nkf_char c1);
152 static void s_oconv(nkf_char c2, nkf_char c1);
153 static void e_oconv(nkf_char c2, nkf_char c1);
154 static void w_oconv(nkf_char c2, nkf_char c1);
155 static void w_oconv16(nkf_char c2, nkf_char c1);
156 static void w_oconv32(nkf_char c2, nkf_char c1);
160 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
161 void (*oconv)(nkf_char c2, nkf_char c1);
162 } nkf_native_encoding;
164 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
165 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
166 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
167 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
168 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
169 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
170 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
175 const nkf_native_encoding *base_encoding;
178 nkf_encoding nkf_encoding_table[] = {
179 {ASCII, "US-ASCII", &NkfEncodingASCII},
180 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
181 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
182 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
183 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
184 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
185 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
186 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
187 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
188 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
189 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
190 {CP10001, "CP10001", &NkfEncodingShift_JIS},
191 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
192 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
193 {CP51932, "CP51932", &NkfEncodingEUC_JP},
194 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
195 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
196 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
197 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
198 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
199 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
200 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
201 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
202 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
203 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
204 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
205 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
206 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
207 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
208 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
209 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
210 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
211 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
212 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
213 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
214 {BINARY, "BINARY", &NkfEncodingASCII},
221 } encoding_name_to_id_table[] = {
224 {"ISO-2022-JP", ISO_2022_JP},
225 {"ISO2022JP-CP932", CP50220},
226 {"CP50220", CP50220},
227 {"CP50221", CP50221},
228 {"CSISO2022JP", CP50221},
229 {"CP50222", CP50222},
230 {"ISO-2022-JP-1", ISO_2022_JP_1},
231 {"ISO-2022-JP-3", ISO_2022_JP_3},
232 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
233 {"SHIFT_JIS", SHIFT_JIS},
235 {"WINDOWS-31J", WINDOWS_31J},
236 {"CSWINDOWS31J", WINDOWS_31J},
237 {"CP932", WINDOWS_31J},
238 {"MS932", WINDOWS_31J},
239 {"CP10001", CP10001},
242 {"EUCJP-NKF", EUCJP_NKF},
243 {"CP51932", CP51932},
244 {"EUC-JP-MS", EUCJP_MS},
245 {"EUCJP-MS", EUCJP_MS},
246 {"EUCJPMS", EUCJP_MS},
247 {"EUC-JP-ASCII", EUCJP_ASCII},
248 {"EUCJP-ASCII", EUCJP_ASCII},
249 {"SHIFT_JISX0213", SHIFT_JISX0213},
250 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
251 {"EUC-JISX0213", EUC_JISX0213},
252 {"EUC-JIS-2004", EUC_JIS_2004},
255 {"UTF-8-BOM", UTF_8_BOM},
256 {"UTF8-MAC", UTF8_MAC},
257 {"UTF-8-MAC", UTF8_MAC},
259 {"UTF-16BE", UTF_16BE},
260 {"UTF-16BE-BOM", UTF_16BE_BOM},
261 {"UTF-16LE", UTF_16LE},
262 {"UTF-16LE-BOM", UTF_16LE_BOM},
264 {"UTF-32BE", UTF_32BE},
265 {"UTF-32BE-BOM", UTF_32BE_BOM},
266 {"UTF-32LE", UTF_32LE},
267 {"UTF-32LE-BOM", UTF_32LE_BOM},
272 #if defined(DEFAULT_CODE_JIS)
273 #define DEFAULT_ENCIDX ISO_2022_JP
274 #elif defined(DEFAULT_CODE_SJIS)
275 #define DEFAULT_ENCIDX SHIFT_JIS
276 #elif defined(DEFAULT_CODE_WINDOWS_31J)
277 #define DEFAULT_ENCIDX WINDOWS_31J
278 #elif defined(DEFAULT_CODE_EUC)
279 #define DEFAULT_ENCIDX EUC_JP
280 #elif defined(DEFAULT_CODE_UTF8)
281 #define DEFAULT_ENCIDX UTF_8
/*
 * Character-class and small conversion helper macros.
 * CAUTION: the macro arguments are not parenthesized in the expansions and
 * most of them are evaluated more than once, so pass only simple,
 * side-effect-free expressions (a variable or an array element).
 */
285 #define is_alnum(c) \
286 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9'))
288 /* I don't trust portability of toupper */
289 #define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
290 #define nkf_isoctal(c) ('0'<=c && c<='7')
291 #define nkf_isdigit(c) ('0'<=c && c<='9')
292 #define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
293 #define nkf_isblank(c) (c == SP || c == TAB)
294 #define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == LF)
295 #define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
296 #define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
297 #define nkf_isprint(c) (SP<=c && c<='~')
298 #define nkf_isgraph(c) ('!'<=c && c<='~')
299 #define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \
300 ('A'<=c&&c<='F') ? (c-'A'+10) : \
301 ('a'<=c&&c<='f') ? (c-'a'+10) : 0)
302 #define bin2hex(c) ("0123456789ABCDEF"[c&15])
303 #define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3)
304 #define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \
305 ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \
306 && (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
308 #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
309 #define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c < (0xE0&0x7F))
311 #define HOLD_SIZE 1024
312 #if defined(INT_IS_SHORT)
313 #define IOBUF_SIZE 2048
315 #define IOBUF_SIZE 16384
318 #define DEFAULT_J 'B'
319 #define DEFAULT_R 'B'
326 /* MIME preprocessor */
328 #ifdef EASYWIN /*Easy Win */
329 extern POINT _BufferSize;
338 void (*status_func)(struct input_code *, nkf_char);
339 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
343 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
344 static nkf_encoding *input_encoding = NULL;
345 static nkf_encoding *output_encoding = NULL;
347 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
349 * 0: Shift_JIS, eucJP-ascii
354 #define UCS_MAP_ASCII 0
356 #define UCS_MAP_CP932 2
357 #define UCS_MAP_CP10001 3
358 static int ms_ucs_map_f = UCS_MAP_ASCII;
360 #ifdef UTF8_INPUT_ENABLE
361 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
362 static int no_cp932ext_f = FALSE;
363 /* NOTE(review): this comment used to read "ignore ZERO WIDTH NO-BREAK SPACE", which does not match the flag below; judging by its name the flag presumably disables best-fit character mappings -- confirm */
364 static int no_best_fit_chars_f = FALSE;
365 static int input_endian = ENDIAN_BIG;
366 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
367 static void (*encode_fallback)(nkf_char c) = NULL;
368 static void w_status(struct input_code *, nkf_char);
370 #ifdef UTF8_OUTPUT_ENABLE
371 static int output_bom_f = FALSE;
372 static int output_endian = ENDIAN_BIG;
375 static void std_putc(nkf_char c);
376 static nkf_char std_getc(FILE *f);
377 static nkf_char std_ungetc(nkf_char c,FILE *f);
379 static nkf_char broken_getc(FILE *f);
380 static nkf_char broken_ungetc(nkf_char c,FILE *f);
382 static nkf_char mime_getc(FILE *f);
384 static void mime_putc(nkf_char c);
388 #if !defined(PERL_XS) && !defined(WIN32DLL)
389 static unsigned char stdibuf[IOBUF_SIZE];
390 static unsigned char stdobuf[IOBUF_SIZE];
394 static int unbuf_f = FALSE;
395 static int estab_f = FALSE;
396 static int nop_f = FALSE;
397 static int binmode_f = TRUE; /* binary mode */
398 static int rot_f = FALSE; /* ROT13/47 mode */
399 static int hira_f = FALSE; /* hiragana/katakana conversion */
400 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
401 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
402 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
403 static int mimebuf_f = FALSE; /* MIME buffered input */
404 static int broken_f = FALSE; /* convert ESC-less broken JIS */
405 static int iso8859_f = FALSE; /* ISO8859 through */
406 static int mimeout_f = FALSE; /* base64 mode */
407 static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
408 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
410 #ifdef UNICODE_NORMALIZATION
411 static int nfc_f = FALSE;
412 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of the NFC stage (comment previously said "ugetc", apparently copied from i_ugetc) */
413 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
417 static int cap_f = FALSE;
418 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
419 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
421 static int url_f = FALSE;
422 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
423 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
426 #define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
427 #define CLASS_MASK NKF_INT32_C(0xFF000000)
428 #define CLASS_UNICODE NKF_INT32_C(0x01000000)
429 #define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
430 #define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
431 #define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
432 #define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
433 #define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
434 #define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE)
435 #define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX)
436 #define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX)
438 #ifdef NUMCHAR_OPTION
439 static int numchar_f = FALSE;
440 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ngetc */
441 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
445 static int noout_f = FALSE;
446 static void no_putc(nkf_char c);
447 static int debug_f = FALSE;
448 static void debug(const char *str);
449 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
452 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
453 static void set_input_codename(const char *codename);
456 static int exec_f = 0;
459 #ifdef SHIFTJIS_CP932
460 /* invert IBM extended characters to others */
461 static int cp51932_f = FALSE;
463 /* invert NEC-selected IBM extended characters to IBM extended characters */
464 static int cp932inv_f = TRUE;
466 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
467 #endif /* SHIFTJIS_CP932 */
469 static int x0212_f = FALSE;
470 static int x0213_f = FALSE;
472 static unsigned char prefix_table[256];
474 static void e_status(struct input_code *, nkf_char);
475 static void s_status(struct input_code *, nkf_char);
477 struct input_code input_code_list[] = {
478 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
479 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
480 #ifdef UTF8_INPUT_ENABLE
481 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
486 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487 static int base64_count = 0;
489 /* X0208 -> ASCII converter */
492 static int f_line = 0; /* chars in line */
493 static int f_prev = 0;
494 static int fold_preserve_f = FALSE; /* preserve new lines */
495 static int fold_f = FALSE;
496 static int fold_len = 0;
499 static unsigned char kanji_intro = DEFAULT_J;
500 static unsigned char ascii_intro = DEFAULT_R;
504 #define FOLD_MARGIN 10
505 #define DEFAULT_FOLD 60
507 static int fold_margin = FOLD_MARGIN;
509 /* process default */
512 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
514 fprintf(stderr,"nkf internal module connection failure.\n");
520 no_connection(nkf_char c2, nkf_char c1)
522 no_connection2(c2,c1,0);
525 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
534 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
536 /* static redirections */
538 static void (*o_putc)(nkf_char c) = std_putc;
540 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
541 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
543 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of bgetc */
544 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
546 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
548 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
549 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
551 /* for strict mime */
552 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
553 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
556 static int output_mode = ASCII; /* output kanji mode */
557 static int input_mode = ASCII; /* input kanji mode */
558 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
560 /* X0201 / X0208 conversion tables */
562 /* X0201 kana conversion table */
564 static const unsigned char cv[]= {
565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
584 /* X0201 kana conversion table for dakuten */
586 static const unsigned char dv[]= {
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 /* X0201 kana conversion table for han-dakuten */
607 static const unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
629 static const unsigned char fv[] = {
631 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
632 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
633 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
635 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
636 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
637 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
638 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
639 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
647 static int option_mode = 0;
648 static int file_out_f = FALSE;
650 static int overwrite_f = FALSE;
651 static int preserve_time_f = FALSE;
652 static int backup_f = FALSE;
653 static char *backup_suffix = "";
656 static int eolmode_f = 0; /* CR, LF, CRLF */
657 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
658 static nkf_char prev_cr = 0; /* CR or 0 */
659 #ifdef EASYWIN /*Easy Win */
660 static int end_check;
663 #define STD_GC_BUFSIZE (256)
664 nkf_char std_gc_buf[STD_GC_BUFSIZE];
668 nkf_xmalloc(size_t size)
672 if (size == 0) size = 1;
676 perror("can't malloc");
684 nkf_xrealloc(void *ptr, size_t size)
686 if (size == 0) size = 1;
688 ptr = realloc(ptr, size);
690 perror("can't realloc");
697 #define nkf_xfree(ptr) free(ptr)
700 nkf_str_caseeql(const char *src, const char *target)
703 for (i = 0; src[i] && target[i]; i++) {
704 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
706 if (src[i] || target[i]) return FALSE;
711 nkf_enc_from_index(int idx)
713 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
716 return &nkf_encoding_table[idx];
720 nkf_enc_find_index(const char *name)
723 if (name[0] == 'X' && *(name+1) == '-') name += 2;
724 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
725 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
726 return encoding_name_to_id_table[i].id;
733 nkf_enc_find(const char *name)
736 idx = nkf_enc_find_index(name);
737 if (idx < 0) return 0;
738 return nkf_enc_from_index(idx);
741 #define nkf_enc_name(enc) (enc)->name
742 #define nkf_enc_to_index(enc) (enc)->id
743 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
744 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
745 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
746 #define nkf_enc_asciicompat(enc) (\
747 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
748 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
749 #define nkf_enc_unicode_p(enc) (\
750 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
751 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
752 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
753 #define nkf_enc_cp5022x_p(enc) (\
754 nkf_enc_to_index(enc) == CP50220 ||\
755 nkf_enc_to_index(enc) == CP50221 ||\
756 nkf_enc_to_index(enc) == CP50222)
758 #ifdef DEFAULT_CODE_LOCALE
762 #ifdef HAVE_LANGINFO_H
763 return nl_langinfo(CODESET);
764 #elif defined(__WIN32__)
766 sprintf(buf, "CP%d", GetACP());
768 #elif defined(__OS2__)
769 # if defined(INT_IS_SHORT)
775 ULONG ulCP[1], ulncp;
776 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
777 if (ulCP[0] == 932 || ulCP[0] == 943)
778 strcpy(buf, "Shift_JIS");
780 sprintf(buf, "CP%lu", ulCP[0]);
788 nkf_locale_encoding()
790 nkf_encoding *enc = 0;
791 const char *encname = nkf_locale_charmap();
793 enc = nkf_enc_find(encname);
796 #endif /* DEFAULT_CODE_LOCALE */
801 return &nkf_encoding_table[UTF_8];
805 nkf_default_encoding()
807 nkf_encoding *enc = 0;
808 #ifdef DEFAULT_CODE_LOCALE
809 enc = nkf_locale_encoding();
810 #elif defined(DEFAULT_ENCIDX)
811 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
813 if (!enc) enc = nkf_utf8_encoding();
824 nkf_buf_new(int length)
826 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
827 buf->ptr = nkf_xmalloc(length);
834 nkf_buf_dispose(nkf_buf_t *buf)
840 #define nkf_buf_length(buf) ((buf)->len)
841 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
/*
 * Return the element stored at position `index` of `buf`.
 * Pushed elements occupy ptr[0] .. ptr[len-1] (nkf_buf_push stores at
 * ptr[len] and then increments len), so a valid index satisfies
 * 0 <= index < len.  Accepting index == len would read a slot that holds
 * no pushed data, and a negative index would read before the buffer.
 */
844 nkf_buf_at(nkf_buf_t *buf, int index)
846 assert(0 <= index && index < buf->len);
847 return buf->ptr[index];
851 nkf_buf_clear(nkf_buf_t *buf)
857 nkf_buf_push(nkf_buf_t *buf, unsigned char c)
859 assert(buf->capa > buf->len);
860 buf->ptr[buf->len++] = c;
864 nkf_buf_pop(nkf_buf_t *buf)
866 assert(!nkf_buf_empty_p(buf));
867 return buf->ptr[--buf->len];
870 /* Normalization Form C */
873 #define fprintf dllprintf
879 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
886 "USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
888 "b,u Output is buffered (DEFAULT),Output is unbuffered\n"
889 "j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
890 #ifdef UTF8_OUTPUT_ENABLE
891 " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
893 "J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
894 #ifdef UTF8_INPUT_ENABLE
895 " After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
900 "i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
901 "o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"
902 "r {de/en}crypt ROT13/47\n"
903 "h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
904 "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
905 "M[BQ] MIME encode [B:base64 Q:quoted]\n"
906 "l ISO8859-1 (Latin-1) support\n"
907 "f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
910 "Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
911 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
912 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
913 "X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
914 "B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
918 "T Text mode output\n"
920 "O Output to File (DEFAULT 'nkf.out')\n"
921 "I Convert non ISO-2022-JP charactor to GETA\n"
922 "d,c Convert line breaks -d: LF -c: CRLF\n"
923 "-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
924 "v, V Show this usage. V: show configuration\n"
927 "Long name options\n"
928 " --ic=<input codeset> --oc=<output codeset>\n"
929 " Specify the input or output codeset\n"
930 " --fj --unix --mac --windows\n"
931 " --jis --euc --sjis --utf8 --utf16 --mime --base64\n"
932 " Convert for the system or code\n"
933 " --hiragana --katakana --katakana-hiragana\n"
934 " To Hiragana/Katakana Conversion\n"
935 " --prefix= Insert escape before troublesome characters of Shift_JIS\n"
939 " --cap-input, --url-input Convert hex after ':' or '%%'\n"
941 #ifdef NUMCHAR_OPTION
942 " --numchar-input Convert Unicode Character Reference\n"
944 #ifdef UTF8_INPUT_ENABLE
945 " --fb-{skip, html, xml, perl, java, subchar}\n"
946 " Specify how nkf handles unassigned characters\n"
951 " --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"
952 " Overwrite original listed files by filtered result\n"
953 " --overwrite preserves timestamp of original files\n"
955 " -g --guess Guess the input code\n"
956 " --help --version Show this help/the version\n"
957 " For more information, see also man nkf\n"
963 show_configuration(void)
966 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
967 " Compile-time options:\n"
968 " Compiled at: " __DATE__ " " __TIME__ "\n"
971 " Default output encoding: "
972 #ifdef DEFAULT_CODE_LOCALE
973 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
974 #elif defined(DEFAULT_ENCIDX)
975 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
981 " Default output end of line: "
982 #if DEFAULT_NEWLINE == CR
984 #elif DEFAULT_NEWLINE == CRLF
990 " Decode MIME encoded string: "
991 #if MIME_DECODE_DEFAULT
997 " Convert JIS X 0201 Katakana: "
1004 " --help, --version output: "
1005 #if HELP_OUTPUT_HELP_OUTPUT
1016 get_backup_filename(const char *suffix, const char *filename)
1018 char *backup_filename;
1019 int asterisk_count = 0;
1021 int filename_length = strlen(filename);
1023 for(i = 0; suffix[i]; i++){
1024 if(suffix[i] == '*') asterisk_count++;
1028 backup_filename = nkf_xmalloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
1029 for(i = 0, j = 0; suffix[i];){
1030 if(suffix[i] == '*'){
1031 backup_filename[j] = '\0';
1032 strncat(backup_filename, filename, filename_length);
1034 j += filename_length;
1036 backup_filename[j++] = suffix[i++];
1039 backup_filename[j] = '\0';
1041 j = filename_length + strlen(suffix);
1042 backup_filename = nkf_xmalloc(j + 1);
1043 strcpy(backup_filename, filename);
1044 strcat(backup_filename, suffix);
1045 backup_filename[j] = '\0';
1047 return backup_filename;
1051 #ifdef UTF8_INPUT_ENABLE
1053 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1060 (*f)(0, bin2hex(c>>shift));
1071 encode_fallback_html(nkf_char c)
1076 if(c >= NKF_INT32_C(1000000))
1077 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1078 if(c >= NKF_INT32_C(100000))
1079 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1081 (*oconv)(0, 0x30+(c/10000 )%10);
1083 (*oconv)(0, 0x30+(c/1000 )%10);
1085 (*oconv)(0, 0x30+(c/100 )%10);
1087 (*oconv)(0, 0x30+(c/10 )%10);
1089 (*oconv)(0, 0x30+ c %10);
1095 encode_fallback_xml(nkf_char c)
1100 nkf_each_char_to_hex(oconv, c);
1106 encode_fallback_java(nkf_char c)
1110 if(!nkf_char_unicode_bmp_p(c)){
1114 (*oconv)(0, bin2hex(c>>20));
1115 (*oconv)(0, bin2hex(c>>16));
1119 (*oconv)(0, bin2hex(c>>12));
1120 (*oconv)(0, bin2hex(c>> 8));
1121 (*oconv)(0, bin2hex(c>> 4));
1122 (*oconv)(0, bin2hex(c ));
1127 encode_fallback_perl(nkf_char c)
1132 nkf_each_char_to_hex(oconv, c);
1138 encode_fallback_subchar(nkf_char c)
1140 c = unicode_subchar;
1141 (*oconv)((c>>8)&0xFF, c&0xFF);
1146 static const struct {
1170 {"katakana-hiragana","h3"},
1178 #ifdef UTF8_OUTPUT_ENABLE
1188 {"fb-subchar=", ""},
1190 #ifdef UTF8_INPUT_ENABLE
1191 {"utf8-input", "W"},
1192 {"utf16-input", "W16"},
1193 {"no-cp932ext", ""},
1194 {"no-best-fit-chars",""},
1196 #ifdef UNICODE_NORMALIZATION
1197 {"utf8mac-input", ""},
1209 #ifdef NUMCHAR_OPTION
1210 {"numchar-input", ""},
1216 #ifdef SHIFTJIS_CP932
1227 set_input_encoding(nkf_encoding *enc)
1229 switch (nkf_enc_to_index(enc)) {
1236 #ifdef SHIFTJIS_CP932
1239 #ifdef UTF8_OUTPUT_ENABLE
1240 ms_ucs_map_f = UCS_MAP_CP932;
1250 case ISO_2022_JP_2004:
1257 #ifdef SHIFTJIS_CP932
1260 #ifdef UTF8_OUTPUT_ENABLE
1261 ms_ucs_map_f = UCS_MAP_CP932;
1266 #ifdef SHIFTJIS_CP932
1269 #ifdef UTF8_OUTPUT_ENABLE
1270 ms_ucs_map_f = UCS_MAP_CP10001;
1278 #ifdef SHIFTJIS_CP932
1281 #ifdef UTF8_OUTPUT_ENABLE
1282 ms_ucs_map_f = UCS_MAP_CP932;
1286 #ifdef SHIFTJIS_CP932
1289 #ifdef UTF8_OUTPUT_ENABLE
1290 ms_ucs_map_f = UCS_MAP_MS;
1294 #ifdef SHIFTJIS_CP932
1297 #ifdef UTF8_OUTPUT_ENABLE
1298 ms_ucs_map_f = UCS_MAP_ASCII;
1301 case SHIFT_JISX0213:
1302 case SHIFT_JIS_2004:
1304 #ifdef SHIFTJIS_CP932
1311 #ifdef SHIFTJIS_CP932
1315 #ifdef UTF8_INPUT_ENABLE
1316 #ifdef UNICODE_NORMALIZATION
1324 input_endian = ENDIAN_BIG;
1328 input_endian = ENDIAN_LITTLE;
1333 input_endian = ENDIAN_BIG;
1337 input_endian = ENDIAN_LITTLE;
1344 set_output_encoding(nkf_encoding *enc)
1346 switch (nkf_enc_to_index(enc)) {
1349 #ifdef SHIFTJIS_CP932
1350 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1352 #ifdef UTF8_OUTPUT_ENABLE
1353 ms_ucs_map_f = UCS_MAP_CP932;
1357 #ifdef SHIFTJIS_CP932
1358 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1360 #ifdef UTF8_OUTPUT_ENABLE
1361 ms_ucs_map_f = UCS_MAP_CP932;
1366 #ifdef SHIFTJIS_CP932
1367 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1373 #ifdef SHIFTJIS_CP932
1374 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1380 #ifdef UTF8_OUTPUT_ENABLE
1381 ms_ucs_map_f = UCS_MAP_CP932;
1385 #ifdef UTF8_OUTPUT_ENABLE
1386 ms_ucs_map_f = UCS_MAP_CP10001;
1391 #ifdef SHIFTJIS_CP932
1392 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1394 #ifdef UTF8_OUTPUT_ENABLE
1395 ms_ucs_map_f = UCS_MAP_ASCII;
1400 #ifdef SHIFTJIS_CP932
1401 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1403 #ifdef UTF8_OUTPUT_ENABLE
1404 ms_ucs_map_f = UCS_MAP_ASCII;
1408 #ifdef SHIFTJIS_CP932
1409 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1411 #ifdef UTF8_OUTPUT_ENABLE
1412 ms_ucs_map_f = UCS_MAP_CP932;
1417 #ifdef UTF8_OUTPUT_ENABLE
1418 ms_ucs_map_f = UCS_MAP_MS;
1423 #ifdef UTF8_OUTPUT_ENABLE
1424 ms_ucs_map_f = UCS_MAP_ASCII;
1427 case SHIFT_JISX0213:
1428 case SHIFT_JIS_2004:
1430 #ifdef SHIFTJIS_CP932
1431 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1438 #ifdef SHIFTJIS_CP932
1439 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1442 #ifdef UTF8_OUTPUT_ENABLE
1444 output_bom_f = TRUE;
1448 output_bom_f = TRUE;
1451 output_endian = ENDIAN_LITTLE;
1452 output_bom_f = FALSE;
1455 output_endian = ENDIAN_LITTLE;
1456 output_bom_f = TRUE;
1459 output_bom_f = TRUE;
1462 output_endian = ENDIAN_LITTLE;
1463 output_bom_f = FALSE;
1466 output_endian = ENDIAN_LITTLE;
1467 output_bom_f = TRUE;
1473 static struct input_code*
1474 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1477 struct input_code *p = input_code_list;
1479 if (iconv_func == p->iconv_func){
1489 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1491 #ifdef INPUT_CODE_FIX
1492 if (f || !input_encoding)
1499 #ifdef INPUT_CODE_FIX
1500 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1506 if (estab_f && iconv_for_check != iconv){
1507 struct input_code *p = find_inputcode_byfunc(iconv);
1509 set_input_codename(p->name);
1512 iconv_for_check = iconv;
1519 x0212_shift(nkf_char c)
1524 if (0x75 <= c && c <= 0x7f){
1525 ret = c + (0x109 - 0x75);
1528 if (0x75 <= c && c <= 0x7f){
1529 ret = c + (0x113 - 0x75);
1537 x0212_unshift(nkf_char c)
1540 if (0x7f <= c && c <= 0x88){
1541 ret = c + (0x75 - 0x7f);
1542 }else if (0x89 <= c && c <= 0x92){
1543 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1547 #endif /* X0212_ENABLE */
1550 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1556 if((0x21 <= ndx && ndx <= 0x2F)){
1557 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1558 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1560 }else if(0x6E <= ndx && ndx <= 0x7E){
1561 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1562 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1568 else if(nkf_isgraph(ndx)){
1570 const unsigned short *ptr;
1571 ptr = x0212_shiftjis[ndx - 0x21];
1573 val = ptr[(c1 & 0x7f) - 0x21];
1582 c2 = x0212_shift(c2);
1584 #endif /* X0212_ENABLE */
1586 if(0x7F < c2) return 1;
1587 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1588 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1593 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1595 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1598 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1599 if (0xFC < c1) return 1;
1600 #ifdef SHIFTJIS_CP932
1601 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1602 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1609 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1610 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1616 #endif /* SHIFTJIS_CP932 */
1618 if (!x0213_f && is_ibmext_in_sjis(c2)){
1619 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1622 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1635 if(x0213_f && c2 >= 0xF0){
1636 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1637 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1638 }else{ /* 78<=k<=94 */
1639 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1640 if (0x9E < c1) c2++;
1643 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1644 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1645 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1646 if (0x9E < c1) c2++;
1649 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1656 c2 = x0212_unshift(c2);
1663 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * Encode the Unicode code point val as UTF-8.
 * p1 receives the lead byte; p2, p3 and p4 receive continuation bytes.
 * Branches visible here: two bytes for values below 0x800, three bytes for
 * BMP values, four bytes for the remaining valid code points.
 */
1665 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1673 }else if (val < 0x800){
/* two bytes: 110xxxxx 10xxxxxx */
1674 *p1 = 0xc0 | (val >> 6);
1675 *p2 = 0x80 | (val & 0x3f);
1678 } else if (nkf_char_unicode_bmp_p(val)) {
/* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
1679 *p1 = 0xe0 | (val >> 12);
1680 *p2 = 0x80 | ((val >> 6) & 0x3f);
1681 *p3 = 0x80 | ( val & 0x3f);
1683 } else if (nkf_char_unicode_value_p(val)) {
/*
 * four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
 * The lead byte carries bits 18-20, so its prefix is 0xf0 and the shift
 * is 18; the previous 0xe0 / 16 produced a three-byte style lead byte.
 */
1684 *p1 = 0xf0 | (val >> 18);
1685 *p2 = 0x80 | ((val >> 12) & 0x3f);
1686 *p3 = 0x80 | ((val >> 6) & 0x3f);
1687 *p4 = 0x80 | ( val & 0x3f);
/*
 * Decode one UTF-8 sequence into a code point.
 * c1 is the lead byte; c2, c3 and c4 are the following bytes (only as many
 * as the lead byte calls for are read).  The branch taken is chosen from the
 * lead byte alone.
 */
1697 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/*
 * 0x80..0xBF are continuation bytes and 0xC0/0xC1 can only start overlong
 * forms.  0xC2 and 0xC3 are valid two-byte lead bytes (U+0080..U+00FF), so
 * the cutoff is 0xC1 rather than 0xC3.
 */
1704 else if (c1 <= 0xC1) {
1705 /* trail byte or invalid */
1708 else if (c1 <= 0xDF) {
/* two bytes: low five bits of the lead byte */
1710 wc = (c1 & 0x1F) << 6;
1713 else if (c1 <= 0xEF) {
/* three bytes */
1715 wc = (c1 & 0x0F) << 12;
1716 wc |= (c2 & 0x3F) << 6;
/* four bytes: test the lead byte c1; c2 is a continuation byte and was always <= 0xF4 */
1719 else if (c1 <= 0xF4) {
1721 wc = (c1 & 0x0F) << 18;
1722 wc |= (c2 & 0x3F) << 12;
1723 wc |= (c3 & 0x3F) << 6;
1733 #ifdef UTF8_INPUT_ENABLE
1735 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1736 const unsigned short *const *pp, nkf_char psize,
1737 nkf_char *p2, nkf_char *p1)
1740 const unsigned short *p;
1743 if (pp == 0) return 1;
1746 if (c1 < 0 || psize <= c1) return 1;
1748 if (p == 0) return 1;
1751 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1753 if (val == 0) return 1;
1754 if (no_cp932ext_f && (
1755 (val>>8) == 0x2D || /* NEC special characters */
1756 val > NKF_INT32_C(0xF300) /* IBM extended characters */
1764 if (c2 == SO) c2 = JIS_X_0201_1976_K;
1772 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1774 const unsigned short *const *pp;
1775 const unsigned short *const *const *ppp;
1776 static const char no_best_fit_chars_table_C2[] =
1777 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1778 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1779 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1780 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1781 static const char no_best_fit_chars_table_C2_ms[] =
1782 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1783 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1784 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1785 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1786 static const char no_best_fit_chars_table_932_C2[] =
1787 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1788 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1789 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1790 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1791 static const char no_best_fit_chars_table_932_C3[] =
1792 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1793 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1794 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1795 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1801 }else if(c2 < 0xe0){
1802 if(no_best_fit_chars_f){
1803 if(ms_ucs_map_f == UCS_MAP_CP932){
1806 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1809 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1812 }else if(!cp932inv_f){
1815 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1818 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1821 }else if(ms_ucs_map_f == UCS_MAP_MS){
1822 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1823 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1841 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1842 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1843 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1845 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1846 }else if(c0 < 0xF0){
1847 if(no_best_fit_chars_f){
1848 if(ms_ucs_map_f == UCS_MAP_CP932){
1849 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1850 }else if(ms_ucs_map_f == UCS_MAP_MS){
1855 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1858 if(c0 == 0x92) return 1;
1863 if(c1 == 0x80 || c0 == 0x9C) return 1;
1866 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1871 if(c0 == 0x94) return 1;
1874 if(c0 == 0xBB) return 1;
1884 if(c0 == 0x95) return 1;
1887 if(c0 == 0xA5) return 1;
1894 if(c0 == 0x8D) return 1;
1897 if(c0 == 0x9E && !cp932inv_f) return 1;
1900 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1908 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1909 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1910 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1912 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1914 #ifdef SHIFTJIS_CP932
1915 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1917 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1918 s2e_conv(s2, s1, p2, p1);
1927 #ifdef UTF8_OUTPUT_ENABLE
1929 e2w_conv(nkf_char c2, nkf_char c1)
1931 const unsigned short *p;
1933 if (c2 == JIS_X_0201_1976_K) {
1934 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1942 p = euc_to_utf8_1byte;
1944 } else if (is_eucg3(c2)){
1945 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1948 c2 = (c2&0x7f) - 0x21;
1949 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1950 p = x0212_to_utf8_2bytes[c2];
1956 c2 = (c2&0x7f) - 0x21;
1957 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1959 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1960 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1961 euc_to_utf8_2bytes_ms[c2];
1966 c1 = (c1 & 0x7f) - 0x21;
1967 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1974 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1981 }else if (0xc0 <= c2 && c2 <= 0xef) {
1982 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1983 #ifdef NUMCHAR_OPTION
1986 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1994 #ifdef UTF8_INPUT_ENABLE
1996 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1998 nkf_char c1, c2, c3, c4;
2005 else if (nkf_char_unicode_bmp_p(val)){
2006 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2007 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2010 *p1 = nkf_char_unicode_new(val);
2016 *p1 = nkf_char_unicode_new(val);
2023 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2025 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
2026 if (iso2022jp_f && !x0201_f) {
2027 c2 = GETA1; c1 = GETA2;
2029 c2 = JIS_X_0201_1976_K;
2033 }else if (c2 == 0x8f){
2037 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2038 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2039 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2042 c2 = (c2 << 8) | (c1 & 0x7f);
2044 #ifdef SHIFTJIS_CP932
2047 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2048 s2e_conv(s2, s1, &c2, &c1);
2055 #endif /* SHIFTJIS_CP932 */
2057 #endif /* X0212_ENABLE */
2058 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2061 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2062 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2063 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2068 #ifdef SHIFTJIS_CP932
2069 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2071 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2072 s2e_conv(s2, s1, &c2, &c1);
2079 #endif /* SHIFTJIS_CP932 */
2087 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2089 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2090 if (iso2022jp_f && !x0201_f) {
2091 c2 = GETA1; c1 = GETA2;
2095 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2097 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2099 if(c1 == 0x7F) return 0;
2100 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2103 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2104 if (ret) return ret;
2111 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2113 nkf_char ret = 0, c4 = 0;
2114 static const char w_iconv_utf8_1st_byte[] =
2116 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2117 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2118 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2119 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2126 if (c1 < 0 || 0xff < c1) {
2127 }else if (c1 == 0) { /* 0 : 1 byte*/
2129 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2132 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2134 if (c2 < 0x80 || 0xBF < c2) return 0;
2137 if (c3 == 0) return -1;
2138 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2143 if (c3 == 0) return -1;
2144 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2148 if (c3 == 0) return -1;
2149 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2153 if (c3 == 0) return -2;
2154 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2158 if (c3 == 0) return -2;
2159 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2163 if (c3 == 0) return -2;
2164 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2172 if (c1 == 0 || c1 == EOF){
2173 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2174 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2177 ret = w2e_conv(c1, c2, c3, &c1, &c2);
2185 #define NKF_ICONV_INVALID_CODE_RANGE -13
/*
 * Hand one decoded code point wc to the conversion pipeline.
 * Branches visible here: surrogate values are rejected, values below 0xFFFF
 * go through w16e_conv(), larger values below 0x10FFFF are wrapped with
 * nkf_char_unicode_new(), and anything else is reported as
 * NKF_ICONV_INVALID_CODE_RANGE.
 */
2187 unicode_iconv(nkf_char wc)
2195 }else if ((wc>>11) == 27) {
2196 /* 27 << 11 == 0xD800, so this matches 0xD800..0xDFFF: an unpaired surrogate */
2197 return NKF_ICONV_INVALID_CODE_RANGE;
/* NOTE(review): strict less-than sends U+FFFF itself to the next branch -- confirm intended */
2198 }else if (wc < 0xFFFF) {
2199 ret = w16e_conv(wc, &c2, &c1);
2200 if (ret) return ret;
/* NOTE(review): strict less-than makes U+10FFFF fall through to the error return below -- confirm intended */
2201 }else if (wc < 0x10FFFF) {
2203 c1 = nkf_char_unicode_new(wc);
2205 return NKF_ICONV_INVALID_CODE_RANGE;
/*
 * Negative result codes for the input converters.  Judging by the names they
 * ask the caller to supply one or two further bytes before retrying; the
 * UTF-16 decoder returns the second one when a lead surrogate is not yet
 * followed by a trail surrogate.
 */
2211 #define NKF_ICONV_NEED_ONE_MORE_BYTE -1
2212 #define NKF_ICONV_NEED_TWO_MORE_BYTES -2
/*
 * Combine a UTF-16 surrogate pair into one code point.
 * 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000, so a single subtraction
 * removes both surrogate bases and adds the 0x10000 plane offset.
 */
2213 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
2215 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2224 if (input_endian == ENDIAN_BIG) {
2225 if (0xD8 <= c1 && c1 <= 0xDB) {
2226 if (0xDC <= c3 && c3 <= 0xDF) {
2227 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2228 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2233 if (0xD8 <= c2 && c2 <= 0xDB) {
2234 if (0xDC <= c4 && c4 <= 0xDF) {
2235 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2236 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2242 return (*unicode_iconv)(wc);
2246 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2252 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
2258 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2267 switch(input_endian){
2269 wc = c2 << 16 | c3 << 8 | c4;
2272 wc = c3 << 16 | c2 << 8 | c1;
2275 wc = c1 << 16 | c4 << 8 | c3;
2278 wc = c4 << 16 | c1 << 8 | c2;
2281 return NKF_ICONV_INVALID_CODE_RANGE;
2284 return (*unicode_iconv)(wc);
2288 #define output_ascii_escape_sequence(mode) do { \
2289 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2292 (*o_putc)(ascii_intro); \
2293 output_mode = mode; \
2298 output_escape_sequence(int mode)
2300 if (output_mode == mode)
2308 case JIS_X_0201_1976_K:
2316 (*o_putc)(kanji_intro);
2341 j_oconv(nkf_char c2, nkf_char c1)
2343 #ifdef NUMCHAR_OPTION
2344 if (c2 == 0 && nkf_char_unicode_p(c1)){
2345 w16e_conv(c1, &c2, &c1);
2346 if (c2 == 0 && nkf_char_unicode_p(c1)){
2347 c2 = c1 & VALUE_MASK;
2348 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2351 c2 = 0x7F + c1 / 94;
2352 c1 = 0x21 + c1 % 94;
2354 if (encode_fallback) (*encode_fallback)(c1);
2361 output_ascii_escape_sequence(ASCII);
2364 else if (c2 == EOF) {
2365 output_ascii_escape_sequence(ASCII);
2368 else if (c2 == ISO_8859_1) {
2369 output_ascii_escape_sequence(ISO_8859_1);
2372 else if (c2 == JIS_X_0201_1976_K) {
2373 output_escape_sequence(JIS_X_0201_1976_K);
2376 } else if (is_eucg3(c2)){
2377 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2378 (*o_putc)(c2 & 0x7f);
2383 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2384 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2385 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2392 e_oconv(nkf_char c2, nkf_char c1)
2394 if (c2 == 0 && nkf_char_unicode_p(c1)){
2395 w16e_conv(c1, &c2, &c1);
2396 if (c2 == 0 && nkf_char_unicode_p(c1)){
2397 c2 = c1 & VALUE_MASK;
2398 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2402 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2403 c1 = 0x21 + c1 % 94;
2406 (*o_putc)((c2 & 0x7f) | 0x080);
2407 (*o_putc)(c1 | 0x080);
2409 (*o_putc)((c2 & 0x7f) | 0x080);
2410 (*o_putc)(c1 | 0x080);
2414 if (encode_fallback) (*encode_fallback)(c1);
2422 } else if (c2 == 0) {
2423 output_mode = ASCII;
2425 } else if (c2 == JIS_X_0201_1976_K) {
2426 output_mode = EUC_JP;
2427 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2428 } else if (c2 == ISO_8859_1) {
2429 output_mode = ISO_8859_1;
2430 (*o_putc)(c1 | 0x080);
2432 } else if (is_eucg3(c2)){
2433 output_mode = EUC_JP;
2434 #ifdef SHIFTJIS_CP932
2437 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2438 s2e_conv(s2, s1, &c2, &c1);
2443 output_mode = ASCII;
2445 }else if (is_eucg3(c2)){
2448 (*o_putc)((c2 & 0x7f) | 0x080);
2449 (*o_putc)(c1 | 0x080);
2452 (*o_putc)((c2 & 0x7f) | 0x080);
2453 (*o_putc)(c1 | 0x080);
2457 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2458 set_iconv(FALSE, 0);
2459 return; /* too late to rescue this char */
2461 output_mode = EUC_JP;
2462 (*o_putc)(c2 | 0x080);
2463 (*o_putc)(c1 | 0x080);
2468 s_oconv(nkf_char c2, nkf_char c1)
2470 #ifdef NUMCHAR_OPTION
2471 if (c2 == 0 && nkf_char_unicode_p(c1)){
2472 w16e_conv(c1, &c2, &c1);
2473 if (c2 == 0 && nkf_char_unicode_p(c1)){
2474 c2 = c1 & VALUE_MASK;
2475 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2478 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2480 c1 += 0x40 + (c1 > 0x3e);
2485 if(encode_fallback)(*encode_fallback)(c1);
2494 } else if (c2 == 0) {
2495 output_mode = ASCII;
2497 } else if (c2 == JIS_X_0201_1976_K) {
2498 output_mode = SHIFT_JIS;
2500 } else if (c2 == ISO_8859_1) {
2501 output_mode = ISO_8859_1;
2502 (*o_putc)(c1 | 0x080);
2504 } else if (is_eucg3(c2)){
2505 output_mode = SHIFT_JIS;
2506 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2512 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2513 set_iconv(FALSE, 0);
2514 return; /* too late to rescue this char */
2516 output_mode = SHIFT_JIS;
2517 e2s_conv(c2, c1, &c2, &c1);
2519 #ifdef SHIFTJIS_CP932
2521 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2522 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2528 #endif /* SHIFTJIS_CP932 */
2531 if (prefix_table[(unsigned char)c1]){
2532 (*o_putc)(prefix_table[(unsigned char)c1]);
2538 #ifdef UTF8_OUTPUT_ENABLE
2540 w_oconv(nkf_char c2, nkf_char c1)
2546 output_bom_f = FALSE;
2557 if (c2 == 0 && nkf_char_unicode_p(c1)){
2558 val = c1 & VALUE_MASK;
2559 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2561 if (c2) (*o_putc)(c2);
2562 if (c3) (*o_putc)(c3);
2563 if (c4) (*o_putc)(c4);
2570 val = e2w_conv(c2, c1);
2572 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2574 if (c2) (*o_putc)(c2);
2575 if (c3) (*o_putc)(c3);
2576 if (c4) (*o_putc)(c4);
2582 w_oconv16(nkf_char c2, nkf_char c1)
2585 output_bom_f = FALSE;
2586 if (output_endian == ENDIAN_LITTLE){
2600 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2601 if (nkf_char_unicode_bmp_p(c1)) {
2602 c2 = (c1 >> 8) & 0xff;
2606 if (c1 <= UNICODE_MAX) {
2607 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2608 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2609 if (output_endian == ENDIAN_LITTLE){
2610 (*o_putc)(c2 & 0xff);
2611 (*o_putc)((c2 >> 8) & 0xff);
2612 (*o_putc)(c1 & 0xff);
2613 (*o_putc)((c1 >> 8) & 0xff);
2615 (*o_putc)((c2 >> 8) & 0xff);
2616 (*o_putc)(c2 & 0xff);
2617 (*o_putc)((c1 >> 8) & 0xff);
2618 (*o_putc)(c1 & 0xff);
2624 nkf_char val = e2w_conv(c2, c1);
2625 c2 = (val >> 8) & 0xff;
2630 if (output_endian == ENDIAN_LITTLE){
2640 w_oconv32(nkf_char c2, nkf_char c1)
2643 output_bom_f = FALSE;
2644 if (output_endian == ENDIAN_LITTLE){
2662 if (c2 == ISO_8859_1) {
2664 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2667 c1 = e2w_conv(c2, c1);
2670 if (output_endian == ENDIAN_LITTLE){
2671 (*o_putc)( c1 & 0xFF);
2672 (*o_putc)((c1 >> 8) & 0xFF);
2673 (*o_putc)((c1 >> 16) & 0xFF);
2677 (*o_putc)((c1 >> 16) & 0xFF);
2678 (*o_putc)((c1 >> 8) & 0xFF);
2679 (*o_putc)( c1 & 0xFF);
/*
 * Bit flags OR-ed into an input_code score by set_code_score().  Each flag
 * marks something suspicious seen while guessing the input encoding; a
 * candidate with a smaller score is preferred when the guess is settled.
 */
2684 #define SCORE_L2 (1) /* JIS level-2 kanji */
2685 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
2686 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
2687 #define SCORE_CP932 (SCORE_DEPEND << 1) /* remapped via CP932 (IBM extended characters) */
2688 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2689 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* character that does not exist */
2690 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* encoding specified by MIME */
2691 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
/* Starting score given to a candidate by status_reset(). */
2693 #define SCORE_INIT (SCORE_iMIME)
2695 static const nkf_char score_table_A0[] = {
2698 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2699 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
2702 static const nkf_char score_table_F0[] = {
2703 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2704 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2705 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2706 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
2710 set_code_score(struct input_code *ptr, nkf_char score)
2713 ptr->score |= score;
2718 clr_code_score(struct input_code *ptr, nkf_char score)
2721 ptr->score &= ~score;
2726 code_score(struct input_code *ptr)
2728 nkf_char c2 = ptr->buf[0];
2729 #ifdef UTF8_OUTPUT_ENABLE
2730 nkf_char c1 = ptr->buf[1];
2733 set_code_score(ptr, SCORE_ERROR);
2734 }else if (c2 == SS2){
2735 set_code_score(ptr, SCORE_KANA);
2736 }else if (c2 == 0x8f){
2737 set_code_score(ptr, SCORE_X0212);
2738 #ifdef UTF8_OUTPUT_ENABLE
2739 }else if (!e2w_conv(c2, c1)){
2740 set_code_score(ptr, SCORE_NO_EXIST);
2742 }else if ((c2 & 0x70) == 0x20){
2743 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2744 }else if ((c2 & 0x70) == 0x70){
2745 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2746 }else if ((c2 & 0x70) >= 0x50){
2747 set_code_score(ptr, SCORE_L2);
2752 status_disable(struct input_code *ptr)
2757 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2761 status_push_ch(struct input_code *ptr, nkf_char c)
2763 ptr->buf[ptr->index++] = c;
2767 status_clear(struct input_code *ptr)
2774 status_reset(struct input_code *ptr)
2777 ptr->score = SCORE_INIT;
2781 status_reinit(struct input_code *ptr)
2784 ptr->_file_stat = 0;
2788 status_check(struct input_code *ptr, nkf_char c)
2790 if (c <= DEL && estab_f){
2796 s_status(struct input_code *ptr, nkf_char c)
2800 status_check(ptr, c);
2805 }else if (nkf_char_unicode_p(c)){
2807 }else if (0xa1 <= c && c <= 0xdf){
2808 status_push_ch(ptr, SS2);
2809 status_push_ch(ptr, c);
2812 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2814 status_push_ch(ptr, c);
2815 }else if (0xed <= c && c <= 0xee){
2817 status_push_ch(ptr, c);
2818 #ifdef SHIFTJIS_CP932
2819 }else if (is_ibmext_in_sjis(c)){
2821 status_push_ch(ptr, c);
2822 #endif /* SHIFTJIS_CP932 */
2824 }else if (0xf0 <= c && c <= 0xfc){
2826 status_push_ch(ptr, c);
2827 #endif /* X0212_ENABLE */
2829 status_disable(ptr);
2833 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2834 status_push_ch(ptr, c);
2835 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2839 status_disable(ptr);
2843 #ifdef SHIFTJIS_CP932
2844 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2845 status_push_ch(ptr, c);
2846 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2847 set_code_score(ptr, SCORE_CP932);
2852 #endif /* SHIFTJIS_CP932 */
2853 status_disable(ptr);
2856 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2857 status_push_ch(ptr, c);
2858 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2859 set_code_score(ptr, SCORE_CP932);
2862 status_disable(ptr);
2869 e_status(struct input_code *ptr, nkf_char c)
2873 status_check(ptr, c);
2878 }else if (nkf_char_unicode_p(c)){
2880 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2882 status_push_ch(ptr, c);
2884 }else if (0x8f == c){
2886 status_push_ch(ptr, c);
2887 #endif /* X0212_ENABLE */
2889 status_disable(ptr);
2893 if (0xa1 <= c && c <= 0xfe){
2894 status_push_ch(ptr, c);
2898 status_disable(ptr);
2903 if (0xa1 <= c && c <= 0xfe){
2905 status_push_ch(ptr, c);
2907 status_disable(ptr);
2909 #endif /* X0212_ENABLE */
2913 #ifdef UTF8_INPUT_ENABLE
2915 w_status(struct input_code *ptr, nkf_char c)
2919 status_check(ptr, c);
2924 }else if (nkf_char_unicode_p(c)){
2926 }else if (0xc0 <= c && c <= 0xdf){
2928 status_push_ch(ptr, c);
2929 }else if (0xe0 <= c && c <= 0xef){
2931 status_push_ch(ptr, c);
2932 }else if (0xf0 <= c && c <= 0xf4){
2934 status_push_ch(ptr, c);
2936 status_disable(ptr);
2941 if (0x80 <= c && c <= 0xbf){
2942 status_push_ch(ptr, c);
2943 if (ptr->index > ptr->stat){
2944 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2945 && ptr->buf[2] == 0xbf);
2946 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2947 &ptr->buf[0], &ptr->buf[1]);
2954 status_disable(ptr);
2958 if (0x80 <= c && c <= 0xbf){
2959 if (ptr->index < ptr->stat){
2960 status_push_ch(ptr, c);
2965 status_disable(ptr);
2973 code_status(nkf_char c)
2975 int action_flag = 1;
2976 struct input_code *result = 0;
2977 struct input_code *p = input_code_list;
2979 if (!p->status_func) {
2983 if (!p->status_func)
2985 (p->status_func)(p, c);
2988 }else if(p->stat == 0){
2999 if (result && !estab_f){
3000 set_iconv(TRUE, result->iconv_func);
3001 }else if (c <= DEL){
3002 struct input_code *ptr = input_code_list;
3016 return std_gc_buf[--std_gc_ndx];
3023 std_ungetc(nkf_char c, FILE *f)
3025 if (std_gc_ndx == STD_GC_BUFSIZE){
3028 std_gc_buf[std_gc_ndx++] = c;
3034 std_putc(nkf_char c)
3041 static unsigned char hold_buf[HOLD_SIZE*2];
3042 static int hold_count = 0;
3044 push_hold_buf(nkf_char c2)
3046 if (hold_count >= HOLD_SIZE*2)
3048 hold_buf[hold_count++] = (unsigned char)c2;
3049 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3053 h_conv(FILE *f, int c1, int c2)
3059 /** it must NOT be in the kanji shift sequence */
3060 /** it must NOT be written in JIS7 */
3061 /** and it must be after 2 byte 8bit code */
3067 while ((c2 = (*i_getc)(f)) != EOF) {
3073 if (push_hold_buf(c2) == EOF || estab_f) {
3079 struct input_code *p = input_code_list;
3080 struct input_code *result = p;
3085 if (p->status_func && p->score < result->score) {
3090 set_iconv(TRUE, result->iconv_func);
3095 ** 1) EOF is detected, or
3096 ** 2) Code is established, or
3097 ** 3) Buffer is FULL (but last word is pushed)
3099 ** in 1) and 3) cases, we continue to use
3100 ** Kanji codes by oconv and leave estab_f unchanged.
3105 while (hold_index < hold_count){
3106 c1 = hold_buf[hold_index++];
3110 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3111 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3114 if (hold_index < hold_count){
3115 c2 = hold_buf[hold_index++];
3125 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3128 if (hold_index < hold_count){
3129 c3 = hold_buf[hold_index++];
3130 } else if ((c3 = (*i_getc)(f)) == EOF) {
3135 if (hold_index < hold_count){
3136 c4 = hold_buf[hold_index++];
3137 } else if ((c4 = (*i_getc)(f)) == EOF) {
3142 (*iconv)(c1, c2, (c3<<8)|c4);
3147 /* 3 bytes EUC or UTF-8 */
3148 if (hold_index < hold_count){
3149 c3 = hold_buf[hold_index++];
3150 } else if ((c3 = (*i_getc)(f)) == EOF) {
3156 (*iconv)(c1, c2, c3);
3159 if (c3 == EOF) break;
3165 * Check and Ignore BOM
3171 switch(c2 = (*i_getc)(f)){
3173 if((c2 = (*i_getc)(f)) == 0x00){
3174 if((c2 = (*i_getc)(f)) == 0xFE){
3175 if((c2 = (*i_getc)(f)) == 0xFF){
3176 if(!input_encoding){
3177 set_iconv(TRUE, w_iconv32);
3179 if (iconv == w_iconv32) {
3180 input_endian = ENDIAN_BIG;
3183 (*i_ungetc)(0xFF,f);
3184 }else (*i_ungetc)(c2,f);
3185 (*i_ungetc)(0xFE,f);
3186 }else if(c2 == 0xFF){
3187 if((c2 = (*i_getc)(f)) == 0xFE){
3188 if(!input_encoding){
3189 set_iconv(TRUE, w_iconv32);
3191 if (iconv == w_iconv32) {
3192 input_endian = ENDIAN_2143;
3195 (*i_ungetc)(0xFF,f);
3196 }else (*i_ungetc)(c2,f);
3197 (*i_ungetc)(0xFF,f);
3198 }else (*i_ungetc)(c2,f);
3199 (*i_ungetc)(0x00,f);
3200 }else (*i_ungetc)(c2,f);
3201 (*i_ungetc)(0x00,f);
3204 if((c2 = (*i_getc)(f)) == 0xBB){
3205 if((c2 = (*i_getc)(f)) == 0xBF){
3206 if(!input_encoding){
3207 set_iconv(TRUE, w_iconv);
3209 if (iconv == w_iconv) {
3212 (*i_ungetc)(0xBF,f);
3213 }else (*i_ungetc)(c2,f);
3214 (*i_ungetc)(0xBB,f);
3215 }else (*i_ungetc)(c2,f);
3216 (*i_ungetc)(0xEF,f);
3219 if((c2 = (*i_getc)(f)) == 0xFF){
3220 if((c2 = (*i_getc)(f)) == 0x00){
3221 if((c2 = (*i_getc)(f)) == 0x00){
3222 if(!input_encoding){
3223 set_iconv(TRUE, w_iconv32);
3225 if (iconv == w_iconv32) {
3226 input_endian = ENDIAN_3412;
3229 (*i_ungetc)(0x00,f);
3230 }else (*i_ungetc)(c2,f);
3231 (*i_ungetc)(0x00,f);
3232 }else (*i_ungetc)(c2,f);
3233 if(!input_encoding){
3234 set_iconv(TRUE, w_iconv16);
3236 if (iconv == w_iconv16) {
3237 input_endian = ENDIAN_BIG;
3240 (*i_ungetc)(0xFF,f);
3241 }else (*i_ungetc)(c2,f);
3242 (*i_ungetc)(0xFE,f);
3245 if((c2 = (*i_getc)(f)) == 0xFE){
3246 if((c2 = (*i_getc)(f)) == 0x00){
3247 if((c2 = (*i_getc)(f)) == 0x00){
3248 if(!input_encoding){
3249 set_iconv(TRUE, w_iconv32);
3251 if (iconv == w_iconv32) {
3252 input_endian = ENDIAN_LITTLE;
3255 (*i_ungetc)(0x00,f);
3256 }else (*i_ungetc)(c2,f);
3257 (*i_ungetc)(0x00,f);
3258 }else (*i_ungetc)(c2,f);
3259 if(!input_encoding){
3260 set_iconv(TRUE, w_iconv16);
3262 if (iconv == w_iconv16) {
3263 input_endian = ENDIAN_LITTLE;
3266 (*i_ungetc)(0xFE,f);
3267 }else (*i_ungetc)(c2,f);
3268 (*i_ungetc)(0xFF,f);
3283 init_broken_state(void)
3285 memset(&broken_state, 0, sizeof(broken_state));
3291 broken_state.buf[broken_state.count++] = c;
3295 pop_broken_buf(void)
3297 return broken_state.buf[--broken_state.count];
3301 broken_getc(FILE *f)
3305 if (broken_state.count > 0) {
3306 return pop_broken_buf();
3309 if (c=='$' && broken_state.status != ESC
3310 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3312 broken_state.status = 0;
3313 if (c1=='@'|| c1=='B') {
3314 push_broken_buf(c1);
3321 } else if (c=='(' && broken_state.status != ESC
3322 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3324 broken_state.status = 0;
3325 if (c1=='J'|| c1=='B') {
3326 push_broken_buf(c1);
3334 broken_state.status = c;
3340 broken_ungetc(nkf_char c, FILE *f)
3342 if (broken_state.count < 2)
3348 eol_conv(nkf_char c2, nkf_char c1)
3350 if (guess_f && input_eol != EOF) {
3351 if (c2 == 0 && c1 == LF) {
3352 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3353 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3354 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3356 else if (!input_eol) input_eol = CR;
3357 else if (input_eol != CR) input_eol = EOF;
3359 if (prev_cr || (c2 == 0 && c1 == LF)) {
3361 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3362 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3364 if (c2 == 0 && c1 == CR) prev_cr = CR;
3365 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3369 Return value of fold_conv()
3371 LF add newline and output char
3372 CR add newline and output nothing
3375 1 (or else) normal output
3377 fold state in prev (previous character)
3379 >0x80 Japanese (X0208/X0201)
3384 This fold algorithm does not preserve leading spaces in a line.
3385 This is the main difference from fmt.
3388 #define char_size(c2,c1) (c2?2:1)
3391 fold_conv(nkf_char c2, nkf_char c1)
3394 nkf_char fold_state;
3396 if (c1== CR && !fold_preserve_f) {
3397 fold_state=0; /* ignore cr */
3398 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3400 fold_state=0; /* ignore cr */
3401 } else if (c1== BS) {
3402 if (f_line>0) f_line--;
3404 } else if (c2==EOF && f_line != 0) { /* close open last line */
3406 } else if ((c1==LF && !fold_preserve_f)
3407 || ((c1==CR||(c1==LF&&f_prev!=CR))
3408 && fold_preserve_f)) {
3410 if (fold_preserve_f) {
3414 } else if ((f_prev == c1 && !fold_preserve_f)
3415 || (f_prev == LF && fold_preserve_f)
3416 ) { /* duplicate newline */
3419 fold_state = LF; /* output two newline */
3425 if (f_prev&0x80) { /* Japanese? */
3427 fold_state = 0; /* ignore given single newline */
3428 } else if (f_prev==SP) {
3432 if (++f_line<=fold_len)
3436 fold_state = CR; /* fold and output nothing */
3440 } else if (c1=='\f') {
3443 fold_state = LF; /* output newline and clear */
3444 } else if ( (c2==0 && c1==SP)||
3445 (c2==0 && c1==TAB)||
3446 (c2=='!'&& c1=='!')) {
3447 /* X0208 kankaku or ascii space */
3449 fold_state = 0; /* remove duplicate spaces */
3452 if (++f_line<=fold_len)
3453 fold_state = SP; /* output ASCII space only */
3455 f_prev = SP; f_line = 0;
3456 fold_state = CR; /* fold and output nothing */
3460 prev0 = f_prev; /* we still need this one... , but almost done */
3462 if (c2 || c2 == JIS_X_0201_1976_K)
3463 f_prev |= 0x80; /* this is Japanese */
3464 f_line += char_size(c2,c1);
3465 if (f_line<=fold_len) { /* normal case */
3468 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3469 f_line = char_size(c2,c1);
3470 fold_state = LF; /* We can't wait, do fold now */
3471 } else if (c2 == JIS_X_0201_1976_K) {
3472 /* simple kinsoku rules return 1 means no folding */
/* JIS X 0201 katakana codes with the high bit stripped; fold_state 1 keeps the character on the current line */
3473 if (c1==(0xde&0x7f)) fold_state = 1; /* 0xDE: halfwidth voiced sound mark (dakuten) */
3474 else if (c1==(0xdf&0x7f)) fold_state = 1; /* 0xDF: halfwidth semi-voiced sound mark (handakuten) */
3475 else if (c1==(0xa4&0x7f)) fold_state = 1; /* 0xA4: halfwidth ideographic comma in JIS X 0201 (original note showed a full stop) */
3476 else if (c1==(0xa3&0x7f)) fold_state = 1; /* 0xA3: halfwidth closing corner bracket in JIS X 0201 (original note showed a comma) */
3477 else if (c1==(0xa1&0x7f)) fold_state = 1; /* 0xA1: halfwidth ideographic full stop in JIS X 0201 (original note showed a closing bracket) */
3478 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3479 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3481 fold_state = LF;/* add one new f_line before this character */
3484 fold_state = LF;/* add one new f_line before this character */
3487 /* kinsoku point in ASCII */
3488 if ( c1==')'|| /* { [ ( */
3499 /* just after special */
3500 } else if (!is_alnum(prev0)) {
3501 f_line = char_size(c2,c1);
3503 } else if ((prev0==SP) || /* ignored new f_line */
3504 (prev0==LF)|| /* ignored new f_line */
3505 (prev0&0x80)) { /* X0208 - ASCII */
3506 f_line = char_size(c2,c1);
3507 fold_state = LF;/* add one new f_line before this character */
3509 fold_state = 1; /* default no fold in ASCII */
3513 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3514 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3515 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3516 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3517 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3518 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3519 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3520 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3521 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3522 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3523 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3524 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3525 /* default no fold in kinsoku */
3528 f_line = char_size(c2,c1);
3529 /* add one new f_line before this character */
3532 f_line = char_size(c2,c1);
3534 /* add one new f_line before this character */
3539 /* terminator process */
3540 switch(fold_state) {
3542 OCONV_NEWLINE((*o_fconv));
3548 OCONV_NEWLINE((*o_fconv));
3559 static nkf_char z_prev2=0,z_prev1=0;
3562 z_conv(nkf_char c2, nkf_char c1)
3565 /* if (c2) c1 &= 0x7f; assertion */
3567 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3573 if (z_prev2 == JIS_X_0201_1976_K) {
3574 if (c2 == JIS_X_0201_1976_K) {
3575 if (c1 == (0xde&0x7f)) { /*
\e$BByE@
\e(B */
3577 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3579 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /*
\e$BH>ByE@
\e(B */
3581 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3586 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3588 if (c2 == JIS_X_0201_1976_K) {
3589 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3590 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3595 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3606 if (alpha_f&1 && c2 == 0x23) {
3607 /* JISX0208 Alphabet */
3609 } else if (c2 == 0x21) {
3610 /* JISX0208 Kigou */
3615 } else if (alpha_f&4) {
3620 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3626 if (alpha_f&8 && c2 == 0) {
3628 const char *entity = 0;
3630 case '>': entity = ">"; break;
3631 case '<': entity = "<"; break;
3632 case '\"': entity = """; break;
3633 case '&': entity = "&"; break;
3636 while (*entity) (*o_zconv)(0, *entity++);
3642 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3647 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3651 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3655 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3659 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3663 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3667 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3671 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3675 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3680 (*o_zconv)(JIS_X_0201_1976_K, c);
3683 } else if (c2 == 0x25) {
3684 /* JISX0208 Katakana */
3685 static const int fullwidth_to_halfwidth[] =
3687 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3688 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3689 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3690 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3691 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3692 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3693 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3694 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3695 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3696 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3697 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3698 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3700 if (fullwidth_to_halfwidth[c1-0x20]){
3701 c2 = fullwidth_to_halfwidth[c1-0x20];
3702 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3704 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
/* rot13(c): ROT13 letter rotation.  In the visible arms, values up to M and
   up to m have 13 added, values up to Z and up to z have 13 subtracted.
   The lower-bound tests and the fall-through arm are not visible in this
   excerpt.
   NOTE(review): c is not parenthesized and is evaluated several times, so
   the operand must be a simple, side-effect-free expression. */
3714 #define rot13(c) ( \
3716 (c <= 'M') ? (c + 13): \
3717 (c <= 'Z') ? (c - 13): \
3719 (c <= 'm') ? (c + 13): \
3720 (c <= 'z') ? (c - 13): \
/* rot47(c): ROT47 rotation.  In the visible arms, values up to O have 47
   added and values up to the tilde have 47 subtracted.  The lower-bound
   test and the fall-through arm are not visible in this excerpt.
   NOTE(review): c is not parenthesized and is evaluated several times. */
3724 #define rot47(c) ( \
3726 ( c <= 'O') ? (c + 47) : \
3727 ( c <= '~') ? (c - 47) : \
/* rot_conv: rotation stage of the output chain.
   The visible guard singles out single-byte input (c2 is 0,
   JIS_X_0201_1976_K or ISO_8859_1).  The branch bodies are not visible in
   this excerpt (presumably rot13 / rot47 are applied there -- confirm).
   The pair is then handed on through o_rot_conv. */
3732 rot_conv(nkf_char c2, nkf_char c1)
3734 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3740 (*o_rot_conv)(c2,c1);
/* hira_conv: hiragana/katakana conversion stage; results are forwarded
   through o_hira_conv.
   First group of visible tests (the enclosing mode and c2 tests are not
   visible here): 0x20 < c1 < 0x74; c1 == 0x74 when the output encoding is
   Unicode, in which case c1 is replaced by the Unicode value U+3094; and
   c2 == 0x21 with c1 of 0x33 or 0x34.
   Second group (appears to be the opposite direction -- confirm): the
   Unicode value U+3094 with c2 == 0; c2 == 0x24 with 0x20 < c1 < 0x74; and
   c2 == 0x21 with c1 of 0x35 or 0x36.
   The assignments made inside each branch are not visible in this excerpt. */
3744 hira_conv(nkf_char c2, nkf_char c1)
3748 if (0x20 < c1 && c1 < 0x74) {
3750 (*o_hira_conv)(c2,c1);
3752 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3754 c1 = nkf_char_unicode_new(0x3094);
3755 (*o_hira_conv)(c2,c1);
3758 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3760 (*o_hira_conv)(c2,c1);
3765 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3768 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3770 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3774 (*o_hira_conv)(c2,c1);
/* iso2022jp_check_conv: filter stage that inspects (c2, c1) and then
   forwards the pair through o_iso2022jp_check_conv.
   Visible checks: c2 in 0x00..0x20 together with c1 in 0x7f..0xff; c2 in
   rows 0x29..0x2f or 0x75..0x7e; and a scan over the RANGE_NUM_MAX (18)
   start/end pairs of range[] testing start <= c <= end.
   What happens on a hit is not visible in this excerpt (presumably the
   character is replaced by a substitute -- confirm). */
3779 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3781 #define RANGE_NUM_MAX 18
3782 static const nkf_char range[RANGE_NUM_MAX][2] = {
3803 nkf_char start, end, c;
3805 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3809 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3814 for (i = 0; i < RANGE_NUM_MAX; i++) {
3815 start = range[i][0];
3818 if (c >= start && c <= end) {
3823 (*o_iso2022jp_check_conv)(c2,c1);
/* Table of recognized MIME encoded-word prefixes, each of the form
   =?CHARSET?B? or =?CHARSET?Q? (the octal escape 075 is the equals sign).
   Entry i corresponds to entry i of mime_priority_func, mime_encode and
   mime_encode_method.  mime_begin_strict compares input against these
   strings after upper-casing it, and open_mime iterates until a null entry. */
3827 /* Example of an encoded word handled here: =?ISO-2022-JP?B?HOGE HOGE?= */
3829 static const unsigned char *mime_pattern[] = {
3830 (const unsigned char *)"\075?EUC-JP?B?",
3831 (const unsigned char *)"\075?SHIFT_JIS?B?",
3832 (const unsigned char *)"\075?ISO-8859-1?Q?",
3833 (const unsigned char *)"\075?ISO-8859-1?B?",
3834 (const unsigned char *)"\075?ISO-2022-JP?B?",
3835 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3836 #if defined(UTF8_INPUT_ENABLE)
3837 (const unsigned char *)"\075?UTF-8?B?",
3838 (const unsigned char *)"\075?UTF-8?Q?",
3840 (const unsigned char *)"\075?US-ASCII?Q?",
/* Input converter to install for each mime_pattern entry (same index).
   A 0 entry means no converter is associated with that pattern;
   mime_begin_strict passes the entry to set_iconv and clr_code_score. */
3845 /* marker used to raise the priority of the matching encoding */
3846 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3847 e_iconv, s_iconv, 0, 0, 0, 0,
3848 #if defined(UTF8_INPUT_ENABLE)
/* Output-mode code for each mime_pattern entry (same index).  open_mime
   looks up its mode argument here to choose the prefix to emit. */
3854 static const nkf_char mime_encode[] = {
3855 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K,
3856 #if defined(UTF8_INPUT_ENABLE)
/* Transfer encoding letter (B for Base64, Q for quoted-printable) for each
   mime_pattern entry (same index); open_mime copies it into mimeout_mode. */
3863 static const nkf_char mime_encode_method[] = {
3864 'B', 'B','Q', 'B', 'B', 'Q',
3865 #if defined(UTF8_INPUT_ENABLE)
/* Ring buffer used by the MIME input preprocessor.
   Positions (top, last, input) are free-running unsigned counters; the
   mime_input_buf(n) accessor masks them with MIME_BUF_MASK, which is why
   the size must be a power of two.  The visible field notes mark last as
   the decoded position and input as the undecoded position.
   mime_iconv_back keeps the input converter that was active before a MIME
   word switched it, so unswitch_mime_getc can restore it.
   MAXRECOVER bounds the recovery buffer used while matching a MIME header. */
3873 /* MIME preprocessor fifo */
3875 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3876 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3877 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3879 unsigned char buf[MIME_BUF_SIZE];
3881 unsigned int last; /* decoded */
3882 unsigned int input; /* undecoded */
3884 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
3886 #define MAXRECOVER 20
/* Pushes c back at the front of the MIME ring buffer: steps
   mime_input_state.top back by one and stores c there as an unsigned char. */
3889 mime_input_buf_unshift(nkf_char c)
3891 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
/* Unget hook used while MIME decoding is active: the character goes back
   into the MIME ring buffer through mime_input_buf_unshift.  The stream
   argument f is not used by the visible line. */
3895 mime_ungetc(nkf_char c, FILE *f)
3897 mime_input_buf_unshift(c);
/* Unget hook for the undecoded side of the MIME input.  Either delegates
   to the saved hook i_mungetc_buf, or steps mime_input_state.input back by
   one slot and stores c there.  The condition selecting between the two is
   not visible in this excerpt (mime_getc_buf makes the matching choice on
   mimebuf_f). */
3902 mime_ungetc_buf(nkf_char c, FILE *f)
3905 (*i_mungetc_buf)(c,f);
3907 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
/* Returns the next undecoded byte of a MIME word.  When mimebuf_f is set
   the byte is read from the stream through i_mgetc_buf; otherwise it is
   taken from the ring buffer at mime_input_state.input, which is advanced. */
3912 mime_getc_buf(FILE *f)
3914 /* we don't keep eof of mime_input_buf, becase it contains ?= as
3915 a terminator. It was checked in mime_integrity. */
3916 return ((mimebuf_f)?
3917 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
/* Installs the MIME-aware reader.  Unless i_getc is already mime_getc, the
   current i_getc / i_ungetc pair is saved in i_mgetc / i_mungetc and
   replaced by mime_getc / mime_ungetc.  In STRICT_MIME mode the saved pair
   is moved one level further down (i_mgetc_buf / i_mungetc_buf) and
   i_mgetc / i_mungetc are routed through the ring-buffer readers
   mime_getc_buf / mime_ungetc_buf. */
3921 switch_mime_getc(void)
3923 if (i_getc!=mime_getc) {
3924 i_mgetc = i_getc; i_getc = mime_getc;
3925 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3926 if(mime_f==STRICT_MIME) {
3927 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3928 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
/* Reverses switch_mime_getc.  In STRICT_MIME mode the hooks saved in
   i_mgetc_buf / i_mungetc_buf are moved back into i_mgetc / i_mungetc
   first; then i_ungetc is restored from i_mungetc.  If a MIME word had
   replaced the input converter, the saved one (mime_iconv_back) is
   reinstated through set_iconv and the saved pointer is cleared. */
3934 unswitch_mime_getc(void)
3936 if(mime_f==STRICT_MIME) {
3937 i_mgetc = i_mgetc_buf;
3938 i_mungetc = i_mungetc_buf;
3941 i_ungetc = i_mungetc;
3942 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3943 mime_iconv_back = NULL;
/* mime_integrity: pre-scan of an encoded word in strict mode.
   f is the input stream, p the matched header string from mime_pattern.
   The header is copied into the ring buffer, then characters are read
   through i_getc until EOF or until the buffer is full.  When an equals
   sign follows a question mark (the word terminator) the scan succeeded:
   the input position is rewound to q, the slot just after the header, so
   decoding starts there.  A character outside the set plus, slash, equals,
   question mark and alphanumerics stops the scan.
   On failure the buffered text is marked as not to be decoded
   (mime_decode_mode = 1, last = input) and the buffered reader is switched
   in so the text is replayed as is.
   The return statements are not visible in this excerpt. */
3947 mime_integrity(FILE *f, const unsigned char *p)
3951 /* In buffered mode, read until =? or NL or buffer full
3953 mime_input_state.input = mime_input_state.top;
3954 mime_input_state.last = mime_input_state.top;
3956 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3958 q = mime_input_state.input;
3959 while((c=(*i_getc)(f))!=EOF) {
3960 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3961 break; /* buffer full */
3963 if (c=='=' && d=='?') {
3964 /* checked. skip header, start decode */
3965 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3966 /* mime_last_input = mime_input_state.input; */
3967 mime_input_state.input = q;
3971 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3973 /* Should we check length mod 4? */
3974 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3977 /* In case of Incomplete MIME, no MIME decode */
3978 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3979 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
3980 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
3981 switch_mime_getc(); /* anyway we need buffered getc */
/* mime_begin_strict: strict-mode recognition of an encoded-word header.
   Called after the leading equals sign and question mark were consumed.
   Input is read through i_getc, kept in the recovery buffer r (at most
   MAXRECOVER entries) and compared, after nkf_toupper, with the current
   mime_pattern entry.  On a mismatch the following patterns are tried,
   accepting one only if it agrees with everything matched so far and with
   the current character.  If no pattern fits, the consumed characters are
   replayed from the recovery buffer (those lines are not visible here).
   On success mime_decode_mode becomes the method letter taken from the
   pattern (p[i-2]), the current converter is saved in mime_iconv_back and
   the one listed in mime_priority_func is installed; for B encoding
   mimebuf_f is set from unbuf_f.  The result of mime_integrity is returned. */
3986 mime_begin_strict(FILE *f)
3990 const unsigned char *p,*q;
3991 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
3993 mime_decode_mode = FALSE;
3994 /* =? has been checked */
3996 p = mime_pattern[j];
3999 for(i=2;p[i]>SP;i++) { /* start at =? */
4000 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
4001 /* pattern fails, try next one */
4003 while (mime_pattern[++j]) {
4004 p = mime_pattern[j];
4005 for(k=2;k<i;k++) /* assume length(p) > i */
4006 if (p[k]!=q[k]) break;
4007 if (k==i && nkf_toupper(c1)==p[k]) break;
4009 p = mime_pattern[j];
4010 if (p) continue; /* found next one, continue */
4011 /* all fails, output from recovery buffer */
4019 mime_decode_mode = p[i-2];
4021 mime_iconv_back = iconv;
4022 set_iconv(FALSE, mime_priority_func[j]);
4023 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4025 if (mime_decode_mode=='B') {
4026 mimebuf_f = unbuf_f;
4028 /* do MIME integrity check */
4029 return mime_integrity(f,mime_pattern[j]);
/* Body of the non-strict MIME header reader.  The signature line is not
   visible in this excerpt; the inline note at 4085 refers to it as
   mime_begin.
   The header characters are appended to the ring buffer at
   mime_input_state.last so they can be replayed if the text turns out not
   to be an encoded word.  At most MAXRECOVER characters are examined:
   first a charset name made of alphanumerics, hyphen, underscore or line
   breaks and spaces, then the method letter (b or B selects B mode, q or Q
   selects Q mode), then one more character.
   If no method was recognized, mime_decode_mode is set to 1 so the
   buffered text is read back undecoded; otherwise last is reset to its
   starting value k, which discards the header.  The last character read
   is returned so the caller can test for EOF. */
4043 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4044 /* re-read and convert again from mime_buffer. */
4046 /* =? has been checked */
4047 k = mime_input_state.last;
4048 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
4049 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4050 /* We accept any character type even if it is breaked by new lines */
4051 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4052 if (c1==LF||c1==SP||c1==CR||
4053 c1=='-'||c1=='_'||is_alnum(c1)) continue;
4055 /* Failed. But this could be another MIME preemble */
4057 mime_input_state.last--;
4063 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4064 if (!(++i<MAXRECOVER) || c1==EOF) break;
4065 if (c1=='b'||c1=='B') {
4066 mime_decode_mode = 'B';
4067 } else if (c1=='q'||c1=='Q') {
4068 mime_decode_mode = 'Q';
4072 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4073 if (!(++i<MAXRECOVER) || c1==EOF) break;
4075 mime_decode_mode = FALSE;
4081 if (!mime_decode_mode) {
4082 /* false MIME premble, restart from mime_buffer */
4083 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4084 /* Since we are in MIME mode until buffer becomes empty, */
4085 /* we never go into mime_begin again for a while. */
4088 /* discard mime preemble, and goto MIME mode */
4089 mime_input_state.last = k;
4090 /* do no MIME integrity check */
4091 return c1; /* used only for checking EOF */
/* Diagnostic helper: writes str and a newline to stderr, printing the text
   NULL when str is a null pointer.  Any guard around the call is not
   visible in this excerpt. */
4102 debug(const char *str)
4105 fprintf(stderr, "%s\n", str ? str : "NULL");
/* Records the name of the detected input encoding.  The first call stores
   codename.  A later call with a different name replaces the stored name
   by the empty string, which get_guessed_code reports as BINARY.
   The pointer is stored, not copied. */
4111 set_input_codename(const char *codename)
4113 if (!input_codename) {
4114 input_codename = codename;
4115 } else if (strcmp(codename, input_codename) != 0) {
4116 input_codename = "";
/* Returns the final name for the guessed input encoding and stores it in
   input_codename.
   An empty stored name (conflicting detections) becomes BINARY, and no
   stored name becomes ASCII.  Otherwise the score bits of the input_code
   entry for the current converter refine the name:
     Shift_JIS   -> CP932    (SCORE_DEPEND or SCORE_CP932)
     EUC-JP      -> EUCJP-MS (SCORE_X0212), else CP51932 (SCORE_DEPEND or
                    SCORE_CP932)
     ISO-2022-JP -> CP50221  (SCORE_KANA), else CP50220 (SCORE_DEPEND or
                    SCORE_CP932)
   NOTE(review): p is dereferenced without a visible null check; confirm
   that find_inputcode_byfunc cannot return a null pointer here. */
4121 get_guessed_code(void)
4123 if (input_codename && !*input_codename) {
4124 input_codename = "BINARY";
4126 struct input_code *p = find_inputcode_byfunc(iconv);
4127 if (!input_codename) {
4128 input_codename = "ASCII";
4129 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4130 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4131 input_codename = "CP932";
4132 } else if (strcmp(input_codename, "EUC-JP") == 0) {
4133 if (p->score & (SCORE_X0212))
4134 input_codename = "EUCJP-MS";
4135 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4136 input_codename = "CP51932";
4137 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4138 if (p->score & (SCORE_KANA))
4139 input_codename = "CP50221";
4140 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4141 input_codename = "CP50220";
4144 return input_codename;
/* print_guessed_code: prints the guessed encoding to stdout; compiled only
   when neither PERL_XS nor WIN32DLL is defined.
   If filename is not NULL it is printed first, followed by a colon and a
   space.  The name comes from get_guessed_code unless the stored name is
   empty (that branch is not visible here).  The trailing conditional chain
   selects a line-ending label from input_eol: CR, LF, CRLF, or MIXED NL
   when input_eol is EOF; the call that prints it is not visible. */
4147 #if !defined(PERL_XS) && !defined(WIN32DLL)
4149 print_guessed_code(char *filename)
4151 if (filename != NULL) printf("%s: ", filename);
4152 if (input_codename && !*input_codename) {
4155 input_codename = get_guessed_code();
4157 printf("%s\n", input_codename);
4161 input_eol == CR ? " (CR)" :
4162 input_eol == LF ? " (LF)" :
4163 input_eol == CRLF ? " (CRLF)" :
4164 input_eol == EOF ? " (MIXED NL)" :
/* hex_getc: shared reader for two-digit hexadecimal escapes.
   ch is the escape introducer, f the stream, g the underlying getc hook
   and u the matching ungetc hook.
   The two characters after the introducer (c2, c3) are tested with
   nkf_isxdigit; when both are hex digits the decoded byte
   (hex2bin(c2) << 4) | hex2bin(c3) is returned.  The handling of the other
   cases is not visible in this excerpt. */
4174 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4176 nkf_char c1, c2, c3;
4182 if (!nkf_isxdigit(c2)){
4187 if (!nkf_isxdigit(c3)){
4192 return (hex2bin(c2) << 4) | hex2bin(c3);
/* Reader stage for colon-introduced hex escapes: delegates to hex_getc
   with the saved hooks i_cgetc / i_cungetc.  The signature line is not
   visible in this excerpt; module_connection installs this stage under the
   name cap_getc. */
4198 return hex_getc(':', f, i_cgetc, i_cungetc);
/* Unget hook paired with cap_getc: forwards c to the saved hook
   i_cungetc and returns its result. */
4202 cap_ungetc(nkf_char c, FILE *f)
4204 return (*i_cungetc)(c, f);
/* Reader stage for percent-introduced hex escapes: delegates to hex_getc
   with the saved hooks i_ugetc / i_uungetc.  The signature line is not
   visible in this excerpt; module_connection installs this stage under the
   name url_getc. */
4210 return hex_getc('%', f, i_ugetc, i_uungetc);
/* Unget hook paired with url_getc: forwards c to the saved hook
   i_uungetc and returns its result. */
4214 url_ungetc(nkf_char c, FILE *f)
4216 return (*i_uungetc)(c, f);
/* numchar_getc: reader stage for numeric character references; compiled
   only with NUMCHAR_OPTION.  It reads through i_ngetc and pushes back
   through i_nungetc.
   Two forms are visible: after x or X up to 7 characters are read and
   tested as hexadecimal digits, otherwise up to 8 characters are read and
   tested as decimal digits; digits are accumulated into c with hex2bin.
   A decoded value is returned wrapped by nkf_char_unicode_new.  The
   introducer test, the terminator test and the fallback path are not
   visible in this excerpt. */
4220 #ifdef NUMCHAR_OPTION
4222 numchar_getc(FILE *f)
4224 nkf_char (*g)(FILE *) = i_ngetc;
4225 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4236 if (buf[i] == 'x' || buf[i] == 'X'){
4237 for (j = 0; j < 7; j++){
4239 if (!nkf_isxdigit(buf[i])){
4246 c |= hex2bin(buf[i]);
4249 for (j = 0; j < 8; j++){
4253 if (!nkf_isdigit(buf[i])){
4260 c += hex2bin(buf[i]);
4266 return nkf_char_unicode_new(c);
/* Unget hook paired with numchar_getc: forwards c to the saved hook
   i_nungetc and returns its result. */
4276 numchar_ungetc(nkf_char c, FILE *f)
4278 return (*i_nungetc)(c, f);
/* Body of the Unicode normalization reader; compiled only with
   UNICODE_NORMALIZATION.  The signature line is not visible in this
   excerpt; module_connection installs this stage under the name nfc_getc.
   A byte is read through i_nfc_getc.  EOF, values above 0xFF and bytes of
   the form 10xxxxxx are returned unchanged.  Otherwise the byte starts a
   lookup: a binary search over normalization_table compares the buffered
   bytes with each entry's nfd sequence, reading further input bytes into
   buf as the comparison needs them.  On a full match the buffer is
   refilled with the entry's nfc sequence.  Finally every buffered byte
   except the first is pushed back through i_nfc_ungetc, the first one is
   returned, and the buffer is disposed.
   NOTE(review): the early return at 4294 happens after nkf_buf_new(9); no
   dispose call is visible on that path -- confirm whether buf leaks. */
4282 #ifdef UNICODE_NORMALIZATION
4287 nkf_char (*g)(FILE *f) = i_nfc_getc;
4288 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4289 nkf_buf_t *buf = nkf_buf_new(9);
4290 const unsigned char *array;
4291 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4292 nkf_char c = (*g)(f);
4294 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4296 nkf_buf_push(buf, (unsigned char)c);
4298 while (lower <= upper) {
4299 int mid = (lower+upper) / 2;
4301 array = normalization_table[mid].nfd;
4302 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4303 if (len >= nkf_buf_length(buf)) {
4307 lower = 1, upper = 0;
4310 nkf_buf_push(buf, c);
4312 if (array[len] != nkf_buf_at(buf, len)) {
4313 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4314 else upper = mid - 1;
4321 array = normalization_table[mid].nfc;
4323 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4324 nkf_buf_push(buf, array[i]);
4328 } while (lower <= upper);
4330 while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4331 c = nkf_buf_pop(buf);
4332 nkf_buf_dispose(buf);
/* Unget hook paired with the normalization reader: forwards c to the
   saved hook i_nfc_ungetc and returns its result. */
4338 nfc_ungetc(nkf_char c, FILE *f)
4340 return (*i_nfc_ungetc)(c, f);
4342 #endif /* UNICODE_NORMALIZATION */
/* base64decode: maps one Base64 character to its 6-bit value.
   Per the inline notes: A..Z give 0-25, a..z give 26-51, 0..9 give 52-61,
   plus and hyphen give 62, slash and underscore give 63.  Character
   constants are used as numbers (the question mark constant is 63, the
   greater-than constant is 62, and so on).  The range tests for the
   letter branches and the return statement are not visible here. */
4346 base64decode(nkf_char c)
4351 i = c - 'A'; /* A..Z 0-25 */
4352 } else if (c == '_') {
4353 i = '?' /* 63 */ ; /* _ 63 */
4355 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4357 } else if (c > '/') {
4358 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4359 } else if (c == '+' || c == '-') {
4360 i = '>' /* 62 */ ; /* + and - 62 */
4362 i = '?' /* 63 */ ; /* / 63 */
/* Body of the MIME-decoding reader.  The signature line is not visible in
   this excerpt; switch_mime_getc installs this stage under the name
   mime_getc.
   Order of work:
   1. If decoded bytes are waiting in the ring buffer (top differs from
      last) the next one is returned.
   2. If mime_decode_mode is 1 or FALSE, MIME mode is left through
      unswitch_mime_getc and the byte comes from i_getc.
   3. Q mode: an underscore yields a space (except with FIXED_MIME); a
      character at or below SP or at or above DEL prepares to leave MIME
      mode; an equals sign followed by two characters yields the byte
      (hex2bin(c2)<<4) + hex2bin(c3); an equals sign followed by a control
      character is a soft line break; a question mark followed by an equals
      sign ends the word.
   4. B mode: four characters c1..c4 are read, skipping those at or below
      SP (outside strict mode); a question mark followed by an equals sign
      ends the word; the four characters are decoded with base64decode
      into bytes appended at mime_input_state.last, and the byte at top is
      returned.
   After the end of a word, following white space is collected in lwsp_buf
   (grown with nkf_xrealloc) and pushed back through i_ungetc unless it
   ends in a space or tab directly before an equals sign; the buffer is
   then freed with nkf_xfree.
   NOTE(review): the push-back loops at 4450 and 4552 stop before index 0;
   confirm that slot 0 is handled in a line not visible here. */
4370 nkf_char c1, c2, c3, c4, cc;
4371 nkf_char t1, t2, t3, t4, mode, exit_mode;
4372 nkf_char lwsp_count;
4375 nkf_char lwsp_size = 128;
4377 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4378 return mime_input_buf(mime_input_state.top++);
4380 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4381 mime_decode_mode=FALSE;
4382 unswitch_mime_getc();
4383 return (*i_getc)(f);
4386 if (mimebuf_f == FIXED_MIME)
4387 exit_mode = mime_decode_mode;
4390 if (mime_decode_mode == 'Q') {
4391 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4393 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4394 if (c1<=SP || DEL<=c1) {
4395 mime_decode_mode = exit_mode; /* prepare for quit */
4398 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4402 mime_decode_mode = exit_mode; /* prepare for quit */
4403 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4404 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4405 /* end Q encoding */
4406 input_mode = exit_mode;
4408 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4409 while ((c1=(*i_getc)(f))!=EOF) {
4414 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4422 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4423 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4438 lwsp_buf[lwsp_count] = (unsigned char)c1;
4439 if (lwsp_count++>lwsp_size){
4441 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4442 lwsp_buf = lwsp_buf_new;
4448 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4450 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4451 i_ungetc(lwsp_buf[lwsp_count],f);
4454 nkf_xfree(lwsp_buf);
4457 if (c1=='='&&c2<SP) { /* this is soft wrap */
4458 while((c1 = (*i_mgetc)(f)) <=SP) {
4459 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4461 mime_decode_mode = 'Q'; /* still in MIME */
4462 goto restart_mime_q;
4465 mime_decode_mode = 'Q'; /* still in MIME */
4469 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4470 if (c2<=SP) return c2;
4471 mime_decode_mode = 'Q'; /* still in MIME */
4472 return ((hex2bin(c2)<<4) + hex2bin(c3));
4475 if (mime_decode_mode != 'B') {
4476 mime_decode_mode = FALSE;
4477 return (*i_mgetc)(f);
4481 /* Base64 encoding */
4483 MIME allows line break in the middle of
4484 Base64, but we are very pessimistic in decoding
4485 in unbuf mode because MIME encoded code may broken by
4486 less or editor's control sequence (such as ESC-[-K in unbuffered
4487 mode. ignore incomplete MIME.
4489 mode = mime_decode_mode;
4490 mime_decode_mode = exit_mode; /* prepare for quit */
4492 while ((c1 = (*i_mgetc)(f))<=SP) {
4497 if ((c2 = (*i_mgetc)(f))<=SP) {
4500 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4501 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4504 if ((c1 == '?') && (c2 == '=')) {
4507 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4508 while ((c1=(*i_getc)(f))!=EOF) {
4513 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4521 if ((c1=(*i_getc)(f))!=EOF) {
4525 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4540 lwsp_buf[lwsp_count] = (unsigned char)c1;
4541 if (lwsp_count++>lwsp_size){
4543 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4544 lwsp_buf = lwsp_buf_new;
4550 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4552 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4553 i_ungetc(lwsp_buf[lwsp_count],f);
4556 nkf_xfree(lwsp_buf);
4560 if ((c3 = (*i_mgetc)(f))<=SP) {
4563 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4564 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4568 if ((c4 = (*i_mgetc)(f))<=SP) {
4571 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4572 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4576 mime_decode_mode = mode; /* still in MIME sigh... */
4578 /* BASE 64 decoding */
4580 t1 = 0x3f & base64decode(c1);
4581 t2 = 0x3f & base64decode(c2);
4582 t3 = 0x3f & base64decode(c3);
4583 t4 = 0x3f & base64decode(c4);
4584 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4586 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4587 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4589 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4590 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4592 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4597 return mime_input_buf(mime_input_state.top++);
/* basis_64 is the Base64 alphabet, indexed by 6-bit value when encoding.
   MIMEOUT_BUF_LENGTH is the capacity of the pending-output buffer declared
   below; the array has one extra element. */
4600 static const char basis_64[] =
4601 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4603 #define MIMEOUT_BUF_LENGTH 74
4605 char buf[MIMEOUT_BUF_LENGTH+1];
4610 /*nkf_char mime_lastchar2, mime_lastchar1;*/
/* open_mime: starts an encoded word on output.
   mode is looked up in mime_encode to choose the header string from
   mime_pattern (entry 0 is the default) and mimeout_mode is set from
   mime_encode_method.
   If base64_count exceeds 45 a line break is emitted first (PUT_NEWLINE),
   preceded by a pending leading blank from mimeout_state.buf when there is
   one.  Leading white space held in mimeout_state.buf (SP, TAB, CR, LF) is
   written out unencoded through o_mputc; the remaining pending characters
   are re-submitted through mime_putc after mimeout_state.count has been
   reset to 0.  The lines that emit the header itself are not visible in
   this excerpt. */
4613 open_mime(nkf_char mode)
4615 const unsigned char *p;
4618 p = mime_pattern[0];
4619 for(i=0;mime_pattern[i];i++) {
4620 if (mode == mime_encode[i]) {
4621 p = mime_pattern[i];
4625 mimeout_mode = mime_encode_method[i];
4627 if (base64_count>45) {
4628 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4629 (*o_mputc)(mimeout_state.buf[i]);
4632 PUT_NEWLINE((*o_mputc));
4635 if (mimeout_state.count>0
4636 && (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4637 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)) {
4641 for (;i<mimeout_state.count;i++) {
4642 if (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4643 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF) {
4644 (*o_mputc)(mimeout_state.buf[i]);
4654 j = mimeout_state.count;
4655 mimeout_state.count = 0;
4657 mime_putc(mimeout_state.buf[i]);
/* mime_prechar: line-length check made before a character is handed to
   the MIME encoder.
   The projected column is base64_count plus mimeout_state.count/3*4.
   Inside an encoded word (mimeout_mode > 0) the limits visible are 73 and
   66; outside one, a non-EOF character past column 60 first opens a new
   encoded word (Q for ASCII or ISO_8859_1 output, B otherwise).  In each
   case the line is broken by sending EOF, a newline (OCONV_NEWLINE) and a
   space through o_base64conv.  The conditions that choose between the 73
   and 66 limits are not visible in this excerpt. */
4662 mime_prechar(nkf_char c2, nkf_char c1)
4664 if (mimeout_mode > 0){
4666 if (base64_count + mimeout_state.count/3*4> 73){
4667 (*o_base64conv)(EOF,0);
4668 OCONV_NEWLINE((*o_base64conv));
4669 (*o_base64conv)(0,SP);
4673 if (base64_count + mimeout_state.count/3*4> 66) {
4674 (*o_base64conv)(EOF,0);
4675 OCONV_NEWLINE((*o_base64conv));
4676 (*o_base64conv)(0,SP);
4682 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4683 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4684 open_mime(output_mode);
4685 (*o_base64conv)(EOF,0);
4686 OCONV_NEWLINE((*o_base64conv));
4687 (*o_base64conv)(0,SP);
/* Fragment that finishes a pending Base64 group at the end of an encoded
   word (signature not visible in this excerpt; presumably eof_mime --
   confirm).
   Depending on mimeout_mode, the bits left over in mimeout_state.state are
   flushed as one final basis_64 character: the low 2 bits shifted left by
   4, or the low 4 bits shifted left by 2.  The padding output and case
   labels are not visible.  Afterwards, while an encoded word is open
   (mimeout_mode > 0), the action depends on mimeout_f being FIXED_MIME or
   not; the statements in those branches are not visible. */
4706 switch(mimeout_mode) {
4711 (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4)]);
4717 (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2)]);
4722 if (mimeout_mode > 0) {
4723 if (mimeout_f!=FIXED_MIME) {
4725 } else if (mimeout_mode != 'Q')
/* mimeout_addchar: encodes one byte c into the open encoded word and
   writes the result through o_mputc.
   Q mode: a non-alphanumeric byte is written as two hex digits made with
   bin2hex from its high and low nibble (the escape character written in
   front is not visible here).
   B mode: a three-step state machine keyed on mimeout_mode.  The first
   byte emits basis_64[c>>2] and is remembered in mimeout_state.state; the
   second emits the character built from the low 2 bits of the previous
   byte and the high 4 bits of c; the third emits two characters, from the
   low 4 bits of the previous byte with the top 2 bits of c, and from the
   low 6 bits of c.  Case labels and counter updates are not visible. */
4731 mimeout_addchar(nkf_char c)
4733 switch(mimeout_mode) {
4738 } else if(!nkf_isalnum(c)) {
4740 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4741 (*o_mputc)(bin2hex((c&0xf)));
4749 mimeout_state.state=c;
4750 (*o_mputc)(basis_64[c>>2]);
4755 (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4756 mimeout_state.state=c;
4761 (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4762 (*o_mputc)(basis_64[c & 0x3F]);
/* mime_putc: byte-level entry point of the MIME encoder; c is the next
   output byte or EOF.
   With mimeout_f == FIXED_MIME the visible code only enforces a line limit
   (base64_count above 71 triggers PUT_NEWLINE) for Q and B output.
   Otherwise bytes are collected in mimeout_state.buf (at most
   MIMEOUT_BUF_LENGTH) so the encoder can decide where an encoded word must
   start and end:
   - at EOF the pending bytes are flushed, opening a word first when
     mimeout_mode is -1 and more than one byte is pending;
   - in Q mode and outside any word (mimeout_mode <= 0), ASCII-range bytes
     for ASCII or ISO_8859_1 output are written or buffered directly, with
     line breaks inserted past column 70, and other bytes open a word;
   - a long unbroken buffer is split at a boundary= attribute when one is
     found, so the line stays within 76 columns;
   - in B mode white space and ASCII words decide whether the pending
     bytes are encoded (mimeout_addchar) or written raw (o_mputc).
   Many short statements of this function are not visible in this excerpt,
   so this summary covers only the visible branches.
   NOTE(review): the shift loop at 4939 runs while j <= mimeout_state.count,
   one element past the pending data; buf has MIMEOUT_BUF_LENGTH+1 elements,
   so confirm count can never exceed MIMEOUT_BUF_LENGTH at that point. */
4774 mime_putc(nkf_char c)
4779 if (mimeout_f == FIXED_MIME){
4780 if (mimeout_mode == 'Q'){
4781 if (base64_count > 71){
4782 if (c!=CR && c!=LF) {
4784 PUT_NEWLINE((*o_mputc));
4789 if (base64_count > 71){
4791 PUT_NEWLINE((*o_mputc));
4794 if (c == EOF) { /* c==EOF */
4798 if (c != EOF) { /* c==EOF */
4804 /* mimeout_f != FIXED_MIME */
4806 if (c == EOF) { /* c==EOF */
4807 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4808 j = mimeout_state.count;
4809 mimeout_state.count = 0;
4811 if (mimeout_mode > 0) {
4812 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4814 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4817 mimeout_addchar(mimeout_state.buf[i]);
4821 mimeout_addchar(mimeout_state.buf[i]);
4825 mimeout_addchar(mimeout_state.buf[i]);
4831 mimeout_addchar(mimeout_state.buf[i]);
4837 if (mimeout_state.count > 0){
4838 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4843 if (mimeout_mode=='Q') {
4844 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4845 if (c == CR || c == LF) {
4850 } else if (c <= SP) {
4852 if (base64_count > 70) {
4853 PUT_NEWLINE((*o_mputc));
4856 if (!nkf_isblank(c)) {
4861 if (base64_count > 70) {
4863 PUT_NEWLINE((*o_mputc));
4866 open_mime(output_mode);
4868 if (!nkf_noescape_mime(c)) {
4879 if (mimeout_mode <= 0) {
4880 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4881 if (nkf_isspace(c)) {
4883 if (mimeout_mode == -1) {
4886 if (c==CR || c==LF) {
4888 open_mime(output_mode);
4894 for (i=0;i<mimeout_state.count;i++) {
4895 (*o_mputc)(mimeout_state.buf[i]);
4896 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4907 mimeout_state.buf[0] = (char)c;
4908 mimeout_state.count = 1;
4910 if (base64_count > 1
4911 && base64_count + mimeout_state.count > 76
4912 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
4913 static const char *str = "boundary=\"";
4914 static int len = 10;
4917 for (; i < mimeout_state.count - len; ++i) {
4918 if (!strncmp(mimeout_state.buf+i, str, len)) {
4924 if (i == 0 || i == mimeout_state.count - len) {
4925 PUT_NEWLINE((*o_mputc));
4927 if (!nkf_isspace(mimeout_state.buf[0])){
4934 for (j = 0; j <= i; ++j) {
4935 (*o_mputc)(mimeout_state.buf[j]);
4937 PUT_NEWLINE((*o_mputc));
4939 for (; j <= mimeout_state.count; ++j) {
4940 mimeout_state.buf[j - i] = mimeout_state.buf[j];
4942 mimeout_state.count -= i;
4945 mimeout_state.buf[mimeout_state.count++] = (char)c;
4946 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4947 open_mime(output_mode);
4952 if (lastchar==CR || lastchar == LF){
4953 for (i=0;i<mimeout_state.count;i++) {
4954 (*o_mputc)(mimeout_state.buf[i]);
4957 mimeout_state.count = 0;
4960 for (i=0;i<mimeout_state.count-1;i++) {
4961 (*o_mputc)(mimeout_state.buf[i]);
4964 mimeout_state.buf[0] = SP;
4965 mimeout_state.count = 1;
4967 open_mime(output_mode);
4970 /* mimeout_mode == 'B', 1, 2 */
4971 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4972 if (lastchar == CR || lastchar == LF){
4973 if (nkf_isblank(c)) {
4974 for (i=0;i<mimeout_state.count;i++) {
4975 mimeout_addchar(mimeout_state.buf[i]);
4977 mimeout_state.count = 0;
4978 } else if (SP<c && c<DEL) {
4980 for (i=0;i<mimeout_state.count;i++) {
4981 (*o_mputc)(mimeout_state.buf[i]);
4984 mimeout_state.count = 0;
4986 mimeout_state.buf[mimeout_state.count++] = (char)c;
4989 if (c==SP || c==TAB || c==CR || c==LF) {
4990 for (i=0;i<mimeout_state.count;i++) {
4991 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
4993 for (i=0;i<mimeout_state.count;i++) {
4994 (*o_mputc)(mimeout_state.buf[i]);
4997 mimeout_state.count = 0;
5000 mimeout_state.buf[mimeout_state.count++] = (char)c;
5001 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5003 for (i=0;i<mimeout_state.count;i++) {
5004 (*o_mputc)(mimeout_state.buf[i]);
5007 mimeout_state.count = 0;
5011 if (mimeout_state.count>0 && SP<c && c!='=') {
5012 mimeout_state.buf[mimeout_state.count++] = (char)c;
5013 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5014 j = mimeout_state.count;
5015 mimeout_state.count = 0;
5017 mimeout_addchar(mimeout_state.buf[i]);
5024 if (mimeout_state.count>0) {
5025 j = mimeout_state.count;
5026 mimeout_state.count = 0;
5028 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5030 mimeout_addchar(mimeout_state.buf[i]);
5036 (*o_mputc)(mimeout_state.buf[i]);
5038 open_mime(output_mode);
/* base64_conv: MIME stage of the output chain.  Runs the line-length
   check mime_prechar for the character, then forwards (c2, c1) to the next
   stage through o_base64conv. */
5045 base64_conv(nkf_char c2, nkf_char c1)
5047 mime_prechar(c2, c1);
5048 (*o_base64conv)(c2,c1);
/* State for the iconv-based converter: a conversion descriptor (used as
   cd by the functions below; its declaration is not visible here) plus an
   input and an output buffer with their sizes. */
5052 typedef struct nkf_iconv_t {
5055 size_t input_buffer_size;
5056 char *output_buffer;
5057 size_t output_buffer_size;
/* nkf_iconv_new: sets up a converter from fromcode to tocode.
   The input buffer gets IOBUF_SIZE bytes and the output buffer twice that,
   both from nkf_xmalloc; the descriptor comes from iconv_open.  A failed
   iconv_open (result (iconv_t)-1) is reported with perror.
   NOTE(review): converter is declared as a struct value but is accessed
   with the arrow operator, and no allocation of the struct itself is
   visible.
   NOTE(review): perror is given the result of fprintf, and that fprintf
   call has no stream argument; the message at 5074 cannot be produced as
   written. */
5061 nkf_iconv_new(char *tocode, char *fromcode)
5063 nkf_iconv_t converter;
5065 converter->input_buffer_size = IOBUF_SIZE;
5066 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5067 converter->output_buffer_size = IOBUF_SIZE * 2;
5068 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5069 converter->cd = iconv_open(tocode, fromcode);
5070 if (converter->cd == (iconv_t)-1)
5074 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5077 perror("can't iconv_open");
/* nkf_iconv_convert: reads bytes through i_getc into the input buffer of
   the converter, runs iconv on them, and writes the converted bytes
   through o_putc.  On an iconv error, unconsumed input is moved back to
   the start of the input buffer with memmove, or the output buffer size is
   doubled and the buffer reallocated; other errors are reported with
   perror.  The errno tests selecting between these are not visible here.
   NOTE(review): the read loop uses f although the parameter is named
   input.
   NOTE(review): the test at 5096 breaks while the buffer still has room
   (input_length below the buffer size); the comparison looks inverted.
   NOTE(review): the realloc at 5112 uses the member name outbuf, while the
   struct and the rest of this function use output_buffer.
   NOTE(review): the output loop at 5101-5102 counts output_length, which
   iconv leaves as the remaining free space, and indexes through the
   advanced output_buffer pointer -- confirm against the iconv contract. */
5083 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5085 size_t invalid = (size_t)0;
5086 char *input_buffer = converter->input_buffer;
5087 size_t input_length = (size_t)0;
5088 char *output_buffer = converter->output_buffer;
5089 size_t output_length = converter->output_buffer_size;
5094 while ((c = (*i_getc)(f)) != EOF) {
5095 input_buffer[input_length++] = c;
5096 if (input_length < converter->input_buffer_size) break;
5100 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5101 while (output_length-- > 0) {
5102 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5104 if (ret == (size_t) - 1) {
5107 if (input_buffer != converter->input_buffer)
5108 memmove(converter->input_buffer, input_buffer, input_length);
5111 converter->output_buffer_size *= 2;
5112 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5113 if (output_buffer == NULL) {
5114 perror("can't realloc");
5117 converter->output_buffer = output_buffer;
5120 perror("can't iconv");
/* nkf_iconv_close: releases both buffers with nkf_xfree and closes the
   conversion descriptor with iconv_close.
   NOTE(review): the parameter is named convert but the body uses
   converter, and the members are called inbuf / outbuf here while the
   struct declares input_buffer / output_buffer. */
5133 nkf_iconv_close(nkf_iconv_t *convert)
5135 nkf_xfree(converter->inbuf);
5136 nkf_xfree(converter->outbuf);
5137 iconv_close(converter->cd);
/* Body of the routine that resets the converter to its default state (the
   signature line is not visible in this excerpt; presumably reinit --
   confirm).
   Visible resets: the MIME, X0201 and ISO-2022-JP option flags; the
   Unicode options under their feature macros (UCS map, fallback,
   substitution character, endianness, BOM); the 256-entry prefix_table;
   the pending MIME output count; the folding options; every o_* output
   hook (set to no_connection); the unget and MIME buffer input hooks (set
   to the std_* functions); the output and MIME decode modes; the z_conv
   look-behind state; and the input and output encoding pointers and
   detected code name (set to NULL). */
5146 struct input_code *p = input_code_list;
5158 mime_f = MIME_DECODE_DEFAULT;
5159 mime_decode_f = FALSE;
5164 x0201_f = X0201_DEFAULT;
5165 iso2022jp_f = FALSE;
5166 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5167 ms_ucs_map_f = UCS_MAP_ASCII;
5169 #ifdef UTF8_INPUT_ENABLE
5170 no_cp932ext_f = FALSE;
5171 no_best_fit_chars_f = FALSE;
5172 encode_fallback = NULL;
5173 unicode_subchar = '?';
5174 input_endian = ENDIAN_BIG;
5176 #ifdef UTF8_OUTPUT_ENABLE
5177 output_bom_f = FALSE;
5178 output_endian = ENDIAN_BIG;
5180 #ifdef UNICODE_NORMALIZATION
5196 #ifdef SHIFTJIS_CP932
5206 for (i = 0; i < 256; i++){
5207 prefix_table[i] = 0;
5211 mimeout_state.count = 0;
5216 fold_preserve_f = FALSE;
5219 kanji_intro = DEFAULT_J;
5220 ascii_intro = DEFAULT_R;
5221 fold_margin = FOLD_MARGIN;
5222 o_zconv = no_connection;
5223 o_fconv = no_connection;
5224 o_eol_conv = no_connection;
5225 o_rot_conv = no_connection;
5226 o_hira_conv = no_connection;
5227 o_base64conv = no_connection;
5228 o_iso2022jp_check_conv = no_connection;
5231 i_ungetc = std_ungetc;
5233 i_bungetc = std_ungetc;
5236 i_mungetc = std_ungetc;
5237 i_mgetc_buf = std_getc;
5238 i_mungetc_buf = std_ungetc;
5239 output_mode = ASCII;
5241 mime_decode_mode = FALSE;
5247 init_broken_state();
5248 z_prev2=0,z_prev1=0;
5250 iconv_for_check = 0;
5252 input_codename = NULL;
5253 input_encoding = NULL;
5254 output_encoding = NULL;
/* module_connection: wires the conversion pipeline from the option flags.
   Encoding setup: an explicit input encoding is applied; a missing output
   encoding falls back to nkf_default_encoding, and for noout_f / guess_f
   runs to ISO_2022_JP; oconv is taken from the output encoding.
   Output chain: each enabled stage saves the current oconv in its own o_*
   hook and becomes the new oconv, so the stage linked last runs first.
   Visible link order: base64_conv, eol_conv, rot_conv,
   iso2022jp_check_conv, hira_conv, fold_conv, z_conv.  The flags guarding
   several of these links are not visible in this excerpt.
   Input chain: built the same way on i_getc / i_ungetc, in the order
   cap_getc, url_getc, numchar_getc, nfc_getc, mime_getc (only for mime_f
   with FIXED_MIME), broken_getc.
   Finally the input converter is set from the input encoding when one is
   given, otherwise to e_iconv. */
5261 module_connection(void)
5263 if (input_encoding) set_input_encoding(input_encoding);
5264 if (!output_encoding) {
5265 output_encoding = nkf_default_encoding();
5267 if (!output_encoding) {
5268 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5271 set_output_encoding(output_encoding);
5272 oconv = nkf_enc_to_oconv(output_encoding);
5275 /* replace continucation module, from output side */
5277 /* output redicrection */
5279 if (noout_f || guess_f){
5286 if (mimeout_f == TRUE) {
5287 o_base64conv = oconv; oconv = base64_conv;
5289 /* base64_count = 0; */
5292 if (eolmode_f || guess_f) {
5293 o_eol_conv = oconv; oconv = eol_conv;
5296 o_rot_conv = oconv; oconv = rot_conv;
5299 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5302 o_hira_conv = oconv; oconv = hira_conv;
5305 o_fconv = oconv; oconv = fold_conv;
5308 if (alpha_f || x0201_f) {
5309 o_zconv = oconv; oconv = z_conv;
5313 i_ungetc = std_ungetc;
5314 /* input redicrection */
5317 i_cgetc = i_getc; i_getc = cap_getc;
5318 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5321 i_ugetc = i_getc; i_getc = url_getc;
5322 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5325 #ifdef NUMCHAR_OPTION
5327 i_ngetc = i_getc; i_getc = numchar_getc;
5328 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5331 #ifdef UNICODE_NORMALIZATION
5333 i_nfc_getc = i_getc; i_getc = nfc_getc;
5334 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5337 if (mime_f && mimebuf_f==FIXED_MIME) {
5338 i_mgetc = i_getc; i_getc = mime_getc;
5339 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5342 i_bgetc = i_getc; i_getc = broken_getc;
5343 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5345 if (input_encoding) {
5346 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5348 set_iconv(FALSE, e_iconv);
5352 struct input_code *p = input_code_list;
5361 Conversion main loop. Code detection only.
5364 #if !defined(PERL_XS) && !defined(WIN32DLL)
/* Pass-through reader, compiled only for the stand-alone program (neither
 * PERL_XS nor WIN32DLL): wires the pipeline with module_connection() and
 * then pulls characters through i_getc until EOF.
 * NOTE(review): the enclosing function header, any condition guarding the
 * module_connection() call, and the loop body are not visible here. */
5371 module_connection();
5372 while ((c = (*i_getc)(f)) != EOF)
/* Loop-control shorthands for a byte-reading loop that keeps the current
 * byte in c1 and a pending first byte in c2.
 * NOTE: SKIP and MORE expand to two statements each, so they are only safe
 * inside a braced block, never as the sole body of an unbraced if/else. */
5379 #define NEXT continue /* no output, get next */
5380 #define SKIP c2=0;continue /* drop pending c2, no output, get next */
5381 #define MORE c2=c1;continue /* keep c1 as pending c2, need one more byte */
5382 #define SEND ; /* output c1 and c2, get next */
5383 #define LAST break /* end of loop, go closing */
/* set_input_mode(mode): store mode in input_mode and record/debug-print
 * "ISO-2022-JP" as the input code name.
 * NOTE(review): the condition guarding the codename update and the closing
 * lines of the macro are not visible in this excerpt. */
5384 #define set_input_mode(mode) do { \
5385 input_mode = mode; \
5387 set_input_codename("ISO-2022-JP"); \
5388 debug("ISO-2022-JP"); \
/*
 * kanji_convert: main conversion loop over the stream f.
 *
 * Connects the pipeline with module_connection() (a negative result is
 * reported as "no output encoding given"), then reads bytes through i_getc.
 * When iconv is w_iconv32 or w_iconv16 the input is consumed in fixed-size
 * units. Otherwise each byte is classified in turn: 8-bit data, bytes
 * inside a shifted Kanji mode, "=?" MIME starts, SI/SO, ESC designation
 * sequences, and CR/LF. The resulting pair is passed to iconv or oconv.
 *
 * At the end iconv is called with EOF and, if no input code name has been
 * set, the input_code_list entry with the lowest score is reported.
 *
 * NOTE(review): many lines of the body (braces, NEXT/SKIP/MORE/SEND uses,
 * return statements) are not visible in this excerpt.
 */
5392 kanji_convert(FILE *f)
5394 nkf_char c1=0, c2=0, c3=0, c4=0;
5395 int shift_mode = 0; /* 0, 1, 2, 3 */
5397 int is_8bit = FALSE;
5399 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5404 output_mode = ASCII;
5406 if (module_connection() < 0) {
5407 #if !defined(PERL_XS) && !defined(WIN32DLL)
5408 fprintf(stderr, "no output encoding given\n");
5414 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: consume exactly four bytes per unit; a short tail ends the loop */
5415 if(iconv == w_iconv32){
5416 while ((c1 = (*i_getc)(f)) != EOF &&
5417 (c2 = (*i_getc)(f)) != EOF &&
5418 (c3 = (*i_getc)(f)) != EOF &&
5419 (c4 = (*i_getc)(f)) != EOF) {
5420 nkf_iconv_utf_32(c1, c2, c3, c4);
5422 (*i_ungetc)(EOF, f);
/* UTF-16 input: two bytes per unit. A result of -2 from nkf_iconv_utf_16()
 * makes the loop read two more bytes and retry with all four; presumably
 * this is the second half of a surrogate pair (confirm in nkf_iconv_utf_16). */
5424 else if (iconv == w_iconv16) {
5425 while ((c1 = (*i_getc)(f)) != EOF &&
5426 (c2 = (*i_getc)(f)) != EOF) {
5427 if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
5428 (c3 = (*i_getc)(f)) != EOF &&
5429 (c4 = (*i_getc)(f)) != EOF) {
5430 nkf_iconv_utf_16(c1, c2, c3, c4);
5433 (*i_ungetc)(EOF, f);
5437 while ((c1 = (*i_getc)(f)) != EOF) {
5438 #ifdef INPUT_CODE_FIX
5439 if (!input_encoding)
5445 /* in case of 8th bit is on */
5446 if (!estab_f&&!mime_decode_mode) {
5447 /* in case of not established yet */
5448 /* It is still ambiguous */
5449 if (h_conv(f, c2, c1)==EOF) {
5457 /* in case of already established */
5459 /* ignore bogus code */
5467 /* 2nd byte of 7 bit code or SJIS */
5471 else if (nkf_char_unicode_p(c1)) {
5477 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5480 } else if (c1 > DEL) {
5482 if (!estab_f && !iso8859_f) {
5483 /* not established yet */
5485 } else { /* estab_f==TRUE */
5491 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5492 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5494 c2 = JIS_X_0201_1976_K;
5499 /* already established */
5503 } else if (SP < c1 && c1 < DEL) {
5504 /* in case of Roman characters */
5506 /* output 1 shifted byte */
5510 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5511 /* output 1 shifted byte */
5512 c2 = JIS_X_0201_1976_K;
5515 /* looks like bogus code */
5518 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5519 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5520 /* in case of Kanji shifted */
5522 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5523 /* Check MIME code */
5524 if ((c1 = (*i_getc)(f)) == EOF) {
5527 } else if (c1 == '?') {
5528 /* =? is mime conversion start sequence */
5529 if(mime_f == STRICT_MIME) {
5530 /* check in real detail */
5531 if (mime_begin_strict(f) == EOF)
5534 } else if (mime_begin(f) == EOF)
5543 /* normal ASCII code */
5546 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5549 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5552 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5553 if ((c1 = (*i_getc)(f)) == EOF) {
5554 /* (*oconv)(0, ESC); don't send bogus code */
5557 else if (c1 == '&') {
5559 if ((c1 = (*i_getc)(f)) == EOF) {
5565 else if (c1 == '$') {
5567 if ((c1 = (*i_getc)(f)) == EOF) {
5568 /* don't send bogus code
5570 (*oconv)(0, '$'); */
5572 } else if (c1 == '@' || c1 == 'B') {
5574 set_input_mode(JIS_X_0208);
5576 } else if (c1 == '(') {
5578 if ((c1 = (*i_getc)(f)) == EOF) {
5579 /* don't send bogus code
5585 } else if (c1 == '@'|| c1 == 'B') {
5587 set_input_mode(JIS_X_0208);
5590 } else if (c1 == 'D'){
5591 set_input_mode(JIS_X_0212);
5593 #endif /* X0212_ENABLE */
5594 } else if (c1 == 'O' || c1 == 'Q'){
5595 set_input_mode(JIS_X_0213_1);
5597 } else if (c1 == 'P'){
5598 set_input_mode(JIS_X_0213_2);
5601 /* could be some special code */
5608 } else if (broken_f&0x2) {
5609 /* accept any ESC-(-x as broken code ... */
5610 input_mode = JIS_X_0208;
5619 } else if (c1 == '(') {
5621 if ((c1 = (*i_getc)(f)) == EOF) {
5622 /* don't send bogus code
5624 (*oconv)(0, '('); */
5627 else if (c1 == 'I') {
5628 /* JIS X 0201 Katakana */
5629 set_input_mode(JIS_X_0201_1976_K);
5632 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5633 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5634 set_input_mode(ASCII);
5637 else if (broken_f&0x2) {
5638 set_input_mode(ASCII);
5647 else if (c1 == '.') {
5649 if ((c1 = (*i_getc)(f)) == EOF) {
5652 else if (c1 == 'A') {
5663 else if (c1 == 'N') {
5666 if (g2 == ISO_8859_1) {
5681 } else if (c1 == ESC && iconv == s_iconv) {
5682 /* ESC in Shift_JIS */
5683 if ((c1 = (*i_getc)(f)) == EOF) {
5684 /* (*oconv)(0, ESC); don't send bogus code */
5686 } else if (c1 == '$') {
5688 if ((c1 = (*i_getc)(f)) == EOF) {
5690 } else if (('E' <= c1 && c1 <= 'G') ||
5691 ('O' <= c1 && c1 <= 'Q')) {
/* Base code point for the emoji that follow (per the table's name, J-PHONE
 * emoji), indexed by c1 % 7: 'F','G','O','P','Q','E' map to slots
 * 0,1,2,3,4,6. Slot 5 is never selected by the accepted introducers. */
5699 static const nkf_char jphone_emoji_first_table[7] =
5700 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5701 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5702 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5703 while (SP <= c1 && c1 <= 'z') {
5704 (*oconv)(0, c1 + c3);
5705 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5720 } else if (c1 == LF || c1 == CR) {
5722 input_mode = ASCII; set_iconv(FALSE, 0);
5724 } else if (mime_decode_f && !mime_decode_mode){
5726 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5734 } else { /* if (c1 == CR)*/
5735 if ((c1=(*i_getc)(f))!=EOF) {
5739 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5759 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5762 if ((c3 = (*i_getc)(f)) != EOF) {
5765 if ((c4 = (*i_getc)(f)) != EOF) {
5767 (*iconv)(c2, c1, c3|c4);
5772 /* 3 bytes EUC or UTF-8 */
5773 if ((c3 = (*i_getc)(f)) != EOF) {
5775 (*iconv)(c2, c1, c3);
5783 0x7F <= c2 && c2 <= 0x92 &&
5784 0x21 <= c1 && c1 <= 0x7E) {
5786 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5789 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5793 (*oconv)(PREFIX_EUCG3 | c2, c1);
5795 #endif /* X0212_ENABLE */
5797 (*oconv)(PREFIX_EUCG3 | c2, c1);
5800 (*oconv)(input_mode, c1); /* other special case */
5806 /* goto next_word */
5810 (*iconv)(EOF, 0, 0);
5811 if (!input_codename)
/* No code name was established: report the candidate with the lowest score */
5814 struct input_code *p = input_code_list;
5815 struct input_code *result = p;
5817 if (p->score < result->score) result = p;
5820 set_input_codename(result->name);
5822 debug(result->name);
5830 * int options(unsigned char *cp)
/*
 * options: parse one option string and update the global option flags.
 *
 * Skips to just past the first '-', then consumes option characters. A
 * second '-' introduces a long option, which is looked up in long_option[].
 * An entry with a non-empty alias redirects cp to the alias string, which
 * is then parsed as short options; the others are handled inline by name.
 * cp_back keeps the outer loop running after *cp reaches NUL; presumably
 * it holds the position to resume at after an alias (TODO confirm).
 *
 * Unknown and unsupported options are reported on stderr unless built as
 * PERL_XS or WIN32DLL.
 * NOTE(review): the switch header, the break/continue statements and the
 * return values are not visible in this excerpt.
 */
5837 options(unsigned char *cp)
5841 unsigned char *cp_back = NULL;
5846 while(*cp && *cp++!='-');
5847 while (*cp || cp_back) {
5855 case '-': /* long options: "--name" or "--name=value" */
5856 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
5860 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
5861 p = (unsigned char *)long_option[i].name;
/* compare the table name with cp, stopping at '=', at the end of the name,
 * or at the first mismatch */
5862 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5863 if (*p == cp[j] || cp[j] == SP){
5870 #if !defined(PERL_XS) && !defined(WIN32DLL)
5871 fprintf(stderr, "unknown long option: --%s\n", cp);
5875 while(*cp && *cp != SP && cp++);
5876 if (long_option[i].alias[0]){
5878 cp = (unsigned char *)long_option[i].alias;
5880 if (strcmp(long_option[i].name, "ic=") == 0){
5881 enc = nkf_enc_find((char *)p);
5883 input_encoding = enc;
5886 if (strcmp(long_option[i].name, "oc=") == 0){
5887 enc = nkf_enc_find((char *)p);
5888 /* if (enc <= 0) continue; */
5890 output_encoding = enc;
5893 if (strcmp(long_option[i].name, "guess=") == 0){
5894 if (p[0] == '0' || p[0] == '1') {
5902 if (strcmp(long_option[i].name, "overwrite") == 0){
5905 preserve_time_f = TRUE;
5908 if (strcmp(long_option[i].name, "overwrite=") == 0){
5911 preserve_time_f = TRUE;
5913 backup_suffix = (char *)p;
5916 if (strcmp(long_option[i].name, "in-place") == 0){
5919 preserve_time_f = FALSE;
5922 if (strcmp(long_option[i].name, "in-place=") == 0){
5925 preserve_time_f = FALSE;
5927 backup_suffix = (char *)p;
5932 if (strcmp(long_option[i].name, "cap-input") == 0){
5936 if (strcmp(long_option[i].name, "url-input") == 0){
5941 #ifdef NUMCHAR_OPTION
5942 if (strcmp(long_option[i].name, "numchar-input") == 0){
5948 if (strcmp(long_option[i].name, "no-output") == 0){
5952 if (strcmp(long_option[i].name, "debug") == 0){
5957 if (strcmp(long_option[i].name, "cp932") == 0){
5958 #ifdef SHIFTJIS_CP932
5962 #ifdef UTF8_OUTPUT_ENABLE
5963 ms_ucs_map_f = UCS_MAP_CP932;
5967 if (strcmp(long_option[i].name, "no-cp932") == 0){
5968 #ifdef SHIFTJIS_CP932
5972 #ifdef UTF8_OUTPUT_ENABLE
5973 ms_ucs_map_f = UCS_MAP_ASCII;
5977 #ifdef SHIFTJIS_CP932
5978 if (strcmp(long_option[i].name, "cp932inv") == 0){
5985 if (strcmp(long_option[i].name, "x0212") == 0){
5992 if (strcmp(long_option[i].name, "exec-in") == 0){
5996 if (strcmp(long_option[i].name, "exec-out") == 0){
6001 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6002 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6003 no_cp932ext_f = TRUE;
6006 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6007 no_best_fit_chars_f = TRUE;
6010 if (strcmp(long_option[i].name, "fb-skip") == 0){
6011 encode_fallback = NULL;
6014 if (strcmp(long_option[i].name, "fb-html") == 0){
6015 encode_fallback = encode_fallback_html;
6018 if (strcmp(long_option[i].name, "fb-xml") == 0){
6019 encode_fallback = encode_fallback_xml;
6022 if (strcmp(long_option[i].name, "fb-java") == 0){
6023 encode_fallback = encode_fallback_java;
6026 if (strcmp(long_option[i].name, "fb-perl") == 0){
6027 encode_fallback = encode_fallback_perl;
6030 if (strcmp(long_option[i].name, "fb-subchar") == 0){
6031 encode_fallback = encode_fallback_subchar;
6034 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6035 encode_fallback = encode_fallback_subchar;
6036 unicode_subchar = 0;
/* NOTE(review): from here on i appears to be reused as a scratch counter
 * and as an output of w16e_conv(), so it no longer indexes long_option[]. */
6038 /* decimal number */
6039 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6040 unicode_subchar *= 10;
6041 unicode_subchar += hex2bin(p[i]);
6043 }else if(p[1] == 'x' || p[1] == 'X'){
6044 /* hexadecimal number */
6045 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6046 unicode_subchar <<= 4;
6047 unicode_subchar |= hex2bin(p[i]);
6051 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6052 unicode_subchar *= 8;
6053 unicode_subchar += hex2bin(p[i]);
6056 w16e_conv(unicode_subchar, &i, &j);
6057 unicode_subchar = i<<8 | j;
6061 #ifdef UTF8_OUTPUT_ENABLE
6062 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6063 ms_ucs_map_f = UCS_MAP_MS;
6067 #ifdef UNICODE_NORMALIZATION
6068 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6073 if (strcmp(long_option[i].name, "prefix=") == 0){
6074 if (nkf_isgraph(p[0])){
6075 for (i = 1; nkf_isgraph(p[i]); i++){
6076 prefix_table[p[i]] = p[0];
6081 #if !defined(PERL_XS) && !defined(WIN32DLL)
6082 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6087 case 'b': /* buffered mode */
6090 case 'u': /* unbuffered mode */
6093 case 't': /* transparent mode */
6098 } else if (*cp=='2') {
6102 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6110 case 'j': /* JIS output */
6112 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6114 case 'e': /* AT&T EUC output */
6115 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6117 case 's': /* SJIS output */
6118 output_encoding = nkf_enc_from_index(WINDOWS_31J);
6120 case 'l': /* ISO8859 Latin-1 support, no conversion */
6121 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6122 input_encoding = nkf_enc_from_index(ISO_8859_1);
6124 case 'i': /* Kanji IN ESC-$-@/B */
6125 if (*cp=='@'||*cp=='B')
6126 kanji_intro = *cp++;
6128 case 'o': /* ASCII IN ESC-(-J/B */
6129 if (*cp=='J'||*cp=='B'||*cp=='H')
6130 ascii_intro = *cp++;
6134 bit:1 katakana->hiragana
6135 bit:2 hiragana->katakana
6137 if ('9'>= *cp && *cp>='0')
6138 hira_f |= (*cp++ -'0');
6145 #if defined(MSDOS) || defined(__OS2__)
6152 show_configuration();
6160 #ifdef UTF8_OUTPUT_ENABLE
6161 case 'w': /* UTF-8 output */
6166 output_encoding = nkf_enc_from_index(UTF_8N);
6168 output_bom_f = TRUE;
6169 output_encoding = nkf_enc_from_index(UTF_8_BOM);
6173 if ('1'== cp[0] && '6'==cp[1]) {
6176 } else if ('3'== cp[0] && '2'==cp[1]) {
6180 output_encoding = nkf_enc_from_index(UTF_8);
6185 output_endian = ENDIAN_LITTLE;
6186 } else if (cp[0] == 'B') {
6189 output_encoding = nkf_enc_from_index(enc_idx);
6194 enc_idx = enc_idx == UTF_16
6195 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6196 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6198 output_bom_f = TRUE;
6199 enc_idx = enc_idx == UTF_16
6200 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6201 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6203 output_encoding = nkf_enc_from_index(enc_idx);
6207 #ifdef UTF8_INPUT_ENABLE
6208 case 'W': /* UTF input */
6211 input_encoding = nkf_enc_from_index(UTF_8);
6214 if ('1'== cp[0] && '6'==cp[1]) {
6216 input_endian = ENDIAN_BIG;
6218 } else if ('3'== cp[0] && '2'==cp[1]) {
6220 input_endian = ENDIAN_BIG;
6223 input_encoding = nkf_enc_from_index(UTF_8);
6228 input_endian = ENDIAN_LITTLE;
6229 } else if (cp[0] == 'B') {
6231 input_endian = ENDIAN_BIG;
6233 enc_idx = (enc_idx == UTF_16
6234 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6235 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6236 input_encoding = nkf_enc_from_index(enc_idx);
6240 /* Input code assumption */
6241 case 'J': /* ISO-2022-JP input */
6242 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6244 case 'E': /* EUC-JP input */
6245 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6247 case 'S': /* Windows-31J input */
6248 input_encoding = nkf_enc_from_index(WINDOWS_31J);
6250 case 'Z': /* Convert X0208 alphabet to ASCII */
6252 bit:0 Convert JIS X 0208 Alphabet to ASCII
6253 bit:1 Convert Kankaku to one space
6254 bit:2 Convert Kankaku to two spaces
6255 bit:3 Convert HTML Entity
6256 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6258 while ('0'<= *cp && *cp <='9') {
6259 alpha_f |= 1 << (*cp++ - '0');
6261 if (!alpha_f) alpha_f = 1;
6263 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6264 x0201_f = FALSE; /* No X0201->X0208 conversion */
6266 ESC-(-I in JIS, EUC, MS Kanji
6267 SI/SO in JIS, EUC, MS Kanji
6268 SS2 in EUC, JIS, not in MS Kanji
6269 MS Kanji (0xa0-0xdf)
6271 ESC-(-I in JIS (0x20-0x5f)
6272 SS2 in EUC (0xa0-0xdf)
6273 0xa0-0xd in MS Kanji (0xa0-0xdf)
6276 case 'X': /* Convert X0201 kana to X0208 */
6279 case 'F': /* preserve new lines */
6280 fold_preserve_f = TRUE;
/* fall through: -F is -f with fold_preserve_f set */
6281 case 'f': /* folding -f60 or -f */
6284 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6286 fold_len += *cp++ - '0';
6288 if (!(0<fold_len && fold_len<BUFSIZ))
6289 fold_len = DEFAULT_FOLD;
6293 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6295 fold_margin += *cp++ - '0';
6299 case 'm': /* MIME support */
6300 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6301 if (*cp=='B'||*cp=='Q') {
6302 mime_decode_mode = *cp++;
6303 mimebuf_f = FIXED_MIME;
6304 } else if (*cp=='N') {
6305 mime_f = TRUE; cp++;
6306 } else if (*cp=='S') {
6307 mime_f = STRICT_MIME; cp++;
6308 } else if (*cp=='0') {
6309 mime_decode_f = FALSE;
6310 mime_f = FALSE; cp++;
6312 mime_f = STRICT_MIME;
6315 case 'M': /* MIME output */
6318 mimeout_f = FIXED_MIME; cp++;
6319 } else if (*cp=='Q') {
6321 mimeout_f = FIXED_MIME; cp++;
6326 case 'B': /* Broken JIS support */
6328 bit:1 allow any x on ESC-(-x or ESC-$-x
6329 bit:2 reset to ascii on NL
6331 if ('9'>= *cp && *cp>='0')
6332 broken_f |= 1<<(*cp++ -'0');
6337 case 'O':/* for Output file */
6341 case 'c':/* add cr code */
6344 case 'd':/* delete cr code */
6347 case 'I': /* ISO-2022-JP output */
6350 case 'L': /* line mode */
6351 if (*cp=='u') { /* unix */
6352 eolmode_f = LF; cp++;
6353 } else if (*cp=='m') { /* mac */
6354 eolmode_f = CR; cp++;
6355 } else if (*cp=='w') { /* windows */
6356 eolmode_f = CRLF; cp++;
6357 } else if (*cp=='0') { /* no conversion */
6358 eolmode_f = 0; cp++;
6363 if ('2' <= *cp && *cp <= '9') {
6366 } else if (*cp == '0' || *cp == '1') {
6375 /* multiple options in one string are allowed for the Perl module */
6376 while(*cp && *cp++!='-');
6379 #if !defined(PERL_XS) && !defined(WIN32DLL)
6380 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6382 /* bogus option but ignored */
6390 #include "nkf32dll.c"
6391 #elif defined(PERL_XS)
6392 #else /* WIN32DLL */
/*
 * main: command-line entry point of the stand-alone nkf program.
 *
 * Walks the leading arguments that start with '-' (presumably handing each
 * to options(); the call is not visible here). With no file arguments it
 * converts stdin via kanji_convert(). Otherwise each named file is opened
 * and converted in turn.
 *
 * When file_out_f is set, stdout is redirected either to a temporary file
 * named after the input (created with mkstemp, or with open() on the
 * alternative branch) or to "nkf.out". On the temporary-file path the
 * original mode is restored, the timestamps too if preserve_time_f is set,
 * and the temporary file is renamed over the original. The original is
 * either renamed to a backup name or unlinked first.
 *
 * With guess_f the guessed code is printed instead.
 * NOTE(review): many lines (braces, conditions, return statements) are not
 * visible in this excerpt.
 */
6394 main(int argc, char **argv)
6399 char *outfname = NULL;
6402 #ifdef EASYWIN /*Easy Win */
6403 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6405 #ifdef DEFAULT_CODE_LOCALE
6406 setlocale(LC_CTYPE, "");
6408 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6409 cp = (unsigned char *)*argv;
6414 if (pipe(fds) < 0 || (pid = fork()) < 0){
6425 execvp(argv[1], &argv[1]);
/* Selected flags are saved here and written back below; presumably this
 * keeps them across a state reset that is not visible here (TODO confirm). */
6442 int debug_f_back = debug_f;
6445 int exec_f_back = exec_f;
6448 int x0212_f_back = x0212_f;
6450 int x0213_f_back = x0213_f;
6451 int guess_f_back = guess_f;
6453 guess_f = guess_f_back;
6456 debug_f = debug_f_back;
6459 exec_f = exec_f_back;
6461 x0212_f = x0212_f_back;
6462 x0213_f = x0213_f_back;
6465 if (binmode_f == TRUE)
6466 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6467 if (freopen("","wb",stdout) == NULL)
6474 setbuf(stdout, (char *) NULL);
6476 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6479 if (binmode_f == TRUE)
6480 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6481 if (freopen("","rb",stdin) == NULL) return (-1);
6485 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6489 kanji_convert(stdin);
6490 if (guess_f) print_guessed_code(NULL);
6494 int is_argument_error = FALSE;
6496 input_codename = NULL;
6499 iconv_for_check = 0;
6501 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6503 is_argument_error = TRUE;
6511 /* reopen file for stdout */
6512 if (file_out_f == TRUE) {
/* The temporary output name starts as a copy of origfname; the buffer is
 * sized for origfname plus the ".nkftmpXXXXXX" template suffix. */
6515 outfname = nkf_xmalloc(strlen(origfname)
6516 + strlen(".nkftmpXXXXXX")
6518 strcpy(outfname, origfname);
6522 for (i = strlen(outfname); i; --i){
6523 if (outfname[i - 1] == '/'
6524 || outfname[i - 1] == '\\'){
6530 strcat(outfname, "ntXXXXXX");
6532 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6533 S_IREAD | S_IWRITE);
6535 strcat(outfname, ".nkftmpXXXXXX");
6536 fd = mkstemp(outfname);
/* keep a duplicate of the real stdout in fd_backup, then point stdout at
 * the temporary file */
6539 || (fd_backup = dup(fileno(stdout))) < 0
6540 || dup2(fd, fileno(stdout)) < 0
6551 outfname = "nkf.out";
6554 if(freopen(outfname, "w", stdout) == NULL) {
6558 if (binmode_f == TRUE) {
6559 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6560 if (freopen("","wb",stdout) == NULL)
6567 if (binmode_f == TRUE)
6568 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6569 if (freopen("","rb",fin) == NULL)
6574 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6578 char *filename = NULL;
6580 if (nfiles > 1) filename = origfname;
6581 if (guess_f) print_guessed_code(filename);
6587 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6595 if (dup2(fd_backup, fileno(stdout)) < 0){
6598 if (stat(origfname, &sb)) {
6599 fprintf(stderr, "Can't stat %s\n", origfname);
6601 /* Restore permissions: copy origfname's mode onto outfname */
6602 if (chmod(outfname, sb.st_mode)) {
6603 fprintf(stderr, "Can't set permission %s\n", outfname);
6606 /* Restore the timestamp */
6607 if(preserve_time_f){
6608 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6609 tb[0] = tb[1] = sb.st_mtime;
6610 if (utime(outfname, tb)) {
6611 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6614 tb.actime = sb.st_atime;
6615 tb.modtime = sb.st_mtime;
6616 if (utime(outfname, &tb)) {
6617 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6622 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6624 unlink(backup_filename);
6626 if (rename(origfname, backup_filename)) {
6627 perror(backup_filename);
6628 fprintf(stderr, "Can't rename %s to %s\n",
6629 origfname, backup_filename);
6631 nkf_xfree(backup_filename);
6634 if (unlink(origfname)){
6639 if (rename(outfname, origfname)) {
6641 fprintf(stderr, "Can't rename %s to %s\n",
6642 outfname, origfname);
6644 nkf_xfree(outfname);
6649 if (is_argument_error)
6652 #ifdef EASYWIN /*Easy Win */
6653 if (file_out_f == FALSE)
6654 scanf("%d",&end_check);
6657 #else /* for Other OS */
6658 if (file_out_f == TRUE)
6660 #endif /*Easy Win */
6663 #endif /* WIN32DLL */