1 /** Network Kanji Filter. (PDS Version)
2 ** -*- coding: ISO-2022-JP -*-
3 ************************************************************************
4 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
5 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
6 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
7 ** Copyright (C) 1996,1998
9 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
10 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
11 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
12 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
14 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
15 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
16 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
17 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
18 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
19 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
21 ** Everyone is permitted to do anything on this program
22 ** including copying, modifying, improving,
23 ** as long as you don't try to pretend that you wrote it.
24 ** i.e., the above copyright notice has to appear in all copies.
25 ** Binary distribution requires original version messages.
26 ** You don't have to ask before copying, redistribution or publishing.
27 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
28 ***********************************************************************/
30 /***********************************************************************
31 *
\e$B8=:_!"
\e(Bnkf
\e$B$O
\e(B SourceForge
\e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#
\e(B
32 * http://sourceforge.jp/projects/nkf/
33 ***********************************************************************/
/*
 * Version identification.
 * NKF_IDENT is the revision-control ident string; NKF_VERSION and
 * NKF_RELEASE_DATE are pasted into the version banner and the
 * configuration summary printed by this program.
 */
34 #define NKF_IDENT "$Id: nkf.c,v 1.192 2008/11/09 23:09:22 naruse Exp $"
35 #define NKF_VERSION "2.0.8"
36 #define NKF_RELEASE_DATE "2008-12-25"
38 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
39 "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
50 # define INCL_DOSERRORS
/*
 * nkf_debug(fmt, ...): write one debug line to stderr, prefixed with the
 * source file, the enclosing function and the line number, and terminated
 * by a newline.
 *
 * The whole argument list is taken as "..." so that a call carrying only a
 * format string -- nkf_debug("reached") -- is valid.  With a separate named
 * `fmt` parameter such a call expands to an fprintf() argument list ending
 * in a dangling comma, which does not compile.  The expansion is still a
 * single expression, so the macro can be used wherever the old one could.
 */
#define nkf_debug(...) \
    (fprintf(stderr, "%s(%s)%d: ", __FILE__, __func__, __LINE__), \
     fprintf(stderr, __VA_ARGS__), \
     fprintf(stderr, "\n"))
59 /* state of output_mode and input_mode
138 NKF_ENCODING_TABLE_SIZE,
139 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
140 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
141 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
142 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
143 JIS_X_0208 = 0x1168, /* @B */
144 JIS_X_0212 = 0x1159, /* D */
145 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
146 JIS_X_0213_2 = 0x1229, /* P */
147 JIS_X_0213_1 = 0x1233, /* Q */
/*
 * Forward declarations of the per-encoding converters.
 * Each *_iconv function takes three nkf_char arguments (c2, c1, c0) and
 * returns an nkf_char; each *_oconv function takes (c2, c1) and returns
 * nothing.
 * Judging by how they are paired in the NkfEncoding* objects defined after
 * these prototypes: s_ = Shift_JIS, e_ = EUC-JP (also used for ASCII),
 * j_ = ISO-2022-JP output, w_ = UTF-8, w_*16 = UTF-16, w_*32 = UTF-32.
 */
150 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
151 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
152 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
153 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
154 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
155 static void j_oconv(nkf_char c2, nkf_char c1);
156 static void s_oconv(nkf_char c2, nkf_char c1);
157 static void e_oconv(nkf_char c2, nkf_char c1);
158 static void w_oconv(nkf_char c2, nkf_char c1);
159 static void w_oconv16(nkf_char c2, nkf_char c1);
160 static void w_oconv32(nkf_char c2, nkf_char c1);
164 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
165 void (*oconv)(nkf_char c2, nkf_char c1);
166 } nkf_native_encoding;
/*
 * The native (base) encodings.  Each initializer supplies a name string,
 * the input converter (iconv) and the output converter (oconv), in that
 * order.
 * Note that ASCII reuses the EUC-JP converters, and that ISO-2022-JP reads
 * through e_iconv but writes through its own j_oconv.
 */
168 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
169 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
170 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
171 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
172 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
173 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
174 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
179 const nkf_native_encoding *base_encoding;
182 nkf_encoding nkf_encoding_table[] = {
183 {ASCII, "US-ASCII", &NkfEncodingASCII},
184 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
185 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
186 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
187 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
188 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
189 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
190 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
191 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
192 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
193 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
194 {CP10001, "CP10001", &NkfEncodingShift_JIS},
195 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
196 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
197 {CP51932, "CP51932", &NkfEncodingEUC_JP},
198 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
199 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
200 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
201 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
202 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
203 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
204 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
205 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
206 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
207 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
208 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
209 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
210 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
211 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
212 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
213 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
214 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
215 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
216 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
217 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
218 {BINARY, "BINARY", &NkfEncodingASCII},
225 } encoding_name_to_id_table[] = {
228 {"ISO-2022-JP", ISO_2022_JP},
229 {"ISO2022JP-CP932", CP50220},
230 {"CP50220", CP50220},
231 {"CP50221", CP50221},
232 {"CSISO2022JP", CP50221},
233 {"CP50222", CP50222},
234 {"ISO-2022-JP-1", ISO_2022_JP_1},
235 {"ISO-2022-JP-3", ISO_2022_JP_3},
236 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
237 {"SHIFT_JIS", SHIFT_JIS},
239 {"WINDOWS-31J", WINDOWS_31J},
240 {"CSWINDOWS31J", WINDOWS_31J},
241 {"CP932", WINDOWS_31J},
242 {"MS932", WINDOWS_31J},
243 {"CP10001", CP10001},
246 {"EUCJP-NKF", EUCJP_NKF},
247 {"CP51932", CP51932},
248 {"EUC-JP-MS", EUCJP_MS},
249 {"EUCJP-MS", EUCJP_MS},
250 {"EUCJPMS", EUCJP_MS},
251 {"EUC-JP-ASCII", EUCJP_ASCII},
252 {"EUCJP-ASCII", EUCJP_ASCII},
253 {"SHIFT_JISX0213", SHIFT_JISX0213},
254 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
255 {"EUC-JISX0213", EUC_JISX0213},
256 {"EUC-JIS-2004", EUC_JIS_2004},
259 {"UTF-8-BOM", UTF_8_BOM},
260 {"UTF8-MAC", UTF8_MAC},
261 {"UTF-8-MAC", UTF8_MAC},
263 {"UTF-16BE", UTF_16BE},
264 {"UTF-16BE-BOM", UTF_16BE_BOM},
265 {"UTF-16LE", UTF_16LE},
266 {"UTF-16LE-BOM", UTF_16LE_BOM},
268 {"UTF-32BE", UTF_32BE},
269 {"UTF-32BE-BOM", UTF_32BE_BOM},
270 {"UTF-32LE", UTF_32LE},
271 {"UTF-32LE-BOM", UTF_32LE_BOM},
276 #if defined(DEFAULT_CODE_JIS)
277 #define DEFAULT_ENCIDX ISO_2022_JP
278 #elif defined(DEFAULT_CODE_SJIS)
279 #define DEFAULT_ENCIDX SHIFT_JIS
280 #elif defined(DEFAULT_CODE_WINDOWS_31J)
281 #define DEFAULT_ENCIDX WINDOWS_31J
282 #elif defined(DEFAULT_CODE_EUC)
283 #define DEFAULT_ENCIDX EUC_JP
284 #elif defined(DEFAULT_CODE_UTF8)
285 #define DEFAULT_ENCIDX UTF_8
/*
 * Locale-independent ASCII character classification and conversion macros.
 *
 * Every use of a macro parameter is parenthesized so that an argument that
 * is itself an expression (e.g. `ch | 0x20`, `a ? b : c`) is classified as a
 * whole instead of being re-associated with the operators inside the macro.
 * The parameter is still evaluated more than once, so do not pass
 * arguments with side effects (such as `*p++`).
 */
#define is_alnum(c) \
    (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z') || ('0' <= (c) && (c) <= '9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a' <= (c) && (c) <= 'z') ? ((c) - ('a' - 'A')) : (c))
#define nkf_isoctal(c)  ('0' <= (c) && (c) <= '7')
#define nkf_isdigit(c)  ('0' <= (c) && (c) <= '9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
#define nkf_isblank(c)  ((c) == SP || (c) == TAB)
#define nkf_isspace(c)  (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c)  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c)  (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c)  (SP <= (c) && (c) <= '~')
#define nkf_isgraph(c)  ('!' <= (c) && (c) <= '~')
/* Value of a hexadecimal digit character; yields 0 for any non-hex character. */
#define hex2bin(c) (('0' <= (c) && (c) <= '9') ? ((c) - '0') : \
                    ('A' <= (c) && (c) <= 'F') ? ((c) - 'A' + 10) : \
                    ('a' <= (c) && (c) <= 'f') ? ((c) - 'a' + 10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c) & 15])
/* True when the second-lowest byte of c2 (after truncation to unsigned short) equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/*
 * True for CR, LF, and for characters strictly between SP and DEL other
 * than  ? = _ ( ) .  and the double quote (0x22).
 */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) < (0xE0 & 0x7F))
315 #define HOLD_SIZE 1024
316 #if defined(INT_IS_SHORT)
317 #define IOBUF_SIZE 2048
319 #define IOBUF_SIZE 16384
322 #define DEFAULT_J 'B'
323 #define DEFAULT_R 'B'
330 /* MIME preprocessor */
332 #ifdef EASYWIN /*Easy Win */
333 extern POINT _BufferSize;
342 void (*status_func)(struct input_code *, nkf_char);
343 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
347 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
348 static nkf_encoding *input_encoding = NULL;
349 static nkf_encoding *output_encoding = NULL;
351 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
353 * 0: Shift_JIS, eucJP-ascii
358 #define UCS_MAP_ASCII 0
360 #define UCS_MAP_CP932 2
361 #define UCS_MAP_CP10001 3
362 static int ms_ucs_map_f = UCS_MAP_ASCII;
364 #ifdef UTF8_INPUT_ENABLE
365 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
366 static int no_cp932ext_f = FALSE;
367 /* ignore ZERO WIDTH NO-BREAK SPACE */
368 static int no_best_fit_chars_f = FALSE;
369 static int input_endian = ENDIAN_BIG;
370 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
371 static void (*encode_fallback)(nkf_char c) = NULL;
372 static void w_status(struct input_code *, nkf_char);
374 #ifdef UTF8_OUTPUT_ENABLE
375 static int output_bom_f = FALSE;
376 static int output_endian = ENDIAN_BIG;
379 static void std_putc(nkf_char c);
380 static nkf_char std_getc(FILE *f);
381 static nkf_char std_ungetc(nkf_char c,FILE *f);
383 static nkf_char broken_getc(FILE *f);
384 static nkf_char broken_ungetc(nkf_char c,FILE *f);
386 static nkf_char mime_getc(FILE *f);
388 static void mime_putc(nkf_char c);
392 #if !defined(PERL_XS) && !defined(WIN32DLL)
393 static unsigned char stdibuf[IOBUF_SIZE];
394 static unsigned char stdobuf[IOBUF_SIZE];
/* Global option flags; the initializers are the start-up defaults. */
398 static int unbuf_f = FALSE;
399 static int estab_f = FALSE;
400 static int nop_f = FALSE;
401 static int binmode_f = TRUE; /* binary mode */
402 static int rot_f = FALSE; /* ROT13/47 mode */
403 static int hira_f = FALSE; /* hiragana/katakana conversion */
404 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
405 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
406 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
407 static int mimebuf_f = FALSE; /* MIME buffered input */
408 static int broken_f = FALSE; /* convert ESC-less broken JIS */
409 static int iso8859_f = FALSE; /* ISO8859 through */
410 static int mimeout_f = FALSE; /* base64 mode */
411 static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
412 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
414 #ifdef UNICODE_NORMALIZATION
415 static int nfc_f = FALSE;
416 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
417 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
421 static int cap_f = FALSE;
422 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
423 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
425 static int url_f = FALSE;
426 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
427 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
/*
 * Tagging scheme for nkf_char values.
 * The top byte (CLASS_MASK) carries a class tag and the low 24 bits
 * (VALUE_MASK) carry the value; CLASS_UNICODE marks a Unicode code point.
 * PREFIX_EUCG3 is OR-ed into a character by nkf_char_euc3_new().
 *
 * Macro parameters are parenthesized so that an expression argument such as
 * `a | b` is masked as a whole (otherwise `a | b & CLASS_MASK` would mask
 * only `b`).
 */
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c)        ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c)     ((c) | CLASS_UNICODE)
/* True when the class tag of c is CLASS_UNICODE. */
#define nkf_char_unicode_p(c)       (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the value part of c does not exceed U+FFFF. */
#define nkf_char_unicode_bmp_p(c)   (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* True when the value part of c does not exceed U+10FFFF. */
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
442 #ifdef NUMCHAR_OPTION
443 static int numchar_f = FALSE;
444 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
445 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
449 static int noout_f = FALSE;
450 static void no_putc(nkf_char c);
451 static int debug_f = FALSE;
452 static void debug(const char *str);
453 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
456 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
457 static void set_input_codename(const char *codename);
460 static int exec_f = 0;
463 #ifdef SHIFTJIS_CP932
464 /* invert IBM extended characters to others */
465 static int cp51932_f = FALSE;
467 /* invert NEC-selected IBM extended characters to IBM extended characters */
468 static int cp932inv_f = TRUE;
470 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
471 #endif /* SHIFTJIS_CP932 */
473 static int x0212_f = FALSE;
474 static int x0213_f = FALSE;
476 static unsigned char prefix_table[256];
478 static void e_status(struct input_code *, nkf_char);
479 static void s_status(struct input_code *, nkf_char);
481 struct input_code input_code_list[] = {
482 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
483 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
484 #ifdef UTF8_INPUT_ENABLE
485 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
490 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
491 static int base64_count = 0;
493 /* X0208 -> ASCII converter */
496 static int f_line = 0; /* chars in line */
497 static int f_prev = 0;
498 static int fold_preserve_f = FALSE; /* preserve new lines */
499 static int fold_f = FALSE;
500 static int fold_len = 0;
503 static unsigned char kanji_intro = DEFAULT_J;
504 static unsigned char ascii_intro = DEFAULT_R;
/*
 * Line-folding defaults.  These match the usage text for the -f option,
 * which gives "-f60" and "fold margin 10" as its examples.
 */
508 #define FOLD_MARGIN 10
509 #define DEFAULT_FOLD 60
/* Current fold margin; starts at FOLD_MARGIN. */
511 static int fold_margin = FOLD_MARGIN;
513 /* process default */
516 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
518 fprintf(stderr,"nkf internal module connection failure.\n");
524 no_connection(nkf_char c2, nkf_char c1)
526 no_connection2(c2,c1,0);
/*
 * Converter hook pointers.  All of them start out pointing at the
 * no_connection / no_connection2 stubs, which report
 * "nkf internal module connection failure." on stderr, so every hook has a
 * defined target before the real converters are installed.
 * `iconv` takes (c2, c1, c0) and returns an nkf_char; the output-side hooks
 * take (c2, c1) and return nothing.
 */
529 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
530 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
533 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
534 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
535 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
536 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
537 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
538 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
540 /* static redirections */
542 static void (*o_putc)(nkf_char c) = std_putc;
544 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
545 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
547 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
548 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
550 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
552 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
553 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
555 /* for strict mime */
556 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
557 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
560 static int output_mode = ASCII; /* output kanji mode */
561 static int input_mode = ASCII; /* input kanji mode */
562 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
564 /* X0201 / X0208 conversion tables */
566 /* X0201 kana conversion table */
568 static const unsigned char cv[]= {
569 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
570 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
571 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
572 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
573 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
574 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
575 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
576 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
577 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
578 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
579 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
580 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
581 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
582 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
583 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
584 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
588 /* X0201 kana conversion table for dakuten (voiced sound mark) */
590 static const unsigned char dv[]= {
591 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
595 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
596 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
597 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
598 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
599 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
600 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
602 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
603 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
611 static const unsigned char ev[]= {
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
623 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
624 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
625 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
626 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631 /* X0208 kigou conversion table */
632 /* 0x8140 - 0x819e */
633 static const unsigned char fv[] = {
635 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
636 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
637 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
638 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
639 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
640 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
641 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
642 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
643 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
646 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
651 static int option_mode = 0;
652 static int file_out_f = FALSE;
654 static int overwrite_f = FALSE;
655 static int preserve_time_f = FALSE;
656 static int backup_f = FALSE;
657 static char *backup_suffix = "";
660 static int eolmode_f = 0; /* CR, LF, CRLF */
661 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
662 static nkf_char prev_cr = 0; /* CR or 0 */
663 #ifdef EASYWIN /*Easy Win */
664 static int end_check;
/*
 * Fixed-size buffer of STD_GC_BUFSIZE nkf_char slots.
 * NOTE(review): presumably the push-back store behind std_getc/std_ungetc;
 * its users are not visible here -- confirm before relying on this.
 */
667 #define STD_GC_BUFSIZE (256)
668 nkf_char std_gc_buf[STD_GC_BUFSIZE];
672 nkf_malloc(size_t size)
676 if (size == 0) size = 1;
680 perror("can't malloc");
688 nkf_realloc(void *ptr, size_t size)
690 if (size == 0) size = 1;
692 ptr = realloc(ptr, size);
694 perror("can't realloc");
701 #define nkf_free(ptr) free(ptr)
704 nkf_str_caseeql(const char *src, const char *target)
707 for (i = 0; src[i] && target[i]; i++) {
708 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
710 if (src[i] || target[i]) return FALSE;
715 nkf_enc_from_index(int idx)
717 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
720 return &nkf_encoding_table[idx];
724 nkf_enc_find_index(const char *name)
727 if (name[0] == 'X' && *(name+1) == '-') name += 2;
728 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
729 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
730 return encoding_name_to_id_table[i].id;
737 nkf_enc_find(const char *name)
740 idx = nkf_enc_find_index(name);
741 if (idx < 0) return 0;
742 return nkf_enc_from_index(idx);
/*
 * Accessor and predicate macros for an encoding descriptor pointer `enc`.
 * They read the descriptor's name, id and base_encoding members, and the
 * iconv/oconv members of the base encoding.
 */
745 #define nkf_enc_name(enc) (enc)->name
746 #define nkf_enc_to_index(enc) (enc)->id
747 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
748 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
749 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
/* True when the base encoding is NkfEncodingASCII or NkfEncodingISO_2022_JP. */
750 #define nkf_enc_asciicompat(enc) (\
751 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
752 nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
/* True when the base encoding is one of the UTF-8 / UTF-16 / UTF-32 objects. */
753 #define nkf_enc_unicode_p(enc) (\
754 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
755 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
756 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
/* True when the encoding id is CP50220, CP50221 or CP50222. */
757 #define nkf_enc_cp5022x_p(enc) (\
758 nkf_enc_to_index(enc) == CP50220 ||\
759 nkf_enc_to_index(enc) == CP50221 ||\
760 nkf_enc_to_index(enc) == CP50222)
762 #ifdef DEFAULT_CODE_LOCALE
766 #ifdef HAVE_LANGINFO_H
767 return nl_langinfo(CODESET);
768 #elif defined(__WIN32__)
770 sprintf(buf, "CP%d", GetACP());
772 #elif defined(__OS2__)
773 # if defined(INT_IS_SHORT)
779 ULONG ulCP[1], ulncp;
780 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
781 if (ulCP[0] == 932 || ulCP[0] == 943)
782 strcpy(buf, "Shift_JIS");
784 sprintf(buf, "CP%lu", ulCP[0]);
792 nkf_locale_encoding()
794 nkf_encoding *enc = 0;
795 const char *encname = nkf_locale_charmap();
797 enc = nkf_enc_find(encname);
800 #endif /* DEFAULT_CODE_LOCALE */
805 return &nkf_encoding_table[UTF_8];
809 nkf_default_encoding()
811 nkf_encoding *enc = 0;
812 #ifdef DEFAULT_CODE_LOCALE
813 enc = nkf_locale_encoding();
814 #elif defined(DEFAULT_ENCIDX)
815 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
817 if (!enc) enc = nkf_utf8_encoding();
823 #define fprintf dllprintf
829 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
836 "USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
838 "b,u Output is buffered (DEFAULT),Output is unbuffered\n"
839 "j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
840 #ifdef UTF8_OUTPUT_ENABLE
841 " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
843 "J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
844 #ifdef UTF8_INPUT_ENABLE
845 " After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
848 "i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
849 "o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"
850 "r {de/en}crypt ROT13/47\n"
851 "h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
852 "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
853 "M[BQ] MIME encode [B:base64 Q:quoted]\n"
854 "l ISO8859-1 (Latin-1) support\n"
855 "f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
856 "Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
857 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
858 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
859 "X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
860 "B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
862 "T Text mode output\n"
864 "O Output to File (DEFAULT 'nkf.out')\n"
865 "I Convert non ISO-2022-JP charactor to GETA\n"
866 "d,c Convert line breaks -d: LF -c: CRLF\n"
867 "-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
868 "v, V Show this usage. V: show configuration\n"
870 "Long name options\n"
871 " --ic=<input codeset> --oc=<output codeset>\n"
872 " Specify the input or output codeset\n"
873 " --fj --unix --mac --windows\n"
874 " --jis --euc --sjis --utf8 --utf16 --mime --base64\n"
875 " Convert for the system or code\n"
876 " --hiragana --katakana --katakana-hiragana\n"
877 " To Hiragana/Katakana Conversion\n"
878 " --prefix= Insert escape before troublesome characters of Shift_JIS\n"
880 " --cap-input, --url-input Convert hex after ':' or '%%'\n"
882 #ifdef NUMCHAR_OPTION
883 " --numchar-input Convert Unicode Character Reference\n"
885 #ifdef UTF8_INPUT_ENABLE
886 " --fb-{skip, html, xml, perl, java, subchar}\n"
887 " Specify how nkf handles unassigned characters\n"
890 " --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"
891 " Overwrite original listed files by filtered result\n"
892 " --overwrite preserves timestamp of original files\n"
894 " -g --guess Guess the input code\n"
895 " --help --version Show this help/the version\n"
896 " For more information, see also man nkf\n"
902 show_configuration(void)
905 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
908 " Compile-time options:\n"
909 " Compiled at: " __DATE__ " " __TIME__ "\n"
912 " Default output encoding: "
913 #ifdef DEFAULT_CODE_LOCALE
914 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
915 #elif defined(DEFAULT_ENCIDX)
916 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
922 " Default output end of line: "
923 #if DEFAULT_NEWLINE == CR
925 #elif DEFAULT_NEWLINE == CRLF
931 " Decode MIME encoded string: "
932 #if MIME_DECODE_DEFAULT
938 " Convert JIS X 0201 Katakana: "
945 " --help, --version output: "
946 #if HELP_OUTPUT_HELP_OUTPUT
957 get_backup_filename(const char *suffix, const char *filename)
959 char *backup_filename;
960 int asterisk_count = 0;
962 int filename_length = strlen(filename);
964 for(i = 0; suffix[i]; i++){
965 if(suffix[i] == '*') asterisk_count++;
969 backup_filename = nkf_malloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
970 for(i = 0, j = 0; suffix[i];){
971 if(suffix[i] == '*'){
972 backup_filename[j] = '\0';
973 strncat(backup_filename, filename, filename_length);
975 j += filename_length;
977 backup_filename[j++] = suffix[i++];
980 backup_filename[j] = '\0';
982 j = filename_length + strlen(suffix);
983 backup_filename = nkf_malloc(j + 1);
984 strcpy(backup_filename, filename);
985 strcat(backup_filename, suffix);
986 backup_filename[j] = '\0';
988 return backup_filename;
992 #ifdef UTF8_INPUT_ENABLE
994 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1001 (*f)(0, bin2hex(c>>shift));
1012 encode_fallback_html(nkf_char c)
1017 if(c >= NKF_INT32_C(1000000))
1018 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1019 if(c >= NKF_INT32_C(100000))
1020 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1022 (*oconv)(0, 0x30+(c/10000 )%10);
1024 (*oconv)(0, 0x30+(c/1000 )%10);
1026 (*oconv)(0, 0x30+(c/100 )%10);
1028 (*oconv)(0, 0x30+(c/10 )%10);
1030 (*oconv)(0, 0x30+ c %10);
1036 encode_fallback_xml(nkf_char c)
1041 nkf_each_char_to_hex(oconv, c);
1047 encode_fallback_java(nkf_char c)
1051 if(!nkf_char_unicode_bmp_p(c)){
1055 (*oconv)(0, bin2hex(c>>20));
1056 (*oconv)(0, bin2hex(c>>16));
1060 (*oconv)(0, bin2hex(c>>12));
1061 (*oconv)(0, bin2hex(c>> 8));
1062 (*oconv)(0, bin2hex(c>> 4));
1063 (*oconv)(0, bin2hex(c ));
1068 encode_fallback_perl(nkf_char c)
1073 nkf_each_char_to_hex(oconv, c);
1079 encode_fallback_subchar(nkf_char c)
1081 c = unicode_subchar;
1082 (*oconv)((c>>8)&0xFF, c&0xFF);
1087 static const struct {
1111 {"katakana-hiragana","h3"},
1119 #ifdef UTF8_OUTPUT_ENABLE
1129 {"fb-subchar=", ""},
1131 #ifdef UTF8_INPUT_ENABLE
1132 {"utf8-input", "W"},
1133 {"utf16-input", "W16"},
1134 {"no-cp932ext", ""},
1135 {"no-best-fit-chars",""},
1137 #ifdef UNICODE_NORMALIZATION
1138 {"utf8mac-input", ""},
1150 #ifdef NUMCHAR_OPTION
1151 {"numchar-input", ""},
1157 #ifdef SHIFTJIS_CP932
1168 set_input_encoding(nkf_encoding *enc)
1170 switch (nkf_enc_to_index(enc)) {
1177 #ifdef SHIFTJIS_CP932
1180 #ifdef UTF8_OUTPUT_ENABLE
1181 ms_ucs_map_f = UCS_MAP_CP932;
1191 case ISO_2022_JP_2004:
1198 #ifdef SHIFTJIS_CP932
1201 #ifdef UTF8_OUTPUT_ENABLE
1202 ms_ucs_map_f = UCS_MAP_CP932;
1207 #ifdef SHIFTJIS_CP932
1210 #ifdef UTF8_OUTPUT_ENABLE
1211 ms_ucs_map_f = UCS_MAP_CP10001;
1219 #ifdef SHIFTJIS_CP932
1222 #ifdef UTF8_OUTPUT_ENABLE
1223 ms_ucs_map_f = UCS_MAP_CP932;
1227 #ifdef SHIFTJIS_CP932
1230 #ifdef UTF8_OUTPUT_ENABLE
1231 ms_ucs_map_f = UCS_MAP_MS;
1235 #ifdef SHIFTJIS_CP932
1238 #ifdef UTF8_OUTPUT_ENABLE
1239 ms_ucs_map_f = UCS_MAP_ASCII;
1242 case SHIFT_JISX0213:
1243 case SHIFT_JIS_2004:
1245 #ifdef SHIFTJIS_CP932
1252 #ifdef SHIFTJIS_CP932
1256 #ifdef UTF8_INPUT_ENABLE
1257 #ifdef UNICODE_NORMALIZATION
1265 input_endian = ENDIAN_BIG;
1269 input_endian = ENDIAN_LITTLE;
1274 input_endian = ENDIAN_BIG;
1278 input_endian = ENDIAN_LITTLE;
1285 set_output_encoding(nkf_encoding *enc)
1287 switch (nkf_enc_to_index(enc)) {
1290 #ifdef SHIFTJIS_CP932
1291 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1293 #ifdef UTF8_OUTPUT_ENABLE
1294 ms_ucs_map_f = UCS_MAP_CP932;
1298 #ifdef SHIFTJIS_CP932
1299 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1301 #ifdef UTF8_OUTPUT_ENABLE
1302 ms_ucs_map_f = UCS_MAP_CP932;
1307 #ifdef SHIFTJIS_CP932
1308 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1314 #ifdef SHIFTJIS_CP932
1315 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1321 #ifdef UTF8_OUTPUT_ENABLE
1322 ms_ucs_map_f = UCS_MAP_CP932;
1326 #ifdef UTF8_OUTPUT_ENABLE
1327 ms_ucs_map_f = UCS_MAP_CP10001;
1332 #ifdef SHIFTJIS_CP932
1333 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1335 #ifdef UTF8_OUTPUT_ENABLE
1336 ms_ucs_map_f = UCS_MAP_ASCII;
1341 #ifdef SHIFTJIS_CP932
1342 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1344 #ifdef UTF8_OUTPUT_ENABLE
1345 ms_ucs_map_f = UCS_MAP_ASCII;
1349 #ifdef SHIFTJIS_CP932
1350 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1352 #ifdef UTF8_OUTPUT_ENABLE
1353 ms_ucs_map_f = UCS_MAP_CP932;
1358 #ifdef UTF8_OUTPUT_ENABLE
1359 ms_ucs_map_f = UCS_MAP_MS;
1364 #ifdef UTF8_OUTPUT_ENABLE
1365 ms_ucs_map_f = UCS_MAP_ASCII;
1368 case SHIFT_JISX0213:
1369 case SHIFT_JIS_2004:
1371 #ifdef SHIFTJIS_CP932
1372 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1379 #ifdef SHIFTJIS_CP932
1380 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1383 #ifdef UTF8_OUTPUT_ENABLE
1385 output_bom_f = TRUE;
1389 output_bom_f = TRUE;
1392 output_endian = ENDIAN_LITTLE;
1393 output_bom_f = FALSE;
1396 output_endian = ENDIAN_LITTLE;
1397 output_bom_f = TRUE;
1400 output_bom_f = TRUE;
1403 output_endian = ENDIAN_LITTLE;
1404 output_bom_f = FALSE;
1407 output_endian = ENDIAN_LITTLE;
1408 output_bom_f = TRUE;
1414 static struct input_code*
1415 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1418 struct input_code *p = input_code_list;
1420 if (iconv_func == p->iconv_func){
1430 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1432 #ifdef INPUT_CODE_FIX
1433 if (f || !input_encoding)
1440 #ifdef INPUT_CODE_FIX
1441 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1447 if (estab_f && iconv_for_check != iconv){
1448 struct input_code *p = find_inputcode_byfunc(iconv);
1450 set_input_codename(p->name);
1453 iconv_for_check = iconv;
/*
 * x0212_shift: relocate a row value lying in 0x75..0x7f.
 * Two cases are visible, rebasing the range at 0x109 and at 0x113; the
 * conditions that choose between them are outside the lines shown here.
 */
1460 x0212_shift(nkf_char c)
/* 0x75..0x7f -> 0x109..0x113 */
1465 if (0x75 <= c && c <= 0x7f){
1466 ret = c + (0x109 - 0x75);
/* 0x75..0x7f -> 0x113..0x11d */
1469 if (0x75 <= c && c <= 0x7f){
1470 ret = c + (0x113 - 0x75);
/*
 * x0212_unshift: map a shifted row value back into the range starting at
 * 0x75.
 */
1478 x0212_unshift(nkf_char c)
/* 0x7f..0x88 -> 0x75..0x7e */
1481 if (0x7f <= c && c <= 0x88){
1482 ret = c + (0x75 - 0x7f);
/* 0x89..0x92 -> 0x75..0x7e, tagged with PREFIX_EUCG3 and the 0x80 bit */
1483 }else if (0x89 <= c && c <= 0x92){
1484 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1488 #endif /* X0212_ENABLE */
/*
 * e2s_conv: convert the two-byte code (c2, c1) into a Shift_JIS style byte
 * pair.  The lead byte is stored through p2 and the trail byte through p1;
 * either pointer may be NULL, in which case that byte is not stored.
 * Returns 1 when c2 is above 0x7F at the final range check.
 */
1491 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
/* rows 0x21..0x2F: lead byte derived from the row index with a per-8 adjustment */
1497 if((0x21 <= ndx && ndx <= 0x2F)){
1498 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1499 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/* rows 0x6E..0x7E: lead byte is a fixed offset from the halved row index */
1501 }else if(0x6E <= ndx && ndx <= 0x7E){
1502 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1503 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/* other printable rows: look the value up in the x0212_shiftjis table */
1509 else if(nkf_isgraph(ndx)){
1511 const unsigned short *ptr;
1512 ptr = x0212_shiftjis[ndx - 0x21];
1514 val = ptr[(c1 & 0x7f) - 0x21];
1523 c2 = x0212_shift(c2);
1525 #endif /* X0212_ENABLE */
/* generic path: odd rows use the 0x1f/0x20 trail offset, even rows 0x7e */
1527 if(0x7F < c2) return 1;
1528 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1529 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
/*
 * s2e_conv: convert a Shift_JIS style byte pair (c2 lead, c1 trail) to the
 * internal two-byte form.  Depending on build options and run-time flags it
 * consults the CP932, CP932-inverse and X0212 tables and applies the
 * JIS X 0213 plane-2 row mapping.  Returns 1 when c1 is above 0xFC.
 */
1534 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1536 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
/* indexed by [lead byte - 0xF0][trail byte above 0x9E] in the x0213 branch below */
1539 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1540 if (0xFC < c1) return 1;
1541 #ifdef SHIFTJIS_CP932
/* IBM extension lead bytes are remapped through shiftjis_cp932 unless cp932inv_f is set */
1542 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1543 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1550 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1551 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1557 #endif /* SHIFTJIS_CP932 */
/* without x0213, IBM extension lead bytes map through shiftjis_x0212 */
1559 if (!x0213_f && is_ibmext_in_sjis(c2)){
1560 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1563 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1576 if(x0213_f && c2 >= 0xF0){
1577 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1578 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1579 }else{ /* 78<=k<=94 */
1580 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1581 if (0x9E < c1) c2++;
1584 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1585 #define SJ6394 0x0161 /* 63 - 94 ku offset */
/* regular case: double the lead byte, subtract the ku offset, and add one
   when the trail byte is above 0x9E */
1586 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1587 if (0x9E < c1) c2++;
1590 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1597 c2 = x0212_unshift(c2);
1604 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1606 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1614 }else if (val < 0x800){
1615 *p1 = 0xc0 | (val >> 6);
1616 *p2 = 0x80 | (val & 0x3f);
1619 } else if (nkf_char_unicode_bmp_p(val)) {
1620 *p1 = 0xe0 | (val >> 12);
1621 *p2 = 0x80 | ((val >> 6) & 0x3f);
1622 *p3 = 0x80 | ( val & 0x3f);
1624 } else if (nkf_char_unicode_value_p(val)) {
1625 *p1 = 0xe0 | (val >> 16);
1626 *p2 = 0x80 | ((val >> 12) & 0x3f);
1627 *p3 = 0x80 | ((val >> 6) & 0x3f);
1628 *p4 = 0x80 | ( val & 0x3f);
1638 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
1645 else if (c1 <= 0xC3) {
1646 /* trail byte or invalid */
1649 else if (c1 <= 0xDF) {
1651 wc = (c1 & 0x1F) << 6;
1654 else if (c1 <= 0xEF) {
1656 wc = (c1 & 0x0F) << 12;
1657 wc |= (c2 & 0x3F) << 6;
1660 else if (c2 <= 0xF4) {
1662 wc = (c1 & 0x0F) << 18;
1663 wc |= (c2 & 0x3F) << 12;
1664 wc |= (c3 & 0x3F) << 6;
1674 #ifdef UTF8_INPUT_ENABLE
/*
 * unicode_to_jis_common2: table-lookup step of the UTF-8 to JIS mapping.
 *   c1, c0 - indices; c1 is range-checked against psize and c0 against
 *            sizeof_utf8_to_euc_C2.
 *   pp     - table of row pointers with psize entries.
 *   p2, p1 - output locations for the converted code.
 * Every visible failure path returns 1: missing table, index out of range,
 * missing row, a zero table value, or a value excluded by no_cp932ext_f.
 */
1676 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1677 const unsigned short *const *pp, nkf_char psize,
1678 nkf_char *p2, nkf_char *p1)
1681 const unsigned short *p;
1684 if (pp == 0) return 1;
1687 if (c1 < 0 || psize <= c1) return 1;
1689 if (p == 0) return 1;
1692 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1694 if (val == 0) return 1;
/* with no_cp932ext_f set, CP932-only extensions are excluded */
1695 if (no_cp932ext_f && (
1696 (val>>8) == 0x2D || /* NEC special characters */
1697 val > NKF_INT32_C(0xF300) /* IBM extended characters */
/* a lead value of SO is rewritten to JIS_X_0201_1976_K */
1705 if (c2 == SO) c2 = JIS_X_0201_1976_K;
/*
 * unicode_to_jis_common: map a UTF-8 sequence (c2 lead, c1, c0) to a JIS
 * code stored through p2/p1, choosing the mapping tables from ms_ucs_map_f.
 * When no_best_fit_chars_f is set, a number of sequences are rejected with
 * return value 1 before any table lookup; which ones depends on
 * ms_ucs_map_f and cp932inv_f.
 */
1713 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1715 const unsigned short *const *pp;
1716 const unsigned short *const *const *ppp;
/* The four tables below are indexed by (c1 & 0x3F); a nonzero entry makes
   the function return 1 for that trail byte. */
1717 static const char no_best_fit_chars_table_C2[] =
1718 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1719 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1720 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1721 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1722 static const char no_best_fit_chars_table_C2_ms[] =
1723 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1724 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1725 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1726 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1727 static const char no_best_fit_chars_table_932_C2[] =
1728 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1729 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1730 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1731 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1732 static const char no_best_fit_chars_table_932_C3[] =
1733 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1734 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1735 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1736 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
/* lead byte below 0xE0: two-byte sequence */
1742 }else if(c2 < 0xe0){
1743 if(no_best_fit_chars_f){
1744 if(ms_ucs_map_f == UCS_MAP_CP932){
1747 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1750 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1753 }else if(!cp932inv_f){
1756 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1759 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1762 }else if(ms_ucs_map_f == UCS_MAP_MS){
1763 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1764 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
/* pick the two-byte mapping table for the active UCS map */
1782 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1783 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1784 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1786 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
/* NOTE(review): this branch tests c0, the last byte, against 0xF0 while the
   neighbouring branch tests the lead byte c2; looks like c2 may have been
   intended - confirm against callers before changing. */
1787 }else if(c0 < 0xF0){
1788 if(no_best_fit_chars_f){
1789 if(ms_ucs_map_f == UCS_MAP_CP932){
1790 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1791 }else if(ms_ucs_map_f == UCS_MAP_MS){
1796 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1799 if(c0 == 0x92) return 1;
1804 if(c1 == 0x80 || c0 == 0x9C) return 1;
1807 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1812 if(c0 == 0x94) return 1;
1815 if(c0 == 0xBB) return 1;
1825 if(c0 == 0x95) return 1;
1828 if(c0 == 0xA5) return 1;
1835 if(c0 == 0x8D) return 1;
1838 if(c0 == 0x9E && !cp932inv_f) return 1;
1841 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
/* pick the three-byte mapping table; it is indexed by (c2 - 0xE0) */
1849 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1850 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1851 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1853 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1855 #ifdef SHIFTJIS_CP932
/* on success with a G3 result and cp932inv_f clear, round-trip the code
   through e2s_conv / s2e_conv */
1856 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1858 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1859 s2e_conv(s2, s1, p2, p1);
1868 #ifdef UTF8_OUTPUT_ENABLE
/*
 * e2w_conv: look up the Unicode value for the internal code (c2, c1).
 * The table is chosen from c2 (JIS X 0201 kana, a G3 code, or a regular
 * two-byte code) and from ms_ucs_map_f; c1 then indexes into the chosen
 * row after being reduced to (c1 & 0x7f) - 0x21.
 */
1870 e2w_conv(nkf_char c2, nkf_char c1)
1872 const unsigned short *p;
1874 if (c2 == JIS_X_0201_1976_K) {
1875 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1883 p = euc_to_utf8_1byte;
1885 } else if (is_eucg3(c2)){
/* special case for 0x8F22/0x43 under the ASCII map */
1886 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1889 c2 = (c2&0x7f) - 0x21;
1890 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1891 p = x0212_to_utf8_2bytes[c2];
/* regular two-byte code: row table depends on the active UCS map */
1897 c2 = (c2&0x7f) - 0x21;
1898 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1900 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1901 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1902 euc_to_utf8_2bytes_ms[c2];
1907 c1 = (c1 & 0x7f) - 0x21;
1908 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
/*
 * w2e_conv: convert a UTF-8 sequence (c2 lead, c1, c0) to the internal code
 * stored through p2/p1.  Lead bytes 0xC0..0xEF go through
 * unicode_to_jis_common(); with NUMCHAR_OPTION the decoded code point can be
 * stored in *p1 as a tagged Unicode value instead.
 */
1915 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1922 }else if (0xc0 <= c2 && c2 <= 0xef) {
1923 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1924 #ifdef NUMCHAR_OPTION
1927 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1935 #ifdef UTF8_INPUT_ENABLE
/*
 * w16e_conv: convert a Unicode code point to the internal code stored
 * through p2/p1.  BMP values are re-encoded as UTF-8 and passed to
 * unicode_to_jis_common(); the two visible fallbacks store the value in *p1
 * as a tagged Unicode character.
 */
1937 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1939 nkf_char c1, c2, c3, c4;
1946 else if (nkf_char_unicode_bmp_p(val)){
1947 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
1948 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
1951 *p1 = nkf_char_unicode_new(val);
1957 *p1 = nkf_char_unicode_new(val);
/*
 * e_iconv: input converter for EUC-style input.  Dispatches on the lead
 * value c2: JIS X 0201 kana / SS2, the 0x8f three-byte prefix, control and
 * special values, and ordinary two-byte codes.
 */
1964 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
1966 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
/* kana not allowed in this mode: substitute the GETA mark */
1967 if (iso2022jp_f && !x0201_f) {
1968 c2 = GETA1; c1 = GETA2;
1970 c2 = JIS_X_0201_1976_K;
1974 }else if (c2 == 0x8f){
1978 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
1979 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
1980 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
/* otherwise fold the prefix and the row into a single c2 value */
1983 c2 = (c2 << 8) | (c1 & 0x7f);
1985 #ifdef SHIFTJIS_CP932
1988 if (e2s_conv(c2, c1, &s2, &s1) == 0){
1989 s2e_conv(s2, s1, &c2, &c1);
1996 #endif /* SHIFTJIS_CP932 */
1998 #endif /* X0212_ENABLE */
1999 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2002 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2003 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2004 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2009 #ifdef SHIFTJIS_CP932
/* CP51932: rows 0x79..0x7c are round-tripped through the SJIS converters */
2010 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2012 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2013 s2e_conv(s2, s1, &c2, &c1);
2020 #endif /* SHIFTJIS_CP932 */
/*
 * s_iconv: input converter for Shift_JIS style input.  Handles single-byte
 * kana (0xA1..0xDF), control and special values, the user-defined area
 * (lead bytes 0xF0..0xF9 when x0213_f is clear), and otherwise converts
 * through s2e_conv(), propagating its nonzero return value.
 */
2028 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2030 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
/* kana not allowed in this mode: substitute the GETA mark */
2031 if (iso2022jp_f && !x0201_f) {
2032 c2 = GETA1; c1 = GETA2;
2036 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2038 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
/* 0x7F is not a valid trail byte; 188 trail positions per lead byte map
   linearly onto code points starting at U+E000 */
2040 if(c1 == 0x7F) return 0;
2041 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2044 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2045 if (ret) return ret;
/*
 * w_iconv: input converter for UTF-8.  c1 is the lead byte; c2 and c3 carry
 * the following bytes.  The lead byte is classified through
 * w_iconv_utf8_1st_byte and the trail bytes are range-checked per class.
 * Visible early returns: 0 for a bad second byte in the first class, and
 * -1 or -2 when c3 is still 0.
 * NOTE(review): -1 / -2 presumably mean "need one / two more bytes" - confirm
 * against the caller.
 */
2052 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2054 nkf_char ret = 0, c4 = 0;
/* class of each lead byte; the switch below indexes it with c1 - 0xC0 */
2055 static const char w_iconv_utf8_1st_byte[] =
2057 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2058 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2059 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2060 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2067 if (c1 < 0 || 0xff < c1) {
2068 }else if (c1 == 0) { /* 0 : 1 byte*/
2070 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2073 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2075 if (c2 < 0x80 || 0xBF < c2) return 0;
/* second byte restricted to 0xA0..0xBF */
2078 if (c3 == 0) return -1;
2079 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
/* any trail byte accepted in second position */
2084 if (c3 == 0) return -1;
2085 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
/* second byte restricted to 0x80..0x9F */
2089 if (c3 == 0) return -1;
2090 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
/* four-byte forms: second byte 0x90..0xBF, 0x80..0xBF, then 0x80..0x8F */
2094 if (c3 == 0) return -2;
2095 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2099 if (c3 == 0) return -2;
2100 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2104 if (c3 == 0) return -2;
2105 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2113 if (c1 == 0 || c1 == EOF){
/* four-byte sequences become tagged Unicode values; others go through w2e_conv */
2114 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2115 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2118 ret = w2e_conv(c1, c2, c3, &c1, &c2);
/* Returned by the Unicode input converters for a value that is not a usable code point. */
2126 #define NKF_ICONV_INVALID_CODE_RANGE -13
/*
 * unicode_iconv: convert one Unicode code point wc to the internal code.
 * Surrogate code points are rejected, lower values are converted with
 * w16e_conv() (whose nonzero return is propagated), and higher values are
 * kept as tagged Unicode characters.
 * NOTE(review): both upper bounds are exclusive (wc < 0xFFFF, wc < 0x10FFFF),
 * so U+FFFF takes the tagged-Unicode path and U+10FFFF appears to reach the
 * invalid-range return; looks like <= was intended - confirm.
 */
2128 unicode_iconv(nkf_char wc)
/* (wc >> 11) == 27 selects 0xD800..0xDFFF */
2136 }else if ((wc>>11) == 27) {
2137 /* unpaired surrogate */
2138 return NKF_ICONV_INVALID_CODE_RANGE;
2139 }else if (wc < 0xFFFF) {
2140 ret = w16e_conv(wc, &c2, &c1);
2141 if (ret) return ret;
2142 }else if (wc < 0x10FFFF) {
2144 c1 = nkf_char_unicode_new(wc);
2146 return NKF_ICONV_INVALID_CODE_RANGE;
/* Negative return codes asking the caller to supply more input bytes. */
2152 #define NKF_ICONV_NEED_ONE_MORE_BYTE -1
2153 #define NKF_ICONV_NEED_TWO_MORE_BYTES -2
/* Combine a surrogate pair: (lead << 10) + trail - 0x35FDC00, where
   0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000. */
2154 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
/*
 * nkf_iconv_utf_16: assemble a code point from UTF-16 bytes c1..c4 according
 * to input_endian and pass it to unicode_iconv.  When the first unit is a
 * high surrogate (high byte 0xD8..0xDB) and the second unit is not a low
 * surrogate (high byte 0xDC..0xDF), it returns
 * NKF_ICONV_NEED_TWO_MORE_BYTES.
 */
2156 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
/* big-endian: high bytes are c1 and c3 */
2165 if (input_endian == ENDIAN_BIG) {
2166 if (0xD8 <= c1 && c1 <= 0xDB) {
2167 if (0xDC <= c3 && c3 <= 0xDF) {
2168 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2169 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
/* other byte order: high bytes are c2 and c4 */
2174 if (0xD8 <= c2 && c2 <= 0xDB) {
2175 if (0xDC <= c4 && c4 <= 0xDF) {
2176 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2177 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2183 return (*unicode_iconv)(wc);
2187 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2193 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
/*
 * nkf_iconv_utf_32: assemble a code point from the four UTF-32 bytes c1..c4
 * according to input_endian and pass it to unicode_iconv.  Only three of the
 * four bytes contribute to the value in each visible byte order; the
 * remaining byte is the most significant one.  An unrecognised byte order
 * returns NKF_ICONV_INVALID_CODE_RANGE.
 */
2199 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2208 switch(input_endian){
/* one assignment per supported byte order */
2210 wc = c2 << 16 | c3 << 8 | c4;
2213 wc = c3 << 16 | c2 << 8 | c1;
2216 wc = c1 << 16 | c4 << 8 | c3;
2219 wc = c4 << 16 | c1 << 8 | c2;
2222 return NKF_ICONV_INVALID_CODE_RANGE;
2225 return (*unicode_iconv)(wc);
/*
 * output_ascii_escape_sequence(mode): when the current output_mode is
 * neither ASCII nor ISO_8859_1, emit an escape sequence ending in
 * ascii_intro through o_putc and record `mode` as the new output_mode.
 */
2229 #define output_ascii_escape_sequence(mode) do { \
2230 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2233 (*o_putc)(ascii_intro); \
2234 output_mode = mode; \
/*
 * output_escape_sequence: emit the escape sequence that switches the output
 * to `mode`.  The first visible check compares `mode` with the current
 * output_mode, so an unchanged mode is handled separately from the
 * per-mode cases.
 */
2239 output_escape_sequence(int mode)
2241 if (output_mode == mode)
2249 case JIS_X_0201_1976_K:
2257 (*o_putc)(kanji_intro);
/*
 * j_oconv: output converter producing ISO-2022-JP style output.  It writes
 * through o_putc and switches character sets with
 * output_escape_sequence() / output_ascii_escape_sequence().
 */
2282 j_oconv(nkf_char c2, nkf_char c1)
2284 #ifdef NUMCHAR_OPTION
/* tagged Unicode input: try to map it; if it is still Unicode afterwards,
   handle the private-use range or hand it to encode_fallback */
2285 if (c2 == 0 && nkf_char_unicode_p(c1)){
2286 w16e_conv(c1, &c2, &c1);
2287 if (c2 == 0 && nkf_char_unicode_p(c1)){
2288 c2 = c1 & VALUE_MASK;
2289 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* 94 cells per row, rows starting at 0x7F */
2292 c2 = 0x7F + c1 / 94;
2293 c1 = 0x21 + c1 % 94;
2295 if (encode_fallback) (*encode_fallback)(c1);
2302 output_ascii_escape_sequence(ASCII);
2305 else if (c2 == EOF) {
2306 output_ascii_escape_sequence(ASCII);
2309 else if (c2 == ISO_8859_1) {
2310 output_ascii_escape_sequence(ISO_8859_1);
2313 else if (c2 == JIS_X_0201_1976_K) {
2314 output_escape_sequence(JIS_X_0201_1976_K);
2317 } else if (is_eucg3(c2)){
2318 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2319 (*o_putc)(c2 & 0x7f);
/* out-of-range codes are dropped; the wider c2 bound admits the rows
   produced by the private-use mapping above */
2324 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2325 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2326 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
/*
 * e_oconv: output converter producing EUC-JP style output through o_putc.
 * It updates output_mode to ASCII, EUC_JP or ISO_8859_1 according to the
 * kind of character written.
 */
2333 e_oconv(nkf_char c2, nkf_char c1)
/* tagged Unicode input: try to map it; if it is still Unicode afterwards,
   handle the private-use range or hand it to encode_fallback */
2335 if (c2 == 0 && nkf_char_unicode_p(c1)){
2336 w16e_conv(c1, &c2, &c1);
2337 if (c2 == 0 && nkf_char_unicode_p(c1)){
2338 c2 = c1 & VALUE_MASK;
2339 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* the first 10 rows land at 0x75.., the rest get the 0x8F prefix */
2343 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2344 c1 = 0x21 + c1 % 94;
2347 (*o_putc)((c2 & 0x7f) | 0x080);
2348 (*o_putc)(c1 | 0x080);
2350 (*o_putc)((c2 & 0x7f) | 0x080);
2351 (*o_putc)(c1 | 0x080);
2355 if (encode_fallback) (*encode_fallback)(c1);
2363 } else if (c2 == 0) {
2364 output_mode = ASCII;
2366 } else if (c2 == JIS_X_0201_1976_K) {
/* kana is written as SS2 followed by the byte with the high bit set */
2367 output_mode = EUC_JP;
2368 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2369 } else if (c2 == ISO_8859_1) {
2370 output_mode = ISO_8859_1;
2371 (*o_putc)(c1 | 0x080);
2373 } else if (is_eucg3(c2)){
2374 output_mode = EUC_JP;
2375 #ifdef SHIFTJIS_CP932
2378 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2379 s2e_conv(s2, s1, &c2, &c1);
2384 output_mode = ASCII;
2386 }else if (is_eucg3(c2)){
2389 (*o_putc)((c2 & 0x7f) | 0x080);
2390 (*o_putc)(c1 | 0x080);
2393 (*o_putc)((c2 & 0x7f) | 0x080);
2394 (*o_putc)(c1 | 0x080);
/* non-graphic bytes here mean the input code was guessed wrong: reset the
   converter and drop the character */
2398 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2399 set_iconv(FALSE, 0);
2400 return; /* too late to rescue this char */
2402 output_mode = EUC_JP;
2403 (*o_putc)(c2 | 0x080);
2404 (*o_putc)(c1 | 0x080);
/*
 * s_oconv: output converter producing Shift_JIS style output through
 * o_putc.  It updates output_mode to ASCII, SHIFT_JIS or ISO_8859_1
 * according to the kind of character written.
 */
2409 s_oconv(nkf_char c2, nkf_char c1)
2411 #ifdef NUMCHAR_OPTION
/* tagged Unicode input: try to map it; if it is still Unicode afterwards,
   handle the private-use range or hand it to encode_fallback */
2412 if (c2 == 0 && nkf_char_unicode_p(c1)){
2413 w16e_conv(c1, &c2, &c1);
2414 if (c2 == 0 && nkf_char_unicode_p(c1)){
2415 c2 = c1 & VALUE_MASK;
2416 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* 188 trail positions per lead byte; the lead base depends on cp932inv_f */
2419 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
/* trail bytes start at 0x40 and skip 0x7F */
2421 c1 += 0x40 + (c1 > 0x3e);
2426 if(encode_fallback)(*encode_fallback)(c1);
2435 } else if (c2 == 0) {
2436 output_mode = ASCII;
2438 } else if (c2 == JIS_X_0201_1976_K) {
2439 output_mode = SHIFT_JIS;
2441 } else if (c2 == ISO_8859_1) {
2442 output_mode = ISO_8859_1;
2443 (*o_putc)(c1 | 0x080);
2445 } else if (is_eucg3(c2)){
2446 output_mode = SHIFT_JIS;
2447 if (e2s_conv(c2, c1, &c2, &c1) == 0){
/* non-printable bytes here mean the input code was guessed wrong: reset the
   converter and drop the character */
2453 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2454 set_iconv(FALSE, 0);
2455 return; /* too late to rescue this char */
2457 output_mode = SHIFT_JIS;
2458 e2s_conv(c2, c1, &c2, &c1);
2460 #ifdef SHIFTJIS_CP932
2462 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2463 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2469 #endif /* SHIFTJIS_CP932 */
/* emit the configured prefix byte for this trail byte, if any */
2472 if (prefix_table[(unsigned char)c1]){
2473 (*o_putc)(prefix_table[(unsigned char)c1]);
2479 #ifdef UTF8_OUTPUT_ENABLE
/*
 * w_oconv: output converter producing UTF-8 through o_putc.  Tagged Unicode
 * input is encoded directly; other codes are first mapped with e2w_conv().
 * Bytes that nkf_unicode_to_utf8() leaves as 0 are not written.
 */
2481 w_oconv(nkf_char c2, nkf_char c1)
/* clear the BOM request flag */
2487 output_bom_f = FALSE;
2498 if (c2 == 0 && nkf_char_unicode_p(c1)){
2499 val = c1 & VALUE_MASK;
2500 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2502 if (c2) (*o_putc)(c2);
2503 if (c3) (*o_putc)(c3);
2504 if (c4) (*o_putc)(c4);
2511 val = e2w_conv(c2, c1);
2513 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2515 if (c2) (*o_putc)(c2);
2516 if (c3) (*o_putc)(c3);
2517 if (c4) (*o_putc)(c4);
/*
 * w_oconv16: output converter producing UTF-16 through o_putc, in the byte
 * order selected by output_endian.  Code points outside the BMP are written
 * as a surrogate pair.
 */
2523 w_oconv16(nkf_char c2, nkf_char c1)
/* clear the BOM request flag */
2526 output_bom_f = FALSE;
2527 if (output_endian == ENDIAN_LITTLE){
2541 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2542 if (nkf_char_unicode_bmp_p(c1)) {
2543 c2 = (c1 >> 8) & 0xff;
2547 if (c1 <= UNICODE_MAX) {
/* 0xD7C0 == 0xD800 - (0x10000 >> 10) */
2548 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2549 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2550 if (output_endian == ENDIAN_LITTLE){
2551 (*o_putc)(c2 & 0xff);
2552 (*o_putc)((c2 >> 8) & 0xff);
2553 (*o_putc)(c1 & 0xff);
2554 (*o_putc)((c1 >> 8) & 0xff);
2556 (*o_putc)((c2 >> 8) & 0xff);
2557 (*o_putc)(c2 & 0xff);
2558 (*o_putc)((c1 >> 8) & 0xff);
2559 (*o_putc)(c1 & 0xff);
/* non-Unicode internal code: map it with e2w_conv first */
2565 nkf_char val = e2w_conv(c2, c1);
2566 c2 = (val >> 8) & 0xff;
2571 if (output_endian == ENDIAN_LITTLE){
/*
 * w_oconv32: output converter producing UTF-32 through o_putc, in the byte
 * order selected by output_endian.  Internal codes that are not tagged
 * Unicode are mapped with e2w_conv().
 */
2581 w_oconv32(nkf_char c2, nkf_char c1)
/* clear the BOM request flag */
2584 output_bom_f = FALSE;
2585 if (output_endian == ENDIAN_LITTLE){
2603 if (c2 == ISO_8859_1) {
2605 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2608 c1 = e2w_conv(c2, c1);
/* low three bytes of the code point, least significant first ... */
2611 if (output_endian == ENDIAN_LITTLE){
2612 (*o_putc)( c1 & 0xFF);
2613 (*o_putc)((c1 >> 8) & 0xFF);
2614 (*o_putc)((c1 >> 16) & 0xFF);
/* ... or most significant first */
2618 (*o_putc)((c1 >> 16) & 0xFF);
2619 (*o_putc)((c1 >> 8) & 0xFF);
2620 (*o_putc)( c1 & 0xFF);
/* Bit flags OR-ed into input_code.score by set_code_score(); each flag is
   the previous one shifted left by one bit. */
2625 #define SCORE_L2 (1) /* JIS level-2 kanji */
2626 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
2627 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
2628 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 (IBM extended characters) */
2629 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2630 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* character that does not exist */
2631 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
2632 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
/* score value assigned by status_reset() */
2634 #define SCORE_INIT (SCORE_iMIME)
/* Score flags looked up by code_score() with (c2 & 0x0f) when
   (c2 & 0x70) == 0x20. */
2636 static const char score_table_A0[] = {
2639 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2640 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
/* Score flags looked up by code_score() with (c2 & 0x0f) when
   (c2 & 0x70) == 0x70. */
2643 static const char score_table_F0[] = {
2644 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2645 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2646 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2647 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
/* set_code_score: OR the given score flag(s) into ptr->score. */
2651 set_code_score(struct input_code *ptr, nkf_char score)
2654 ptr->score |= score;
/* clr_code_score: clear the given score flag(s) in ptr->score. */
2659 clr_code_score(struct input_code *ptr, nkf_char score)
2662 ptr->score &= ~score;
/*
 * code_score: add score flags to ptr->score based on the character held in
 * ptr->buf (buf[0] as c2 and, with UTF8_OUTPUT_ENABLE, buf[1] as c1).
 * The branches are tested in order, so at most one flag set is added.
 */
2667 code_score(struct input_code *ptr)
2669 nkf_char c2 = ptr->buf[0];
2670 #ifdef UTF8_OUTPUT_ENABLE
2671 nkf_char c1 = ptr->buf[1];
2674 set_code_score(ptr, SCORE_ERROR);
2675 }else if (c2 == SS2){
2676 set_code_score(ptr, SCORE_KANA);
2677 }else if (c2 == 0x8f){
2678 set_code_score(ptr, SCORE_X0212);
2679 #ifdef UTF8_OUTPUT_ENABLE
/* no Unicode mapping exists for this code */
2680 }else if (!e2w_conv(c2, c1)){
2681 set_code_score(ptr, SCORE_NO_EXIST);
/* table-driven scores for lead values 0x2x and 0x7x */
2683 }else if ((c2 & 0x70) == 0x20){
2684 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2685 }else if ((c2 & 0x70) == 0x70){
2686 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2687 }else if ((c2 & 0x70) >= 0x50){
2688 set_code_score(ptr, SCORE_L2);
/*
 * status_disable: rule this candidate encoding out.  If the currently
 * selected iconv is this candidate's converter, the selection is reset
 * with set_iconv(FALSE, 0).
 */
2693 status_disable(struct input_code *ptr)
2698 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
/* status_push_ch: append c to ptr->buf and advance ptr->index. */
2702 status_push_ch(struct input_code *ptr, nkf_char c)
2704 ptr->buf[ptr->index++] = c;
2708 status_clear(struct input_code *ptr)
/* status_reset: restore the candidate's score to SCORE_INIT. */
2715 status_reset(struct input_code *ptr)
2718 ptr->score = SCORE_INIT;
/* status_reinit: clear the candidate's per-file state (_file_stat). */
2722 status_reinit(struct input_code *ptr)
2725 ptr->_file_stat = 0;
/* status_check: acts only when c is at most DEL and the input code is
   already established (estab_f). */
2729 status_check(struct input_code *ptr, nkf_char c)
2731 if (c <= DEL && estab_f){
/*
 * s_status: feed one input byte c to the Shift_JIS candidate.  Lead bytes
 * are classified by range and buffered with status_push_ch(); a valid trail
 * byte (0x40..0x7e or 0x80..0xfc) completes the character, which is then
 * converted in place with s2e_conv().  Bytes that fit no rule disable the
 * candidate.
 */
2737 s_status(struct input_code *ptr, nkf_char c)
2741 status_check(ptr, c);
2746 }else if (nkf_char_unicode_p(c)){
/* single-byte kana: recorded as SS2 followed by the byte */
2748 }else if (0xa1 <= c && c <= 0xdf){
2749 status_push_ch(ptr, SS2);
2750 status_push_ch(ptr, c);
/* ordinary two-byte lead bytes */
2753 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2755 status_push_ch(ptr, c);
2756 }else if (0xed <= c && c <= 0xee){
2758 status_push_ch(ptr, c);
2759 #ifdef SHIFTJIS_CP932
2760 }else if (is_ibmext_in_sjis(c)){
2762 status_push_ch(ptr, c);
2763 #endif /* SHIFTJIS_CP932 */
2765 }else if (0xf0 <= c && c <= 0xfc){
2767 status_push_ch(ptr, c);
2768 #endif /* X0212_ENABLE */
2770 status_disable(ptr);
/* trail byte of an ordinary character */
2774 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2775 status_push_ch(ptr, c);
2776 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2780 status_disable(ptr);
2784 #ifdef SHIFTJIS_CP932
/* trail byte after an extension lead byte: a successful conversion is
   scored as SCORE_CP932 */
2785 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2786 status_push_ch(ptr, c);
2787 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2788 set_code_score(ptr, SCORE_CP932);
2793 #endif /* SHIFTJIS_CP932 */
2794 status_disable(ptr);
2797 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2798 status_push_ch(ptr, c);
2799 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2800 set_code_score(ptr, SCORE_CP932);
2803 status_disable(ptr);
/*
 * e_status: feed one input byte c to the EUC candidate.  SS2, 0x8f and bytes
 * in 0xa1..0xfe start a character; following bytes must lie in 0xa1..0xfe.
 * Bytes that fit no rule disable the candidate.
 */
2810 e_status(struct input_code *ptr, nkf_char c)
2814 status_check(ptr, c);
2819 }else if (nkf_char_unicode_p(c)){
2821 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2823 status_push_ch(ptr, c);
/* 0x8f introduces a three-byte character */
2825 }else if (0x8f == c){
2827 status_push_ch(ptr, c);
2828 #endif /* X0212_ENABLE */
2830 status_disable(ptr);
2834 if (0xa1 <= c && c <= 0xfe){
2835 status_push_ch(ptr, c);
2839 status_disable(ptr);
2844 if (0xa1 <= c && c <= 0xfe){
2846 status_push_ch(ptr, c);
2848 status_disable(ptr);
2850 #endif /* X0212_ENABLE */
2854 #ifdef UTF8_INPUT_ENABLE
/*
 * w_status: feed one input byte c to the UTF-8 candidate.  Lead bytes
 * 0xc0..0xdf, 0xe0..0xef and 0xf0..0xf4 start a sequence; following bytes
 * must lie in 0x80..0xbf.  A completed sequence is converted in place with
 * w2e_conv().  Bytes that fit no rule disable the candidate.
 */
2856 w_status(struct input_code *ptr, nkf_char c)
2860 status_check(ptr, c);
2865 }else if (nkf_char_unicode_p(c)){
2867 }else if (0xc0 <= c && c <= 0xdf){
2869 status_push_ch(ptr, c);
2870 }else if (0xe0 <= c && c <= 0xef){
2872 status_push_ch(ptr, c);
2873 }else if (0xf0 <= c && c <= 0xf4){
2875 status_push_ch(ptr, c);
2877 status_disable(ptr);
2882 if (0x80 <= c && c <= 0xbf){
2883 status_push_ch(ptr, c);
/* sequence complete: note whether it is the BOM (EF BB BF) before
   converting the buffered bytes */
2884 if (ptr->index > ptr->stat){
2885 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2886 && ptr->buf[2] == 0xbf);
2887 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2888 &ptr->buf[0], &ptr->buf[1]);
2895 status_disable(ptr);
2899 if (0x80 <= c && c <= 0xbf){
2900 if (ptr->index < ptr->stat){
2901 status_push_ch(ptr, c);
2906 status_disable(ptr);
/*
 * code_status: feed one input byte c to the status function of the
 * candidate encodings in input_code_list.  When a result candidate has been
 * chosen and no code is established yet, its converter is installed with
 * set_iconv(TRUE, ...).
 */
2914 code_status(nkf_char c)
2916 int action_flag = 1;
2917 struct input_code *result = 0;
2918 struct input_code *p = input_code_list;
/* entries without a status function are not fed */
2920 if (!p->status_func) {
2924 if (!p->status_func)
2926 (p->status_func)(p, c);
2929 }else if(p->stat == 0){
2940 if (result && !estab_f){
2941 set_iconv(TRUE, result->iconv_func);
2942 }else if (c <= DEL){
2943 struct input_code *ptr = input_code_list;
2957 return std_gc_buf[--std_gc_ndx];
/*
 * std_ungetc: push c back onto the std_gc_buf stack.  A full buffer
 * (std_gc_ndx == STD_GC_BUFSIZE) is checked for before the push.
 */
2964 std_ungetc(nkf_char c, FILE *f)
2966 if (std_gc_ndx == STD_GC_BUFSIZE){
2969 std_gc_buf[std_gc_ndx++] = c;
2975 std_putc(nkf_char c)
/* Hold buffer for bytes read while the input code is still undecided, and
   the number of bytes currently held. */
2982 static unsigned char hold_buf[HOLD_SIZE*2];
2983 static int hold_count = 0;
/*
 * push_hold_buf: append one byte (c2 truncated to unsigned char) to
 * hold_buf.  Returns EOF when the buffer has become full, otherwise the new
 * byte count.
 */
2985 push_hold_buf(nkf_char c2)
/* already full before the push */
2987 if (hold_count >= HOLD_SIZE*2)
2989 hold_buf[hold_count++] = (unsigned char)c2;
2990 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
/*
 * h_conv: read ahead from f into the hold buffer until the input code is
 * established, EOF is reached or the buffer is full, then replay the held
 * bytes through the selected iconv.  If nothing was established, the
 * candidate with the lowest score is installed.
 */
2994 h_conv(FILE *f, int c1, int c2)
3000 /** it must NOT be in the kanji shift sequence */
3001 /** it must NOT be written in JIS7 */
3002 /** and it must be after 2 byte 8bit code */
3008 while ((c2 = (*i_getc)(f)) != EOF) {
3014 if (push_hold_buf(c2) == EOF || estab_f) {
3020 struct input_code *p = input_code_list;
3021 struct input_code *result = p;
/* prefer the candidate with the smallest score */
3026 if (p->status_func && p->score < result->score) {
3031 set_iconv(TRUE, result->iconv_func);
3036 ** 1) EOF is detected, or
3037 ** 2) Code is established, or
3038 ** 3) Buffer is FULL (but last word is pushed)
3040 ** in 1) and 3) cases, we continue to use
3041 ** Kanji codes by oconv and leave estab_f unchanged.
/* replay the held bytes */
3046 while (hold_index < hold_count){
3047 c1 = hold_buf[hold_index++];
3051 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3052 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3055 if (hold_index < hold_count){
3056 c2 = hold_buf[hold_index++];
3066 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
/* two more bytes wanted: take them from the hold buffer first, then from f */
3069 if (hold_index < hold_count){
3070 c3 = hold_buf[hold_index++];
3071 } else if ((c3 = (*i_getc)(f)) == EOF) {
3076 if (hold_index < hold_count){
3077 c4 = hold_buf[hold_index++];
3078 } else if ((c4 = (*i_getc)(f)) == EOF) {
/* the third and fourth bytes are packed into the last argument */
3083 (*iconv)(c1, c2, (c3<<8)|c4);
3088 /* 3 bytes EUC or UTF-8 */
3089 if (hold_index < hold_count){
3090 c3 = hold_buf[hold_index++];
3091 } else if ((c3 = (*i_getc)(f)) == EOF) {
3097 (*iconv)(c1, c2, c3);
3100 if (c3 == EOF) break;
3106 * Check and Ignore BOM
/*
 * BOM detection: read up to four bytes from f and compare them with the
 * known byte-order marks.  On a match, and when no input encoding was given
 * explicitly, the matching converter is installed with set_iconv(); if that
 * converter is the active one, input_endian is set.  Bytes that do not
 * belong to a consumed BOM are pushed back with i_ungetc in reverse order.
 */
3112 switch(c2 = (*i_getc)(f)){
/* 00 00 FE FF: UTF-32, ENDIAN_BIG */
3114 if((c2 = (*i_getc)(f)) == 0x00){
3115 if((c2 = (*i_getc)(f)) == 0xFE){
3116 if((c2 = (*i_getc)(f)) == 0xFF){
3117 if(!input_encoding){
3118 set_iconv(TRUE, w_iconv32);
3120 if (iconv == w_iconv32) {
3121 input_endian = ENDIAN_BIG;
3124 (*i_ungetc)(0xFF,f);
3125 }else (*i_ungetc)(c2,f);
3126 (*i_ungetc)(0xFE,f);
/* 00 00 FF FE: UTF-32, ENDIAN_2143 */
3127 }else if(c2 == 0xFF){
3128 if((c2 = (*i_getc)(f)) == 0xFE){
3129 if(!input_encoding){
3130 set_iconv(TRUE, w_iconv32);
3132 if (iconv == w_iconv32) {
3133 input_endian = ENDIAN_2143;
3136 (*i_ungetc)(0xFF,f);
3137 }else (*i_ungetc)(c2,f);
3138 (*i_ungetc)(0xFF,f);
3139 }else (*i_ungetc)(c2,f);
3140 (*i_ungetc)(0x00,f);
3141 }else (*i_ungetc)(c2,f);
3142 (*i_ungetc)(0x00,f);
/* EF BB BF: UTF-8 */
3145 if((c2 = (*i_getc)(f)) == 0xBB){
3146 if((c2 = (*i_getc)(f)) == 0xBF){
3147 if(!input_encoding){
3148 set_iconv(TRUE, w_iconv);
3150 if (iconv == w_iconv) {
3153 (*i_ungetc)(0xBF,f);
3154 }else (*i_ungetc)(c2,f);
3155 (*i_ungetc)(0xBB,f);
3156 }else (*i_ungetc)(c2,f);
3157 (*i_ungetc)(0xEF,f);
/* FE FF 00 00: UTF-32, ENDIAN_3412; otherwise FE FF: UTF-16, ENDIAN_BIG */
3160 if((c2 = (*i_getc)(f)) == 0xFF){
3161 if((c2 = (*i_getc)(f)) == 0x00){
3162 if((c2 = (*i_getc)(f)) == 0x00){
3163 if(!input_encoding){
3164 set_iconv(TRUE, w_iconv32);
3166 if (iconv == w_iconv32) {
3167 input_endian = ENDIAN_3412;
3170 (*i_ungetc)(0x00,f);
3171 }else (*i_ungetc)(c2,f);
3172 (*i_ungetc)(0x00,f);
3173 }else (*i_ungetc)(c2,f);
3174 if(!input_encoding){
3175 set_iconv(TRUE, w_iconv16);
3177 if (iconv == w_iconv16) {
3178 input_endian = ENDIAN_BIG;
3181 (*i_ungetc)(0xFF,f);
3182 }else (*i_ungetc)(c2,f);
3183 (*i_ungetc)(0xFE,f);
/* FF FE 00 00: UTF-32, ENDIAN_LITTLE; otherwise FF FE: UTF-16, ENDIAN_LITTLE */
3186 if((c2 = (*i_getc)(f)) == 0xFE){
3187 if((c2 = (*i_getc)(f)) == 0x00){
3188 if((c2 = (*i_getc)(f)) == 0x00){
3189 if(!input_encoding){
3190 set_iconv(TRUE, w_iconv32);
3192 if (iconv == w_iconv32) {
3193 input_endian = ENDIAN_LITTLE;
3196 (*i_ungetc)(0x00,f);
3197 }else (*i_ungetc)(c2,f);
3198 (*i_ungetc)(0x00,f);
3199 }else (*i_ungetc)(c2,f);
3200 if(!input_encoding){
3201 set_iconv(TRUE, w_iconv16);
3203 if (iconv == w_iconv16) {
3204 input_endian = ENDIAN_LITTLE;
3207 (*i_ungetc)(0xFE,f);
3208 }else (*i_ungetc)(c2,f);
3209 (*i_ungetc)(0xFF,f);
/* init_broken_state: zero the whole broken_state structure. */
3224 init_broken_state(void)
3226 memset(&broken_state, 0, sizeof(broken_state));
3232 broken_state.buf[broken_state.count++] = c;
/* pop_broken_buf: remove and return the most recently pushed entry of
   broken_state.buf.  The caller must ensure count is greater than 0. */
3236 pop_broken_buf(void)
3238 return broken_state.buf[--broken_state.count];
/*
 * broken_getc: input filter that recognises JIS shift sequences whose ESC
 * byte is missing.  Pushed-back bytes in broken_state.buf are returned
 * first.  A '$' followed by '@' or 'B', or a '(' followed by 'J' or 'B',
 * seen in a matching input_mode and not directly after a real ESC, is
 * treated as such a sequence.
 */
3242 broken_getc(FILE *f)
3246 if (broken_state.count > 0) {
3247 return pop_broken_buf();
/* "$@" / "$B" while reading single-byte text */
3250 if (c=='$' && broken_state.status != ESC
3251 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3253 broken_state.status = 0;
3254 if (c1=='@'|| c1=='B') {
3255 push_broken_buf(c1);
/* "(J" / "(B" while reading kanji or kana */
3262 } else if (c=='(' && broken_state.status != ESC
3263 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3265 broken_state.status = 0;
3266 if (c1=='J'|| c1=='B') {
3267 push_broken_buf(c1);
/* remember the byte so that a real ESC can be recognised next time */
3275 broken_state.status = c;
/* broken_ungetc: push-back counterpart of broken_getc; it acts only while
   fewer than two bytes are held in broken_state. */
3281 broken_ungetc(nkf_char c, FILE *f)
3283 if (broken_state.count < 2)
/*
 * eol_conv: end-of-line filter in front of o_eol_conv.  With guess_f set it
 * records the line-ending style seen in the input in input_eol (LF, CRLF or
 * CR, or EOF once styles are mixed).  On output, a pending CR or a bare LF
 * is replaced by the line ending selected by eolmode_f.
 */
3289 eol_conv(nkf_char c2, nkf_char c1)
3291 if (guess_f && input_eol != EOF) {
3292 if (c2 == 0 && c1 == LF) {
3293 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3294 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3295 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3297 else if (!input_eol) input_eol = CR;
3298 else if (input_eol != CR) input_eol = EOF;
/* emit CR unless the mode is LF-only, and LF unless the mode is CR-only */
3300 if (prev_cr || (c2 == 0 && c1 == LF)) {
3302 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3303 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
/* a CR is held back in prev_cr; anything except a bare LF is passed on */
3305 if (c2 == 0 && c1 == CR) prev_cr = CR;
3306 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3310 Return value of fold_conv()
3312 LF add newline and output char
3313 CR add newline and output nothing
3316 1 (or else) normal output
3318 fold state in prev (previous character)
3320 >0x80 Japanese (X0208/X0201)
3325 This fold algorithm does not preserve heading space in a line.
3326 This is the main difference from fmt.
/*
 * char_size(c2, c1): number of columns a character adds to the fold line
 * counter: 2 when the lead value c2 is nonzero, otherwise 1.  c1 is
 * accepted for call-site symmetry and is not evaluated.
 * The argument is parenthesized so that compound expressions passed as c2
 * are evaluated as a whole.
 */
#define char_size(c2,c1) ((c2) ? 2 : 1)
/*
 * fold_conv: line-folding filter in front of o_fconv.  For each character
 * it computes fold_state (0, 1, SP, LF or CR as the assignments below show)
 * from the current column count f_line, the previous character f_prev and a
 * set of kinsoku (line-break prohibition) rules, then acts on it in the
 * terminator switch at the end.
 */
3332 fold_conv(nkf_char c2, nkf_char c1)
3335 nkf_char fold_state;
3337 if (c1== CR && !fold_preserve_f) {
3338 fold_state=0; /* ignore cr */
3339 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3341 fold_state=0; /* ignore cr */
3342 } else if (c1== BS) {
3343 if (f_line>0) f_line--;
3345 } else if (c2==EOF && f_line != 0) { /* close open last line */
3347 } else if ((c1==LF && !fold_preserve_f)
3348 || ((c1==CR||(c1==LF&&f_prev!=CR))
3349 && fold_preserve_f)) {
3351 if (fold_preserve_f) {
3355 } else if ((f_prev == c1 && !fold_preserve_f)
3356 || (f_prev == LF && fold_preserve_f)
3357 ) { /* duplicate newline */
3360 fold_state = LF; /* output two newline */
3366 if (f_prev&0x80) { /* Japanese? */
3368 fold_state = 0; /* ignore given single newline */
3369 } else if (f_prev==SP) {
3373 if (++f_line<=fold_len)
3377 fold_state = CR; /* fold and output nothing */
3381 } else if (c1=='\f') {
3384 fold_state = LF; /* output newline and clear */
3385 } else if ( (c2==0 && c1==SP)||
3386 (c2==0 && c1==TAB)||
3387 (c2=='!'&& c1=='!')) {
3388 /* X0208 kankaku or ascii space */
3390 fold_state = 0; /* remove duplicate spaces */
3393 if (++f_line<=fold_len)
3394 fold_state = SP; /* output ASCII space only */
3396 f_prev = SP; f_line = 0;
3397 fold_state = CR; /* fold and output nothing */
3401 prev0 = f_prev; /* we still need this one... , but almost done */
3403 if (c2 || c2 == JIS_X_0201_1976_K)
3404 f_prev |= 0x80; /* this is Japanese */
3405 f_line += char_size(c2,c1);
3406 if (f_line<=fold_len) { /* normal case */
3409 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3410 f_line = char_size(c2,c1);
3411 fold_state = LF; /* We can't wait, do fold now */
3412 } else if (c2 == JIS_X_0201_1976_K) {
3413 /* simple kinsoku rules return 1 means no folding */
3414 if (c1==(0xde&0x7f)) fold_state = 1; /* half-width voiced sound mark (dakuten) */
3415 else if (c1==(0xdf&0x7f)) fold_state = 1; /* half-width semi-voiced sound mark (handakuten) */
3416 else if (c1==(0xa4&0x7f)) fold_state = 1; /* half-width ideographic comma */
3417 else if (c1==(0xa3&0x7f)) fold_state = 1; /* half-width closing corner bracket */
3418 else if (c1==(0xa1&0x7f)) fold_state = 1; /* half-width ideographic full stop */
3419 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3420 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3422 fold_state = LF;/* add one new f_line before this character */
3425 fold_state = LF;/* add one new f_line before this character */
3428 /* kinsoku point in ASCII */
3429 if ( c1==')'|| /* { [ ( */
3440 /* just after special */
3441 } else if (!is_alnum(prev0)) {
3442 f_line = char_size(c2,c1);
3444 } else if ((prev0==SP) || /* ignored new f_line */
3445 (prev0==LF)|| /* ignored new f_line */
3446 (prev0&0x80)) { /* X0208 - ASCII */
3447 f_line = char_size(c2,c1);
3448 fold_state = LF;/* add one new f_line before this character */
3450 fold_state = 1; /* default no fold in ASCII */
/* The original comments on the next twelve tests named JIS X 0208 row 0x21
   punctuation; the cell values below follow the byte tested by the code. */
3454 if (c1=='"') fold_state = 1; /* 0x2122 ideographic comma */
3455 else if (c1=='#') fold_state = 1; /* 0x2123 ideographic full stop */
3456 else if (c1=='W') fold_state = 1; /* 0x2157 closing corner bracket */
3457 else if (c1=='K') fold_state = 1; /* 0x214B full-width closing parenthesis */
3458 else if (c1=='$') fold_state = 1; /* 0x2124 full-width comma */
3459 else if (c1=='%') fold_state = 1; /* 0x2125 full-width full stop */
3460 else if (c1=='\'') fold_state = 1; /* 0x2127 full-width colon */
3461 else if (c1=='(') fold_state = 1; /* 0x2128 full-width semicolon */
3462 else if (c1==')') fold_state = 1; /* 0x2129 full-width question mark */
3463 else if (c1=='*') fold_state = 1; /* 0x212A full-width exclamation mark */
3464 else if (c1=='+') fold_state = 1; /* 0x212B voiced sound mark (dakuten) */
3465 else if (c1==',') fold_state = 1; /* 0x212C semi-voiced sound mark (handakuten) */
3466 /* default no fold in kinsoku */
3469 f_line = char_size(c2,c1);
3470 /* add one new f_line before this character */
3473 f_line = char_size(c2,c1);
3475 /* add one new f_line before this character */
3480 /* terminator process */
3481 switch(fold_state) {
3483 OCONV_NEWLINE((*o_fconv));
3489 OCONV_NEWLINE((*o_fconv));
3500 static nkf_char z_prev2=0,z_prev1=0;
/*
 * z_conv: width-conversion stage of the output chain; everything it emits
 * goes to o_zconv.  The branches visible here
 *  - hold back a JIS X 0201 kana in z_prev2/z_prev1 so that a following
 *    dakuten (0xde&0x7f) or handakuten (0xdf&0x7f) can be merged with it
 *    through the dv[] / ev[] tables, using cv[] when no mark is merged;
 *  - test the alpha_f bits 1, 4 and 8 for the alphabet / symbol / entity
 *    conversions;
 *  - map JIS X 0208 katakana (c2 == 0x25) through fullwidth_to_halfwidth[],
 *    whose entries are written out as (entry>>8) and (entry&0xFF) with
 *    c2 = JIS_X_0201_1976_K.
 * NOTE(review): several guarding conditions are not visible in this excerpt.
 * NOTE(review): the four entity string literals in the alpha_f&8 switch look
 * HTML-unescaped in this copy (the one for the double quote is not a valid
 * literal); presumably they should be the named entities for > < and the
 * double quote and ampersand -- verify against a pristine source.
 */
3503 z_conv(nkf_char c2, nkf_char c1)
3506 /* if (c2) c1 &= 0x7f; assertion */
3508 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3514 if (z_prev2 == JIS_X_0201_1976_K) {
3515 if (c2 == JIS_X_0201_1976_K) {
3516 if (c1 == (0xde&0x7f)) { /* dakuten (voiced sound mark) */
3518 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3520 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* handakuten (semi-voiced sound mark) */
3522 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3527 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3529 if (c2 == JIS_X_0201_1976_K) {
3530 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3531 /* wait for a dakuten or handakuten that may follow */
3536 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3547 if (alpha_f&1 && c2 == 0x23) {
3548 /* JISX0208 Alphabet */
3550 } else if (c2 == 0x21) {
3551 /* JISX0208 Kigou */
3556 } else if (alpha_f&4) {
3561 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3567 if (alpha_f&8 && c2 == 0) {
3569 const char *entity = 0;
3571 case '>': entity = ">"; break;
3572 case '<': entity = "<"; break;
3573 case '\"': entity = """; break;
3574 case '&': entity = "&"; break;
3577 while (*entity) (*o_zconv)(0, *entity++);
3583 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3588 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3592 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3596 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3600 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3604 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3608 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3612 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3616 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3621 (*o_zconv)(JIS_X_0201_1976_K, c);
3624 } else if (c2 == 0x25) {
3625 /* JISX0208 Katakana */
3626 static const int fullwidth_to_halfwidth[] =
3628 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3629 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3630 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3631 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3632 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3633 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3634 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3635 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3636 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3637 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3638 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3639 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3641 if (fullwidth_to_halfwidth[c1-0x20]){
3642 c2 = fullwidth_to_halfwidth[c1-0x20];
3643 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3645 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
/*
 * rot13(c): chained conditional that rotates a letter by 13 places:
 * values up to 'M' / 'm' are moved forward by 13, values up to 'Z' / 'z'
 * are moved back by 13.  The argument is evaluated several times, so it
 * must be free of side effects.
 * NOTE(review): the lower-bound tests of the chain are not visible here.
 */
3655 #define rot13(c) ( \
3657 (c <= 'M') ? (c + 13): \
3658 (c <= 'Z') ? (c - 13): \
3660 (c <= 'm') ? (c + 13): \
3661 (c <= 'z') ? (c - 13): \
/*
 * rot47(c): chained conditional that rotates by 47 places: values up to
 * 'O' are moved forward by 47, values up to '~' are moved back by 47.
 * The argument is evaluated more than once.
 * NOTE(review): the lower-bound test and the fall-through arm are not
 * visible here.
 */
3665 #define rot47(c) ( \
3667 ( c <= 'O') ? (c + 47) : \
3668 ( c <= '~') ? (c - 47) : \
/*
 * rot_conv: ROT stage of the output chain.  Single-byte character sets
 * (c2 == 0, JIS_X_0201_1976_K, ISO_8859_1) are distinguished from the
 * other cases, and the pair is then forwarded to o_rot_conv.
 * NOTE(review): the statements that apply rot13 / rot47 are not visible
 * in this excerpt.
 */
3673 rot_conv(nkf_char c2, nkf_char c1)
3675 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3681 (*o_rot_conv)(c2,c1);
/*
 * hira_conv: hiragana/katakana conversion stage; output goes to
 * o_hira_conv.  The first group of branches handles cells 0x21..0x73 of a
 * kana row, the cell 0x74 (replaced by the Unicode value U+3094 when the
 * output encoding is a Unicode one) and the row-0x21 cells 0x33 / 0x34.
 * The second group is the opposite direction: it recognises U+3094, row
 * 0x24 cells 0x21..0x73 and the row-0x21 cells 0x35 / 0x36.
 * NOTE(review): the option tests selecting each direction and the
 * assignments that rewrite c2/c1 are not visible in this excerpt.
 */
3685 hira_conv(nkf_char c2, nkf_char c1)
3689 if (0x20 < c1 && c1 < 0x74) {
3691 (*o_hira_conv)(c2,c1);
3693 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3695 c1 = nkf_char_unicode_new(0x3094);
3696 (*o_hira_conv)(c2,c1);
3699 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3701 (*o_hira_conv)(c2,c1);
3706 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3709 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3711 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3715 (*o_hira_conv)(c2,c1);
/*
 * iso2022jp_check_conv: filter stage placed in front of
 * o_iso2022jp_check_conv.  It singles out three kinds of input before
 * forwarding the pair:
 *  - c2 in 0x00..0x20 together with c1 in 0x7f..0xff,
 *  - c2 in rows 0x29..0x2f or 0x75..0x7e,
 *  - a combined code c that falls inside one of the RANGE_NUM_MAX
 *    [start, end] intervals of the static range[] table.
 * NOTE(review): what is done with a matching character (presumably a
 * replacement) is on lines not visible in this excerpt.
 */
3720 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3722 #define RANGE_NUM_MAX 18
3723 static const nkf_char range[RANGE_NUM_MAX][2] = {
3744 nkf_char start, end, c;
3746 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3750 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3755 for (i = 0; i < RANGE_NUM_MAX; i++) {
3756 start = range[i][0];
3759 if (c >= start && c <= end) {
3764 (*o_iso2022jp_check_conv)(c2,c1);
3768 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
/*
 * Encoded-word prefixes recognised and produced by the MIME code.  \075 is
 * the octal escape for the leading '='.  The table is walked until a null
 * entry (see the loops in open_mime and mime_begin_strict) and is indexed
 * in parallel with mime_priority_func[], mime_encode[] and
 * mime_encode_method[], so the four tables must keep the same order.
 */
3770 static const unsigned char *mime_pattern[] = {
3771 (const unsigned char *)"\075?EUC-JP?B?",
3772 (const unsigned char *)"\075?SHIFT_JIS?B?",
3773 (const unsigned char *)"\075?ISO-8859-1?Q?",
3774 (const unsigned char *)"\075?ISO-8859-1?B?",
3775 (const unsigned char *)"\075?ISO-2022-JP?B?",
3776 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3777 #if defined(UTF8_INPUT_ENABLE)
3778 (const unsigned char *)"\075?UTF-8?B?",
3779 (const unsigned char *)"\075?UTF-8?Q?",
3781 (const unsigned char *)"\075?US-ASCII?Q?",
3786 /* Marker used to raise the priority of the matching input code.
One input converter per mime_pattern[] entry; mime_begin_strict installs
the selected entry with set_iconv().  A 0 entry means no converter is
associated with that pattern. */
3787 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3788 e_iconv, s_iconv, 0, 0, 0, 0,
3789 #if defined(UTF8_INPUT_ENABLE)
/*
 * Output mode associated with each mime_pattern[] entry; open_mime compares
 * its mode argument against these values to pick the encoded-word prefix.
 */
3795 static const nkf_char mime_encode[] = {
3796 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K,
3797 #if defined(UTF8_INPUT_ENABLE)
/*
 * Transfer encoding letter ('B' or 'Q') of each mime_pattern[] entry;
 * open_mime copies the selected value into mimeout_mode.
 */
3804 static const nkf_char mime_encode_method[] = {
3805 'B', 'B','Q', 'B', 'B', 'Q',
3806 #if defined(UTF8_INPUT_ENABLE)
3814 /* MIME preprocessor fifo */
/*
 * The fifo is a ring buffer: mime_input_buf(n) masks every index with
 * MIME_BUF_MASK, which only works because MIME_BUF_SIZE is a power of two.
 * The indices themselves are never reduced; they are unsigned and wrap.
 */
3816 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3817 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3818 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3820 unsigned char buf[MIME_BUF_SIZE];
3822 unsigned int last; /* decoded */
3823 unsigned int input; /* undecoded */
3825 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
3827 #define MAXRECOVER 20
/*
 * Push one byte back onto the front of the MIME fifo: the top index is
 * decremented first and c (truncated to unsigned char) is stored there.
 */
3830 mime_input_buf_unshift(nkf_char c)
3832 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
/*
 * ungetc replacement used while the MIME reader is installed: the byte is
 * put back into the MIME fifo with mime_input_buf_unshift(); the stream f
 * is not used by the visible code.
 */
3836 mime_ungetc(nkf_char c, FILE *f)
3838 mime_input_buf_unshift(c);
/*
 * Counterpart of mime_getc_buf: either hands the byte to the saved
 * i_mungetc_buf, or steps the undecoded-input index of the fifo back and
 * stores the byte there.
 * NOTE(review): the condition choosing between the two is not visible
 * here; presumably it is mimebuf_f, as in mime_getc_buf.
 */
3843 mime_ungetc_buf(nkf_char c, FILE *f)
3846 (*i_mungetc_buf)(c,f);
3848 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
/*
 * Fetch the next undecoded byte: from the saved reader i_mgetc_buf when
 * mimebuf_f is set, otherwise from the fifo at the input index, which is
 * then advanced.
 */
3853 mime_getc_buf(FILE *f)
3855 /* we don't keep eof of mime_input_buf, because it contains ?= as
3856 a terminator. It was checked in mime_integrity. */
3857 return ((mimebuf_f)?
3858 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
/*
 * Install the MIME reader in front of the current input functions.
 * Nothing happens if mime_getc is already installed.  The previous
 * i_getc / i_ungetc are saved in i_mgetc / i_mungetc; in STRICT_MIME mode
 * they are pushed one level further into i_mgetc_buf / i_mungetc_buf and
 * the buffered variants mime_getc_buf / mime_ungetc_buf take their place.
 */
3862 switch_mime_getc(void)
3864 if (i_getc!=mime_getc) {
3865 i_mgetc = i_getc; i_getc = mime_getc;
3866 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3867 if(mime_f==STRICT_MIME) {
3868 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3869 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
/*
 * Undo switch_mime_getc(): in STRICT_MIME mode the *_buf savers are popped
 * back first, then the saved unget function is reinstalled.  If an input
 * converter was saved in mime_iconv_back it is restored with set_iconv()
 * and the saved pointer is cleared.
 * NOTE(review): the matching restore of i_getc is not visible in this
 * excerpt.
 */
3875 unswitch_mime_getc(void)
3877 if(mime_f==STRICT_MIME) {
3878 i_mgetc = i_mgetc_buf;
3879 i_mungetc = i_mungetc_buf;
3882 i_ungetc = i_mungetc;
3883 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3884 mime_iconv_back = NULL;
/*
 * mime_integrity: pre-read an encoded word into the MIME fifo and check
 * that it is well formed before decoding starts.
 * The pattern p is copied into the fifo first; bytes are then read with
 * i_getc until EOF or until the fifo is full.  When the closing pair is
 * seen (c == '=' with the previous byte d == '?') the input index is reset
 * to q, the position just after the copied pattern, so decoding starts
 * behind the header.  Only '+', '/', '=', '?' and alphanumerics are
 * accepted while scanning.  On an incomplete word everything read is left
 * in the fifo as already-decoded data (last = input), mime_decode_mode is
 * set to 1 and the buffered reader is installed with switch_mime_getc().
 * NOTE(review): the return statements are not visible in this excerpt.
 */
3888 mime_integrity(FILE *f, const unsigned char *p)
3892 /* In buffered mode, read until =? or NL or buffer full
3894 mime_input_state.input = mime_input_state.top;
3895 mime_input_state.last = mime_input_state.top;
3897 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3899 q = mime_input_state.input;
3900 while((c=(*i_getc)(f))!=EOF) {
3901 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3902 break; /* buffer full */
3904 if (c=='=' && d=='?') {
3905 /* checked. skip header, start decode */
3906 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3907 /* mime_last_input = mime_input_state.input; */
3908 mime_input_state.input = q;
3912 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3914 /* Should we check length mod 4? */
3915 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3918 /* In case of Incomplete MIME, no MIME decode */
3919 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3920 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
3921 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
3922 switch_mime_getc(); /* anyway we need buffered getc */
/*
 * mime_begin_strict: called after "=?" has been read; matches the rest of
 * the encoded-word header against mime_pattern[].
 * Input is compared case-insensitively (nkf_toupper) from pattern index 2
 * onwards, and every byte read is kept in r[] so it can be replayed if no
 * pattern matches.  When the current pattern fails, later patterns sharing
 * the already-matched prefix are tried.  On success mime_decode_mode is
 * taken from the pattern itself (p[i-2], the encoding letter), the current
 * input converter is saved in mime_iconv_back and replaced by
 * mime_priority_func[j], and for 'B' encoding mimebuf_f is copied from
 * unbuf_f before mime_integrity() is run on the matched pattern.
 * NOTE(review): the recovery output and the remaining return paths are not
 * visible in this excerpt.
 */
3927 mime_begin_strict(FILE *f)
3931 const unsigned char *p,*q;
3932 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
3934 mime_decode_mode = FALSE;
3935 /* =? has been checked */
3937 p = mime_pattern[j];
3940 for(i=2;p[i]>SP;i++) { /* start at =? */
3941 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
3942 /* pattern fails, try next one */
3944 while (mime_pattern[++j]) {
3945 p = mime_pattern[j];
3946 for(k=2;k<i;k++) /* assume length(p) > i */
3947 if (p[k]!=q[k]) break;
3948 if (k==i && nkf_toupper(c1)==p[k]) break;
3950 p = mime_pattern[j];
3951 if (p) continue; /* found next one, continue */
3952 /* all fails, output from recovery buffer */
3960 mime_decode_mode = p[i-2];
3962 mime_iconv_back = iconv;
3963 set_iconv(FALSE, mime_priority_func[j]);
3964 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3966 if (mime_decode_mode=='B') {
3967 mimebuf_f = unbuf_f;
3969 /* do MIME integrity check */
3970 return mime_integrity(f,mime_pattern[j]);
3984 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3985 /* re-read and convert again from mime_buffer. */
3987 /* =? has been checked */
3988 k = mime_input_state.last;
3989 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
3990 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3991 /* We accept any character type even if it is broken up by newlines */
3992 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
3993 if (c1==LF||c1==SP||c1==CR||
3994 c1=='-'||c1=='_'||is_alnum(c1)) continue;
3996 /* Failed. But this could be another MIME preamble */
3998 mime_input_state.last--;
4004 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4005 if (!(++i<MAXRECOVER) || c1==EOF) break;
4006 if (c1=='b'||c1=='B') {
4007 mime_decode_mode = 'B';
4008 } else if (c1=='q'||c1=='Q') {
4009 mime_decode_mode = 'Q';
4013 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4014 if (!(++i<MAXRECOVER) || c1==EOF) break;
4016 mime_decode_mode = FALSE;
4022 if (!mime_decode_mode) {
4023 /* false MIME preamble, restart from mime_buffer */
4024 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4025 /* Since we are in MIME mode until buffer becomes empty, */
4026 /* we never go into mime_begin again for a while. */
4029 /* discard the MIME preamble and go to MIME mode */
4030 mime_input_state.last = k;
4031 /* do no MIME integrity check */
4032 return c1; /* used only for checking EOF */
/*
 * Write a diagnostic line to stderr; a null str is printed as the text
 * NULL instead of being passed to the %s conversion.
 * NOTE(review): any flag that gates the output is not visible here.
 */
4043 debug(const char *str)
4046 fprintf(stderr, "%s\n", str ? str : "NULL");
/*
 * Record the name of the detected input encoding.  The first name is
 * stored as is (the pointer is kept, not copied).  If a later call passes
 * a different name, input_codename becomes the empty string, which
 * get_guessed_code() reports as BINARY.
 */
4052 set_input_codename(const char *codename)
4054 if (!input_codename) {
4055 input_codename = codename;
4056 } else if (strcmp(codename, input_codename) != 0) {
4057 input_codename = "";
/*
 * Turn the recorded input_codename into the name reported to the user and
 * return it (input_codename itself is updated).
 *  - empty string (conflicting detections)  -> BINARY
 *  - nothing recorded                        -> ASCII
 *  - Shift_JIS / EUC-JP / ISO-2022-JP are refined to a vendor variant
 *    according to the score bits of the input_code entry that belongs to
 *    the current iconv function.
 */
4062 get_guessed_code(void)
4064 if (input_codename && !*input_codename) {
4065 input_codename = "BINARY";
4067 struct input_code *p = find_inputcode_byfunc(iconv);
4068 if (!input_codename) {
4069 input_codename = "ASCII";
4070 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4071 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4072 input_codename = "CP932";
4073 } else if (strcmp(input_codename, "EUC-JP") == 0) {
4074 if (p->score & (SCORE_X0212))
4075 input_codename = "EUCJP-MS";
4076 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4077 input_codename = "CP51932";
4078 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4079 if (p->score & (SCORE_KANA))
4080 input_codename = "CP50221";
4081 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4082 input_codename = "CP50220";
4085 return input_codename;
4088 #if !defined(PERL_XS) && !defined(WIN32DLL)
/*
 * Print the guessed input encoding on stdout, prefixed with the file name
 * and a colon when filename is not NULL.  The name comes from
 * get_guessed_code(); the visible conditional chain selects a suffix that
 * describes the detected line ending (CR, LF, CRLF, or mixed when
 * input_eol == EOF).
 * NOTE(review): the branch taken for conflicting detections and the call
 * that prints the line-ending suffix are not visible in this excerpt.
 */
4090 print_guessed_code(char *filename)
4092 if (filename != NULL) printf("%s: ", filename);
4093 if (input_codename && !*input_codename) {
4096 input_codename = get_guessed_code();
4098 printf("%s\n", input_codename);
4102 input_eol == CR ? " (CR)" :
4103 input_eol == LF ? " (LF)" :
4104 input_eol == CRLF ? " (CRLF)" :
4105 input_eol == EOF ? " (MIXED NL)" :
/*
 * hex_getc: shared reader for escapes written as a marker followed by two
 * hexadecimal digits.  g and u are the get / unget functions of the layer
 * below.  When both following characters are hex digits the decoded byte
 * (high nibble from c2, low nibble from c3) is returned.
 * NOTE(review): ch is presumably the marker character -- the visible
 * callers pass ':' and '%'; the handling of non-matching input is on lines
 * not visible in this excerpt.
 */
4115 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4117 nkf_char c1, c2, c3;
4123 if (!nkf_isxdigit(c2)){
4128 if (!nkf_isxdigit(c3)){
4133 return (hex2bin(c2) << 4) | hex2bin(c3);
4139 return hex_getc(':', f, i_cgetc, i_cungetc);
/* Unget for the ':'-escape input layer: delegates to the saved i_cungetc. */
4143 cap_ungetc(nkf_char c, FILE *f)
4145 return (*i_cungetc)(c, f);
4151 return hex_getc('%', f, i_ugetc, i_uungetc);
/* Unget for the '%'-escape input layer: delegates to the saved i_uungetc. */
4155 url_ungetc(nkf_char c, FILE *f)
4157 return (*i_uungetc)(c, f);
4161 #ifdef NUMCHAR_OPTION
/*
 * numchar_getc: input layer that decodes numeric character references.
 * Characters are read through i_ngetc into buf.  After an x or X up to 7
 * hexadecimal digits are accepted, otherwise up to 8 decimal digits; each
 * digit is converted with hex2bin() and folded into c.  A completed
 * reference is returned as nkf_char_unicode_new(c).
 * NOTE(review): the recognition of the leading and closing delimiters, the
 * per-digit scaling of c and the push-back of a failed match are on lines
 * not visible in this excerpt.
 */
4163 numchar_getc(FILE *f)
4165 nkf_char (*g)(FILE *) = i_ngetc;
4166 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4177 if (buf[i] == 'x' || buf[i] == 'X'){
4178 for (j = 0; j < 7; j++){
4180 if (!nkf_isxdigit(buf[i])){
4187 c |= hex2bin(buf[i]);
4190 for (j = 0; j < 8; j++){
4194 if (!nkf_isdigit(buf[i])){
4201 c += hex2bin(buf[i]);
4207 return nkf_char_unicode_new(c);
/* Unget for the numeric-reference input layer: delegates to the saved i_nungetc. */
4217 numchar_ungetc(nkf_char c, FILE *f)
4219 return (*i_nungetc)(c, f);
4223 #ifdef UNICODE_NORMALIZATION
/*
 * Allocate a byte array object: one nkf_malloc for the nkf_ary header and
 * one of `length` bytes for its storage; max_length records the capacity
 * that nkf_ary_push() asserts against.
 * NOTE(review): initialisation of count and the return statement are not
 * visible in this excerpt.
 */
4232 nkf_ary_new(int length)
4234 nkf_ary *ary = nkf_malloc(sizeof(nkf_ary));
4235 ary->ary = nkf_malloc(length);
4236 ary->max_length = length;
4242 nkf_ary_dispose(nkf_ary *ary)
4248 #define nkf_ary_length(ary) ((ary)->count)
4249 #define nkf_ary_empty_p(ary) ((ary)->count == 0)
4251 static unsigned char
4252 nkf_ary_at(nkf_ary *ary, int index)
4254 assert(index <= ary->count);
4255 return ary->ary[index];
4259 nkf_ary_clear(nkf_ary *ary)
/*
 * Append c to the array.  The assertion requires free capacity
 * (count < max_length); the value is stored at index count, which is then
 * incremented.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
4264 static unsigned char
4265 nkf_ary_push(nkf_ary *ary, nkf_char c)
4267 assert(ary->max_length > ary->count);
4268 ary->ary[ary->count++] = c;
4272 static unsigned char
4273 nkf_ary_pop(nkf_ary *ary)
4275 assert(0 < ary->count);
4276 return ary->ary[--ary->count];
4279 /* Normalization Form C */
/*
 * Input layer that composes decomposed sequences.  Bytes are read through
 * i_nfc_getc into a 9-byte nkf_ary and a binary search over
 * normalization_table[] (bounds lower..upper) compares them with each
 * entry's nfd byte string, reading more input whenever the candidate is
 * longer than what has been buffered.  On a match the buffer is refilled
 * with the entry's nfc bytes.  At the end all buffered bytes except the
 * first are pushed back with i_nfc_ungetc, the first one is taken as the
 * result and the array is disposed.
 * Input that is EOF, above 0xFF, or a UTF-8 continuation byte
 * ((c & 0xc0) == 0x80) is returned unchanged.
 * NOTE(review): that early return happens after nkf_ary_new(9) and before
 * nkf_ary_dispose(buf), so the array looks leaked on every such character
 * -- confirm and release it before returning.
 */
4283 nkf_char (*g)(FILE *f) = i_nfc_getc;
4284 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4285 nkf_ary *buf = nkf_ary_new(9);
4286 const unsigned char *array;
4287 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4288 nkf_char c = (*g)(f);
4290 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4292 nkf_ary_push(buf, (unsigned char)c);
4294 while (lower <= upper) {
4295 int mid = (lower+upper) / 2;
4297 array = normalization_table[mid].nfd;
4298 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4299 if (len >= nkf_ary_length(buf)) {
4303 lower = 1, upper = 0;
4306 nkf_ary_push(buf, c);
4308 if (array[len] != nkf_ary_at(buf, len)) {
4309 if (array[len] < nkf_ary_at(buf, len)) lower = mid + 1;
4310 else upper = mid - 1;
4317 array = normalization_table[mid].nfc;
4319 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4320 nkf_ary_push(buf, array[i]);
4324 } while (lower <= upper);
4326 while (nkf_ary_length(buf) > 1) (*u)(nkf_ary_pop(buf), f);
4327 c = nkf_ary_pop(buf);
4328 nkf_ary_dispose(buf);
/* Unget for the normalization input layer: delegates to the saved i_nfc_ungetc. */
4334 nfc_ungetc(nkf_char c, FILE *f)
4336 return (*i_nfc_ungetc)(c, f);
4338 #endif /* UNICODE_NORMALIZATION */
/*
 * Map one Base64 alphabet character to its 6-bit value.  Besides the
 * standard '+' (62) and '/' (63), the characters '-' and '_' are accepted
 * for 62 and 63 as well.  The constants are written as ASCII characters
 * whose codes equal the wanted numbers: '?' is 63, '>' is 62, '4' is 52
 * and 'G' is 'a' - 26.
 * NOTE(review): the range tests selecting the letter branches, the
 * handling of other input and the return statement are not visible in
 * this excerpt.
 */
4342 base64decode(nkf_char c)
4347 i = c - 'A'; /* A..Z 0-25 */
4348 } else if (c == '_') {
4349 i = '?' /* 63 */ ; /* _ 63 */
4351 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4353 } else if (c > '/') {
4354 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4355 } else if (c == '+' || c == '-') {
4356 i = '>' /* 62 */ ; /* + and - 62 */
4358 i = '?' /* 63 */ ; /* / 63 */
4366 nkf_char c1, c2, c3, c4, cc;
4367 nkf_char t1, t2, t3, t4, mode, exit_mode;
4368 nkf_char lwsp_count;
4371 nkf_char lwsp_size = 128;
4373 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4374 return mime_input_buf(mime_input_state.top++);
4376 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4377 mime_decode_mode=FALSE;
4378 unswitch_mime_getc();
4379 return (*i_getc)(f);
4382 if (mimebuf_f == FIXED_MIME)
4383 exit_mode = mime_decode_mode;
4386 if (mime_decode_mode == 'Q') {
4387 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4389 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4390 if (c1<=SP || DEL<=c1) {
4391 mime_decode_mode = exit_mode; /* prepare for quit */
4394 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4398 mime_decode_mode = exit_mode; /* prepare for quit */
4399 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4400 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4401 /* end Q encoding */
4402 input_mode = exit_mode;
4404 lwsp_buf = nkf_malloc((lwsp_size+5)*sizeof(char));
4405 while ((c1=(*i_getc)(f))!=EOF) {
4410 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4418 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4419 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4434 lwsp_buf[lwsp_count] = (unsigned char)c1;
4435 if (lwsp_count++>lwsp_size){
4437 lwsp_buf_new = nkf_realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4438 lwsp_buf = lwsp_buf_new;
4444 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4446 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4447 i_ungetc(lwsp_buf[lwsp_count],f);
4453 if (c1=='='&&c2<SP) { /* this is soft wrap */
4454 while((c1 = (*i_mgetc)(f)) <=SP) {
4455 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4457 mime_decode_mode = 'Q'; /* still in MIME */
4458 goto restart_mime_q;
4461 mime_decode_mode = 'Q'; /* still in MIME */
4465 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4466 if (c2<=SP) return c2;
4467 mime_decode_mode = 'Q'; /* still in MIME */
4468 return ((hex2bin(c2)<<4) + hex2bin(c3));
4471 if (mime_decode_mode != 'B') {
4472 mime_decode_mode = FALSE;
4473 return (*i_mgetc)(f);
4477 /* Base64 encoding */
4479 MIME allows line breaks in the middle of
4480 Base64, but we are very pessimistic when decoding
4481 in unbuf mode because MIME-encoded text may be broken by
4482 control sequences from less or an editor (such as ESC-[-K) in unbuffered
4483 mode. Ignore incomplete MIME.
4485 mode = mime_decode_mode;
4486 mime_decode_mode = exit_mode; /* prepare for quit */
4488 while ((c1 = (*i_mgetc)(f))<=SP) {
4493 if ((c2 = (*i_mgetc)(f))<=SP) {
4496 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4497 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4500 if ((c1 == '?') && (c2 == '=')) {
4503 lwsp_buf = nkf_malloc((lwsp_size+5)*sizeof(char));
4504 while ((c1=(*i_getc)(f))!=EOF) {
4509 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4517 if ((c1=(*i_getc)(f))!=EOF) {
4521 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4536 lwsp_buf[lwsp_count] = (unsigned char)c1;
4537 if (lwsp_count++>lwsp_size){
4539 lwsp_buf_new = nkf_realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4540 lwsp_buf = lwsp_buf_new;
4546 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4548 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4549 i_ungetc(lwsp_buf[lwsp_count],f);
4556 if ((c3 = (*i_mgetc)(f))<=SP) {
4559 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4560 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4564 if ((c4 = (*i_mgetc)(f))<=SP) {
4567 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4568 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4572 mime_decode_mode = mode; /* still in MIME sigh... */
4574 /* BASE 64 decoding */
4576 t1 = 0x3f & base64decode(c1);
4577 t2 = 0x3f & base64decode(c2);
4578 t3 = 0x3f & base64decode(c3);
4579 t4 = 0x3f & base64decode(c4);
4580 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4582 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4583 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4585 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4586 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4588 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4593 return mime_input_buf(mime_input_state.top++);
/*
 * Base64 output alphabet: the character at index v encodes the 6-bit
 * value v (used by the encoder as basis_64[...]).
 */
4596 static const char basis_64[] =
4597 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4599 #define MIMEOUT_BUF_LENGTH 74
4601 char buf[MIMEOUT_BUF_LENGTH+1];
4606 /*nkf_char mime_lastchar2, mime_lastchar1;*/
/*
 * open_mime: start an encoded word for output mode `mode`.
 * The prefix is the mime_pattern[] entry whose mime_encode[] value equals
 * mode (entry 0 when none matches) and mimeout_mode is set from
 * mime_encode_method[].  Before the prefix is written, whitespace at the
 * start of the pending mimeout_state.buf is passed through with o_mputc,
 * and a line break is inserted when base64_count is already above 45.
 * Finally the pending buffer is emptied (count reset to 0 first) and its
 * former contents are re-submitted through mime_putc().
 * NOTE(review): the statement that writes the prefix itself and the
 * resetting of i between the steps are on lines not visible here.
 */
4609 open_mime(nkf_char mode)
4611 const unsigned char *p;
4614 p = mime_pattern[0];
4615 for(i=0;mime_pattern[i];i++) {
4616 if (mode == mime_encode[i]) {
4617 p = mime_pattern[i];
4621 mimeout_mode = mime_encode_method[i];
4623 if (base64_count>45) {
4624 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4625 (*o_mputc)(mimeout_state.buf[i]);
4628 PUT_NEWLINE((*o_mputc));
4631 if (mimeout_state.count>0
4632 && (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4633 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)) {
4637 for (;i<mimeout_state.count;i++) {
4638 if (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4639 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF) {
4640 (*o_mputc)(mimeout_state.buf[i]);
4650 j = mimeout_state.count;
4651 mimeout_state.count = 0;
4653 mime_putc(mimeout_state.buf[i]);
/*
 * mime_prechar: called before each character is handed to the encoder to
 * keep encoded lines short.  The projected line length is
 * base64_count + mimeout_state.count/3*4 (pending bytes counted at their
 * Base64 size).  Inside an encoded word (mimeout_mode > 0) the word is
 * closed with (EOF,0), a newline is written and a leading space starts the
 * continuation line once the projection passes 73 or 66.  Outside an
 * encoded word, for c2 != EOF and a projection above 60, the method is
 * chosen ('Q' for ASCII / ISO_8859_1 output, otherwise 'B'), a new word is
 * opened and the same close / newline / space sequence is written.
 * NOTE(review): the conditions separating the 73 and 66 limits are not
 * visible in this excerpt.
 */
4658 mime_prechar(nkf_char c2, nkf_char c1)
4660 if (mimeout_mode > 0){
4662 if (base64_count + mimeout_state.count/3*4> 73){
4663 (*o_base64conv)(EOF,0);
4664 OCONV_NEWLINE((*o_base64conv));
4665 (*o_base64conv)(0,SP);
4669 if (base64_count + mimeout_state.count/3*4> 66) {
4670 (*o_base64conv)(EOF,0);
4671 OCONV_NEWLINE((*o_base64conv));
4672 (*o_base64conv)(0,SP);
4678 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4679 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4680 open_mime(output_mode);
4681 (*o_base64conv)(EOF,0);
4682 OCONV_NEWLINE((*o_base64conv));
4683 (*o_base64conv)(0,SP);
4702 switch(mimeout_mode) {
4707 (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4)]);
4713 (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2)]);
4718 if (mimeout_mode > 0) {
4719 if (mimeout_f!=FIXED_MIME) {
4721 } else if (mimeout_mode != 'Q')
/*
 * mimeout_addchar: encode one byte according to mimeout_mode and write the
 * result with o_mputc.
 * In the branch testing nkf_isalnum, a byte that is not alphanumeric is
 * written as two hex digits (high nibble, then low nibble) via bin2hex.
 * The Base64 branches form a three-step cycle: the first byte emits its top
 * six bits and is remembered in mimeout_state.state; the second combines
 * the remembered low two bits with its own top four; the third combines
 * the remembered low four bits with its top two and then emits its low six
 * bits.
 * NOTE(review): the case labels, the mode transitions between the steps
 * and the escape character written before the hex digits are on lines not
 * visible in this excerpt.
 */
4727 mimeout_addchar(nkf_char c)
4729 switch(mimeout_mode) {
4734 } else if(!nkf_isalnum(c)) {
4736 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4737 (*o_mputc)(bin2hex((c&0xf)));
4745 mimeout_state.state=c;
4746 (*o_mputc)(basis_64[c>>2]);
4751 (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4752 mimeout_state.state=c;
4757 (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4758 (*o_mputc)(basis_64[c & 0x3F]);
/*
 * mime_putc: front end of the MIME output encoder.  c is one output byte,
 * or EOF to flush.
 *
 * With mimeout_f == FIXED_MIME the data is encoded as a body: lines are
 * broken once base64_count exceeds 71 and bytes other than EOF go to the
 * encoder.
 *
 * Otherwise (header encoding) characters are collected in
 * mimeout_state.buf so that the function can decide, word by word, whether
 * text is written literally with o_mputc or encoded with
 * mimeout_addchar(), opening an encoded word with open_mime() when needed.
 * The visible logic distinguishes
 *  - EOF: the pending buffer is flushed, through the encoder when an
 *    encoded word is open;
 *  - mimeout_mode == 'Q';
 *  - mimeout_mode <= 0 (no encoded word open): plain ASCII / ISO_8859_1
 *    text is buffered up to the next white space, long lines are folded,
 *    preferably just after a boundary= parameter, and a buffer that grows
 *    past MIMEOUT_BUF_LENGTH forces an encoded word;
 *  - the Base64 states: pending ASCII text is either encoded together with
 *    the surrounding word or written literally after the word is closed.
 * NOTE(review): many statements (closing of encoded words, returns,
 * base64_count updates) are on lines not visible in this excerpt, so the
 * exact transitions should be checked against the full source.
 */
4770 mime_putc(nkf_char c)
4775 if (mimeout_f == FIXED_MIME){
4776 if (mimeout_mode == 'Q'){
4777 if (base64_count > 71){
4778 if (c!=CR && c!=LF) {
4780 PUT_NEWLINE((*o_mputc));
4785 if (base64_count > 71){
4787 PUT_NEWLINE((*o_mputc));
4790 if (c == EOF) { /* c==EOF */
4794 if (c != EOF) { /* c!=EOF */
4800 /* mimeout_f != FIXED_MIME */
4802 if (c == EOF) { /* c==EOF */
4803 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4804 j = mimeout_state.count;
4805 mimeout_state.count = 0;
4807 if (mimeout_mode > 0) {
4808 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4810 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4813 mimeout_addchar(mimeout_state.buf[i]);
4817 mimeout_addchar(mimeout_state.buf[i]);
4821 mimeout_addchar(mimeout_state.buf[i]);
4827 mimeout_addchar(mimeout_state.buf[i]);
4833 if (mimeout_state.count > 0){
4834 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4839 if (mimeout_mode=='Q') {
4840 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4841 if (c == CR || c == LF) {
4846 } else if (c <= SP) {
4848 if (base64_count > 70) {
4849 PUT_NEWLINE((*o_mputc));
4852 if (!nkf_isblank(c)) {
4857 if (base64_count > 70) {
4859 PUT_NEWLINE((*o_mputc));
4862 open_mime(output_mode);
4864 if (!nkf_noescape_mime(c)) {
4875 if (mimeout_mode <= 0) {
4876 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4877 if (nkf_isspace(c)) {
4879 if (mimeout_mode == -1) {
4882 if (c==CR || c==LF) {
4884 open_mime(output_mode);
4890 for (i=0;i<mimeout_state.count;i++) {
4891 (*o_mputc)(mimeout_state.buf[i]);
4892 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4903 mimeout_state.buf[0] = (char)c;
4904 mimeout_state.count = 1;
4906 if (base64_count > 1
4907 && base64_count + mimeout_state.count > 76
4908 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
4909 static const char *str = "boundary=\"";
4910 static int len = 10;
4913 for (; i < mimeout_state.count - len; ++i) {
4914 if (!strncmp(mimeout_state.buf+i, str, len)) {
4920 if (i == 0 || i == mimeout_state.count - len) {
4921 PUT_NEWLINE((*o_mputc));
4923 if (!nkf_isspace(mimeout_state.buf[0])){
4930 for (j = 0; j <= i; ++j) {
4931 (*o_mputc)(mimeout_state.buf[j]);
4933 PUT_NEWLINE((*o_mputc));
4935 for (; j <= mimeout_state.count; ++j) {
4936 mimeout_state.buf[j - i] = mimeout_state.buf[j];
4938 mimeout_state.count -= i;
4941 mimeout_state.buf[mimeout_state.count++] = (char)c;
4942 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4943 open_mime(output_mode);
4948 if (lastchar==CR || lastchar == LF){
4949 for (i=0;i<mimeout_state.count;i++) {
4950 (*o_mputc)(mimeout_state.buf[i]);
4953 mimeout_state.count = 0;
4956 for (i=0;i<mimeout_state.count-1;i++) {
4957 (*o_mputc)(mimeout_state.buf[i]);
4960 mimeout_state.buf[0] = SP;
4961 mimeout_state.count = 1;
4963 open_mime(output_mode);
4966 /* mimeout_mode == 'B', 1, 2 */
4967 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4968 if (lastchar == CR || lastchar == LF){
4969 if (nkf_isblank(c)) {
4970 for (i=0;i<mimeout_state.count;i++) {
4971 mimeout_addchar(mimeout_state.buf[i]);
4973 mimeout_state.count = 0;
4974 } else if (SP<c && c<DEL) {
4976 for (i=0;i<mimeout_state.count;i++) {
4977 (*o_mputc)(mimeout_state.buf[i]);
4980 mimeout_state.count = 0;
4982 mimeout_state.buf[mimeout_state.count++] = (char)c;
4985 if (c==SP || c==TAB || c==CR || c==LF) {
4986 for (i=0;i<mimeout_state.count;i++) {
4987 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
4989 for (i=0;i<mimeout_state.count;i++) {
4990 (*o_mputc)(mimeout_state.buf[i]);
4993 mimeout_state.count = 0;
4996 mimeout_state.buf[mimeout_state.count++] = (char)c;
4997 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4999 for (i=0;i<mimeout_state.count;i++) {
5000 (*o_mputc)(mimeout_state.buf[i]);
5003 mimeout_state.count = 0;
5007 if (mimeout_state.count>0 && SP<c && c!='=') {
5008 mimeout_state.buf[mimeout_state.count++] = (char)c;
5009 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5010 j = mimeout_state.count;
5011 mimeout_state.count = 0;
5013 mimeout_addchar(mimeout_state.buf[i]);
5020 if (mimeout_state.count>0) {
5021 j = mimeout_state.count;
5022 mimeout_state.count = 0;
5024 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5026 mimeout_addchar(mimeout_state.buf[i]);
5032 (*o_mputc)(mimeout_state.buf[i]);
5034 open_mime(output_mode);
/*
 * Output-chain stage for MIME encoding: mime_prechar() gets the chance to
 * fold the line or open an encoded word, then the pair is forwarded
 * unchanged to o_base64conv.
 */
5041 base64_conv(nkf_char c2, nkf_char c1)
5043 mime_prechar(c2, c1);
5044 (*o_base64conv)(c2,c1);
5048 typedef struct nkf_iconv_t {
5051 size_t input_buffer_size;
5052 char *output_buffer;
5053 size_t output_buffer_size;
/*
 * nkf_iconv_new: set up a converter from `fromcode` to `tocode`: an input
 * buffer of IOBUF_SIZE bytes, an output buffer of twice that size, and an
 * iconv descriptor from iconv_open().
 * NOTE(review): this function does not look compilable as written:
 *  - `converter` is declared as an nkf_iconv_t object but is used with
 *    `->` and nothing visible allocates it;
 *  - perror() is handed the int result of fprintf(), and that fprintf()
 *    call has no FILE * argument.
 * The block is presumably excluded from the build by a preprocessor
 * condition not visible here -- confirm before enabling it.
 */
5057 nkf_iconv_new(char *tocode, char *fromcode)
5059 nkf_iconv_t converter;
5061 converter->input_buffer_size = IOBUF_SIZE;
5062 converter->input_buffer = nkf_malloc(converter->input_buffer_size);
5063 converter->output_buffer_size = IOBUF_SIZE * 2;
5064 converter->output_buffer = nkf_malloc(converter->output_buffer_size);
5065 converter->cd = iconv_open(tocode, fromcode);
5066 if (converter->cd == (iconv_t)-1)
5070 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5073 perror("can't iconv_open");
/*
 * nkf_iconv_convert: read bytes into the converter's input buffer, run
 * iconv() on them and write the result with o_putc; when iconv() fails the
 * unconsumed input is moved to the front of the buffer and the output
 * buffer is doubled with realloc().
 * NOTE(review): several visible details look wrong and should be checked
 * before this code is enabled:
 *  - the read loop uses a stream named f, but the parameter is `input`;
 *  - `if (input_length < converter->input_buffer_size) break;` leaves the
 *    read loop after the first byte; the test is presumably inverted;
 *  - the output loop counts down output_length, which after iconv() is the
 *    space left rather than the number of bytes produced, and indexes
 *    through output_buffer, which iconv() has already advanced;
 *  - realloc() is applied to converter->outbuf while the rest of the
 *    function uses converter->output_buffer.
 */
5079 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5081 size_t invalid = (size_t)0;
5082 char *input_buffer = converter->input_buffer;
5083 size_t input_length = (size_t)0;
5084 char *output_buffer = converter->output_buffer;
5085 size_t output_length = converter->output_buffer_size;
5090 while ((c = (*i_getc)(f)) != EOF) {
5091 input_buffer[input_length++] = c;
5092 if (input_length < converter->input_buffer_size) break;
5096 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5097 while (output_length-- > 0) {
5098 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5100 if (ret == (size_t) - 1) {
5103 if (input_buffer != converter->input_buffer)
5104 memmove(converter->input_buffer, input_buffer, input_length);
5107 converter->output_buffer_size *= 2;
5108 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5109 if (output_buffer == NULL) {
5110 perror("can't realloc");
5113 converter->output_buffer = output_buffer;
5116 perror("can't iconv");
/*
 * nkf_iconv_close: release both buffers of a converter and close its iconv
 * descriptor.
 * NOTE(review): the parameter is named `convert` but the body uses
 * `converter`, and the members are called inbuf / outbuf here while
 * nkf_iconv_new fills input_buffer / output_buffer -- the names need to be
 * reconciled before this code can compile.
 */
5129 nkf_iconv_close(nkf_iconv_t *convert)
5131 nkf_free(converter->inbuf);
5132 nkf_free(converter->outbuf);
5133 iconv_close(converter->cd);
5142 struct input_code *p = input_code_list;
5154 mime_f = MIME_DECODE_DEFAULT;
5155 mime_decode_f = FALSE;
5160 x0201_f = X0201_DEFAULT;
5161 iso2022jp_f = FALSE;
5162 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5163 ms_ucs_map_f = UCS_MAP_ASCII;
5165 #ifdef UTF8_INPUT_ENABLE
5166 no_cp932ext_f = FALSE;
5167 no_best_fit_chars_f = FALSE;
5168 encode_fallback = NULL;
5169 unicode_subchar = '?';
5170 input_endian = ENDIAN_BIG;
5172 #ifdef UTF8_OUTPUT_ENABLE
5173 output_bom_f = FALSE;
5174 output_endian = ENDIAN_BIG;
5176 #ifdef UNICODE_NORMALIZATION
5192 #ifdef SHIFTJIS_CP932
5202 for (i = 0; i < 256; i++){
5203 prefix_table[i] = 0;
5207 mimeout_state.count = 0;
5212 fold_preserve_f = FALSE;
5215 kanji_intro = DEFAULT_J;
5216 ascii_intro = DEFAULT_R;
5217 fold_margin = FOLD_MARGIN;
5218 o_zconv = no_connection;
5219 o_fconv = no_connection;
5220 o_eol_conv = no_connection;
5221 o_rot_conv = no_connection;
5222 o_hira_conv = no_connection;
5223 o_base64conv = no_connection;
5224 o_iso2022jp_check_conv = no_connection;
5227 i_ungetc = std_ungetc;
5229 i_bungetc = std_ungetc;
5232 i_mungetc = std_ungetc;
5233 i_mgetc_buf = std_getc;
5234 i_mungetc_buf = std_ungetc;
5235 output_mode = ASCII;
5237 mime_decode_mode = FALSE;
5243 init_broken_state();
5244 z_prev2=0,z_prev1=0;
5246 iconv_for_check = 0;
5248 input_codename = NULL;
5249 input_encoding = NULL;
5250 output_encoding = NULL;
/*
 * module_connection: build the conversion pipeline from the option flags.
 *
 * Output side: the output encoding is resolved (explicit setting, then
 * nkf_default_encoding(), then ISO_2022_JP when only guessing or
 * discarding output) and oconv is set to its converter.  Each enabled
 * filter is then linked in FRONT of the current oconv by saving oconv in
 * the filter's o_* pointer and installing the filter as the new oconv, so
 * the filter linked last runs first.  The order of linking is base64_conv,
 * eol_conv, rot_conv, iso2022jp_check_conv, hira_conv, fold_conv, z_conv.
 *
 * Input side: starting from the standard unget function, each enabled
 * decoder saves the current i_getc / i_ungetc in its own pair and installs
 * itself, in the order cap, url, numchar, nfc, mime (only for FIXED_MIME)
 * and broken.
 *
 * Finally the input converter is fixed with set_iconv() when an input
 * encoding was given, otherwise e_iconv is installed as the starting
 * guess.
 * NOTE(review): most of the option tests guarding the individual stages
 * and the use of the input_code_list pointer at the end are on lines not
 * visible in this excerpt.
 */
5257 module_connection(void)
5259 if (input_encoding) set_input_encoding(input_encoding);
5260 if (!output_encoding) {
5261 output_encoding = nkf_default_encoding();
5263 if (!output_encoding) {
5264 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5267 set_output_encoding(output_encoding);
5268 oconv = nkf_enc_to_oconv(output_encoding);
5271 /* replace continuation module, from output side */
5273 /* output redirection */
5275 if (noout_f || guess_f){
5282 if (mimeout_f == TRUE) {
5283 o_base64conv = oconv; oconv = base64_conv;
5285 /* base64_count = 0; */
5288 if (eolmode_f || guess_f) {
5289 o_eol_conv = oconv; oconv = eol_conv;
5292 o_rot_conv = oconv; oconv = rot_conv;
5295 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5298 o_hira_conv = oconv; oconv = hira_conv;
5301 o_fconv = oconv; oconv = fold_conv;
5304 if (alpha_f || x0201_f) {
5305 o_zconv = oconv; oconv = z_conv;
5309 i_ungetc = std_ungetc;
5310 /* input redirection */
5313 i_cgetc = i_getc; i_getc = cap_getc;
5314 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5317 i_ugetc = i_getc; i_getc = url_getc;
5318 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5321 #ifdef NUMCHAR_OPTION
5323 i_ngetc = i_getc; i_getc = numchar_getc;
5324 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5327 #ifdef UNICODE_NORMALIZATION
5329 i_nfc_getc = i_getc; i_getc = nfc_getc;
5330 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5333 if (mime_f && mimebuf_f==FIXED_MIME) {
5334 i_mgetc = i_getc; i_getc = mime_getc;
5335 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5338 i_bgetc = i_getc; i_getc = broken_getc;
5339 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5341 if (input_encoding) {
5342 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5344 set_iconv(FALSE, e_iconv);
5348 struct input_code *p = input_code_list;
5357 Conversion main loop. Code detection only.
/*
 * Pass-through loop, built only when neither PERL_XS nor WIN32DLL is defined:
 * reads from f through i_getc until EOF.
 * NOTE(review): the function header and loop body are not among the
 * visible lines.
 */
5360 #if !defined(PERL_XS) && !defined(WIN32DLL)
5367 module_connection();
5368 while ((c = (*i_getc)(f)) != EOF)
/*
 * Control-flow shorthands for the conversion loop, plus set_input_mode().
 * NEXT and SKIP both restart the loop without output; SKIP also clears c2.
 * MORE stores c1 in c2 and restarts the loop to fetch the next byte.
 * SEND is an empty statement, LAST breaks out of the loop.
 */
5375 #define NEXT continue /* no output, get next */
5376 #define SKIP c2=0;continue /* no output, get next */
5377 #define MORE c2=c1;continue /* need one more byte */
5378 #define SEND ; /* output c1 and c2, get next */
5379 #define LAST break /* end of loop, go closing */
5380 #define set_input_mode(mode) do { \
5381 input_mode = mode; \
5383 set_input_codename("ISO-2022-JP"); \
5384 debug("ISO-2022-JP"); \
/*
 * Main conversion loop.
 *
 * Calls module_connection(), then reads the stream f byte by byte through
 * i_getc.  UTF-32 and UTF-16 input are handled by dedicated loops; all
 * other input goes through the general loop, which tracks shift state and
 * ESC designation sequences (set_input_mode), MIME starts ("=?") and line
 * ends, and passes characters to (*iconv) or directly to (*oconv).
 * After the loop (*iconv)(EOF, 0, 0) is called, and if no input code name
 * was established the input_code_list entry with the lowest score is
 * reported.
 */
5388 kanji_convert(FILE *f)
5390 nkf_char c1=0, c2=0, c3=0, c4=0;
5391 int shift_mode = 0; /* 0, 1, 2, 3 */
5393 int is_8bit = FALSE;
5395 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5400 output_mode = ASCII;
5402 if (module_connection() < 0) {
5403 #if !defined(PERL_XS) && !defined(WIN32DLL)
5404 fprintf(stderr, "no output encoding given\n");
5410 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: four bytes per call to nkf_iconv_utf_32() */
5411 if(iconv == w_iconv32){
5412 while ((c1 = (*i_getc)(f)) != EOF &&
5413 (c2 = (*i_getc)(f)) != EOF &&
5414 (c3 = (*i_getc)(f)) != EOF &&
5415 (c4 = (*i_getc)(f)) != EOF) {
5416 nkf_iconv_utf_32(c1, c2, c3, c4);
5418 (*i_ungetc)(EOF, f);
/* UTF-16 input: two bytes first; a -2 result asks for two more bytes */
5420 else if (iconv == w_iconv16) {
5421 while ((c1 = (*i_getc)(f)) != EOF &&
5422 (c2 = (*i_getc)(f)) != EOF) {
5423 if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
5424 (c3 = (*i_getc)(f)) != EOF &&
5425 (c4 = (*i_getc)(f)) != EOF) {
5426 nkf_iconv_utf_16(c1, c2, c3, c4);
5429 (*i_ungetc)(EOF, f);
/* general byte-at-a-time loop */
5433 while ((c1 = (*i_getc)(f)) != EOF) {
5434 #ifdef INPUT_CODE_FIX
5435 if (!input_encoding)
5441 /* in case of 8th bit is on */
5442 if (!estab_f&&!mime_decode_mode) {
5443 /* in case of not established yet */
5444 /* It is still ambiguous */
5445 if (h_conv(f, c2, c1)==EOF) {
5453 /* in case of already established */
5455 /* ignore bogus code */
5463 /* 2nd byte of 7 bit code or SJIS */
5467 else if (nkf_char_unicode_p(c1)) {
5473 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5476 } else if (c1 > DEL) {
5478 if (!estab_f && !iso8859_f) {
5479 /* not established yet */
5481 } else { /* estab_f==TRUE */
5487 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5488 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5490 c2 = JIS_X_0201_1976_K;
5495 /* already established */
5499 } else if (SP < c1 && c1 < DEL) {
5500 /* in case of Roman characters */
5502 /* output 1 shifted byte */
5506 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5507 /* output 1 shifted byte */
5508 c2 = JIS_X_0201_1976_K;
5511 /* looks like bogus code */
5514 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5515 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5516 /* in case of Kanji shifted */
5518 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5519 /* Check MIME code */
5520 if ((c1 = (*i_getc)(f)) == EOF) {
5523 } else if (c1 == '?') {
5524 /* =? is mime conversion start sequence */
5525 if(mime_f == STRICT_MIME) {
5526 /* check in real detail */
5527 if (mime_begin_strict(f) == EOF)
5530 } else if (mime_begin(f) == EOF)
5539 /* normal ASCII code */
5542 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5545 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5548 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5549 if ((c1 = (*i_getc)(f)) == EOF) {
5550 /* (*oconv)(0, ESC); don't send bogus code */
5553 else if (c1 == '&') {
5555 if ((c1 = (*i_getc)(f)) == EOF) {
5561 else if (c1 == '$') {
5563 if ((c1 = (*i_getc)(f)) == EOF) {
5564 /* don't send bogus code
5566 (*oconv)(0, '$'); */
5568 } else if (c1 == '@' || c1 == 'B') {
5570 set_input_mode(JIS_X_0208);
5572 } else if (c1 == '(') {
5574 if ((c1 = (*i_getc)(f)) == EOF) {
5575 /* don't send bogus code
5581 } else if (c1 == '@'|| c1 == 'B') {
5583 set_input_mode(JIS_X_0208);
5586 } else if (c1 == 'D'){
5587 set_input_mode(JIS_X_0212);
5589 #endif /* X0212_ENABLE */
5590 } else if (c1 == 'O' || c1 == 'Q'){
5591 set_input_mode(JIS_X_0213_1);
5593 } else if (c1 == 'P'){
5594 set_input_mode(JIS_X_0213_2);
5597 /* could be some special code */
5604 } else if (broken_f&0x2) {
5605 /* accept any ESC-(-x as broken code ... */
5606 input_mode = JIS_X_0208;
5615 } else if (c1 == '(') {
5617 if ((c1 = (*i_getc)(f)) == EOF) {
5618 /* don't send bogus code
5620 (*oconv)(0, '('); */
5623 else if (c1 == 'I') {
5624 /* JIS X 0201 Katakana */
5625 set_input_mode(JIS_X_0201_1976_K);
5628 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5629 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5630 set_input_mode(ASCII);
5633 else if (broken_f&0x2) {
5634 set_input_mode(ASCII);
5643 else if (c1 == '.') {
5645 if ((c1 = (*i_getc)(f)) == EOF) {
5648 else if (c1 == 'A') {
5659 else if (c1 == 'N') {
5662 if (g2 == ISO_8859_1) {
5677 } else if (c1 == ESC && iconv == s_iconv) {
5678 /* ESC in Shift_JIS */
5679 if ((c1 = (*i_getc)(f)) == EOF) {
5680 /* (*oconv)(0, ESC); don't send bogus code */
5682 } else if (c1 == '$') {
5684 if ((c1 = (*i_getc)(f)) == EOF) {
5686 } else if (('E' <= c1 && c1 <= 'G') ||
5687 ('O' <= c1 && c1 <= 'Q')) {
/* c1 % 7 selects the base value that is added to each following byte */
5695 static const nkf_char jphone_emoji_first_table[7] =
5696 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5697 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5698 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5699 while (SP <= c1 && c1 <= 'z') {
5700 (*oconv)(0, c1 + c3);
5701 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5716 } else if (c1 == LF || c1 == CR) {
5718 input_mode = ASCII; set_iconv(FALSE, 0);
5720 } else if (mime_decode_f && !mime_decode_mode){
5722 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5730 } else { /* if (c1 == CR)*/
5731 if ((c1=(*i_getc)(f))!=EOF) {
5735 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5755 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5758 if ((c3 = (*i_getc)(f)) != EOF) {
5761 if ((c4 = (*i_getc)(f)) != EOF) {
5763 (*iconv)(c2, c1, c3|c4);
5768 /* 3 bytes EUC or UTF-8 */
5769 if ((c3 = (*i_getc)(f)) != EOF) {
5771 (*iconv)(c2, c1, c3);
5779 0x7F <= c2 && c2 <= 0x92 &&
5780 0x21 <= c1 && c1 <= 0x7E) {
5782 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5785 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5789 (*oconv)(PREFIX_EUCG3 | c2, c1);
5791 #endif /* X0212_ENABLE */
5793 (*oconv)(PREFIX_EUCG3 | c2, c1);
5796 (*oconv)(input_mode, c1); /* other special case */
5802 /* goto next_word */
/* end of input: notify the input converter */
5806 (*iconv)(EOF, 0, 0);
/* no code name established: report the candidate with the lowest score */
5807 if (!input_codename)
5810 struct input_code *p = input_code_list;
5811 struct input_code *result = p;
5813 if (p->score < result->score) result = p;
5816 set_input_codename(result->name);
5818 debug(result->name);
5826 * int options(unsigned char *cp)
/*
 * Parses one option string.
 *
 * cp is advanced past the first '-' and the following characters are then
 * processed as option letters.  A second '-' introduces a long option,
 * which is looked up in long_option[]; an entry with a non-empty alias is
 * handled by continuing the parse on the alias string, the others are
 * handled by name below.
 */
5833 options(unsigned char *cp)
5837 unsigned char *cp_back = NULL;
5842 while(*cp && *cp++!='-');
5843 while (*cp || cp_back) {
5851 case '-': /* literal options */
5852 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
/* match cp against each long option name, stopping at '=' in the name */
5856 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
5857 p = (unsigned char *)long_option[i].name;
5858 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5859 if (*p == cp[j] || cp[j] == SP){
5866 #if !defined(PERL_XS) && !defined(WIN32DLL)
5867 fprintf(stderr, "unknown long option: --%s\n", cp);
/* advance cp to the end of this long option (next space or end of string) */
5871 while(*cp && *cp != SP && cp++);
5872 if (long_option[i].alias[0]){
5874 cp = (unsigned char *)long_option[i].alias;
5876 if (strcmp(long_option[i].name, "ic=") == 0){
5877 enc = nkf_enc_find((char *)p);
5879 input_encoding = enc;
5882 if (strcmp(long_option[i].name, "oc=") == 0){
5883 enc = nkf_enc_find((char *)p);
5884 /* if (enc <= 0) continue; */
5886 output_encoding = enc;
5889 if (strcmp(long_option[i].name, "guess=") == 0){
5890 if (p[0] == '0' || p[0] == '1') {
5898 if (strcmp(long_option[i].name, "overwrite") == 0){
5901 preserve_time_f = TRUE;
5904 if (strcmp(long_option[i].name, "overwrite=") == 0){
5907 preserve_time_f = TRUE;
5909 backup_suffix = (char *)p;
5912 if (strcmp(long_option[i].name, "in-place") == 0){
5915 preserve_time_f = FALSE;
5918 if (strcmp(long_option[i].name, "in-place=") == 0){
5921 preserve_time_f = FALSE;
5923 backup_suffix = (char *)p;
5928 if (strcmp(long_option[i].name, "cap-input") == 0){
5932 if (strcmp(long_option[i].name, "url-input") == 0){
5937 #ifdef NUMCHAR_OPTION
5938 if (strcmp(long_option[i].name, "numchar-input") == 0){
5944 if (strcmp(long_option[i].name, "no-output") == 0){
5948 if (strcmp(long_option[i].name, "debug") == 0){
5953 if (strcmp(long_option[i].name, "cp932") == 0){
5954 #ifdef SHIFTJIS_CP932
5958 #ifdef UTF8_OUTPUT_ENABLE
5959 ms_ucs_map_f = UCS_MAP_CP932;
5963 if (strcmp(long_option[i].name, "no-cp932") == 0){
5964 #ifdef SHIFTJIS_CP932
5968 #ifdef UTF8_OUTPUT_ENABLE
5969 ms_ucs_map_f = UCS_MAP_ASCII;
5973 #ifdef SHIFTJIS_CP932
5974 if (strcmp(long_option[i].name, "cp932inv") == 0){
5981 if (strcmp(long_option[i].name, "x0212") == 0){
5988 if (strcmp(long_option[i].name, "exec-in") == 0){
5992 if (strcmp(long_option[i].name, "exec-out") == 0){
5997 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
5998 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
5999 no_cp932ext_f = TRUE;
6002 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6003 no_best_fit_chars_f = TRUE;
6006 if (strcmp(long_option[i].name, "fb-skip") == 0){
6007 encode_fallback = NULL;
6010 if (strcmp(long_option[i].name, "fb-html") == 0){
6011 encode_fallback = encode_fallback_html;
6014 if (strcmp(long_option[i].name, "fb-xml") == 0){
6015 encode_fallback = encode_fallback_xml;
6018 if (strcmp(long_option[i].name, "fb-java") == 0){
6019 encode_fallback = encode_fallback_java;
6022 if (strcmp(long_option[i].name, "fb-perl") == 0){
6023 encode_fallback = encode_fallback_perl;
6026 if (strcmp(long_option[i].name, "fb-subchar") == 0){
6027 encode_fallback = encode_fallback_subchar;
/*
 * NOTE(review): the digit loops below reuse i (the long_option index) as a
 * scratch counter and w16e_conv() writes into i and j, so long_option[i]
 * is stale afterwards -- presumably this branch leaves via continue;
 * verify against the full source.
 */
6030 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6031 encode_fallback = encode_fallback_subchar;
6032 unicode_subchar = 0;
6034 /* decimal number */
6035 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6036 unicode_subchar *= 10;
6037 unicode_subchar += hex2bin(p[i]);
6039 }else if(p[1] == 'x' || p[1] == 'X'){
6040 /* hexadecimal number */
6041 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6042 unicode_subchar <<= 4;
6043 unicode_subchar |= hex2bin(p[i]);
6047 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6048 unicode_subchar *= 8;
6049 unicode_subchar += hex2bin(p[i]);
6052 w16e_conv(unicode_subchar, &i, &j);
6053 unicode_subchar = i<<8 | j;
6057 #ifdef UTF8_OUTPUT_ENABLE
6058 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6059 ms_ucs_map_f = UCS_MAP_MS;
6063 #ifdef UNICODE_NORMALIZATION
6064 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
/* p[0] is the prefix character; it is recorded for each following graphic character (i is reused here too) */
6069 if (strcmp(long_option[i].name, "prefix=") == 0){
6070 if (nkf_isgraph(p[0])){
6071 for (i = 1; nkf_isgraph(p[i]); i++){
6072 prefix_table[p[i]] = p[0];
6077 #if !defined(PERL_XS) && !defined(WIN32DLL)
6078 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6083 case 'b': /* buffered mode */
6086 case 'u': /* non-buffered mode */
6089 case 't': /* transparent mode */
6094 } else if (*cp=='2') {
6098 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6106 case 'j': /* JIS output */
6108 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6110 case 'e': /* AT&T EUC output */
6111 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6113 case 's': /* SJIS output */
6114 output_encoding = nkf_enc_from_index(WINDOWS_31J);
6116 case 'l': /* ISO8859 Latin-1 support, no conversion */
6117 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6118 input_encoding = nkf_enc_from_index(ISO_8859_1);
6120 case 'i': /* Kanji IN ESC-$-@/B */
6121 if (*cp=='@'||*cp=='B')
6122 kanji_intro = *cp++;
6124 case 'o': /* ASCII IN ESC-(-J/B */
6125 if (*cp=='J'||*cp=='B'||*cp=='H')
6126 ascii_intro = *cp++;
6130 bit:1 katakana->hiragana
6131 bit:2 hiragana->katakana
6133 if ('9'>= *cp && *cp>='0')
6134 hira_f |= (*cp++ -'0');
6141 #if defined(MSDOS) || defined(__OS2__)
6148 show_configuration();
6156 #ifdef UTF8_OUTPUT_ENABLE
6157 case 'w': /* UTF-8 output */
6162 output_encoding = nkf_enc_from_index(UTF_8N);
6164 output_bom_f = TRUE;
6165 output_encoding = nkf_enc_from_index(UTF_8_BOM);
6169 if ('1'== cp[0] && '6'==cp[1]) {
6172 } else if ('3'== cp[0] && '2'==cp[1]) {
6176 output_encoding = nkf_enc_from_index(UTF_8);
6181 output_endian = ENDIAN_LITTLE;
6182 } else if (cp[0] == 'B') {
6185 output_encoding = nkf_enc_from_index(enc_idx);
6190 enc_idx = enc_idx == UTF_16
6191 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6192 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6194 output_bom_f = TRUE;
6195 enc_idx = enc_idx == UTF_16
6196 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6197 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6199 output_encoding = nkf_enc_from_index(enc_idx);
6203 #ifdef UTF8_INPUT_ENABLE
6204 case 'W': /* UTF input */
6207 input_encoding = nkf_enc_from_index(UTF_8);
6210 if ('1'== cp[0] && '6'==cp[1]) {
6212 input_endian = ENDIAN_BIG;
6214 } else if ('3'== cp[0] && '2'==cp[1]) {
6216 input_endian = ENDIAN_BIG;
6219 input_encoding = nkf_enc_from_index(UTF_8);
6224 input_endian = ENDIAN_LITTLE;
6225 } else if (cp[0] == 'B') {
6227 input_endian = ENDIAN_BIG;
6229 enc_idx = (enc_idx == UTF_16
6230 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6231 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6232 input_encoding = nkf_enc_from_index(enc_idx);
6236 /* Input code assumption */
6237 case 'J': /* ISO-2022-JP input */
6238 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6240 case 'E': /* EUC-JP input */
6241 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6243 case 'S': /* Windows-31J input */
6244 input_encoding = nkf_enc_from_index(WINDOWS_31J);
6246 case 'Z': /* Convert X0208 alphabet to ASCII */
6248 bit:0 Convert JIS X 0208 Alphabet to ASCII
6249 bit:1 Convert Kankaku to one space
6250 bit:2 Convert Kankaku to two spaces
6251 bit:3 Convert HTML Entity
6252 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6254 while ('0'<= *cp && *cp <='9') {
6255 alpha_f |= 1 << (*cp++ - '0');
6257 if (!alpha_f) alpha_f = 1;
6259 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6260 x0201_f = FALSE; /* No X0201->X0208 conversion */
6262 ESC-(-I in JIS, EUC, MS Kanji
6263 SI/SO in JIS, EUC, MS Kanji
6264 SS2 in EUC, JIS, not in MS Kanji
6265 MS Kanji (0xa0-0xdf)
6267 ESC-(-I in JIS (0x20-0x5f)
6268 SS2 in EUC (0xa0-0xdf)
6269 0xa0-0xd in MS Kanji (0xa0-0xdf)
6272 case 'X': /* Convert X0201 kana to X0208 */
6275 case 'F': /* preserve new lines */
6276 fold_preserve_f = TRUE;
/* falls through: 'F' shares the folding setup of 'f' */
6277 case 'f': /* folding -f60 or -f */
6280 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6282 fold_len += *cp++ - '0';
/* out-of-range fold length: use the default */
6284 if (!(0<fold_len && fold_len<BUFSIZ))
6285 fold_len = DEFAULT_FOLD;
6289 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6291 fold_margin += *cp++ - '0';
6295 case 'm': /* MIME support */
6296 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6297 if (*cp=='B'||*cp=='Q') {
6298 mime_decode_mode = *cp++;
6299 mimebuf_f = FIXED_MIME;
6300 } else if (*cp=='N') {
6301 mime_f = TRUE; cp++;
6302 } else if (*cp=='S') {
6303 mime_f = STRICT_MIME; cp++;
6304 } else if (*cp=='0') {
6305 mime_decode_f = FALSE;
6306 mime_f = FALSE; cp++;
6308 mime_f = STRICT_MIME;
6311 case 'M': /* MIME output */
6314 mimeout_f = FIXED_MIME; cp++;
6315 } else if (*cp=='Q') {
6317 mimeout_f = FIXED_MIME; cp++;
6322 case 'B': /* Broken JIS support */
6324 bit:1 allow any x on ESC-(-x or ESC-$-x
6325 bit:2 reset to ascii on NL
6327 if ('9'>= *cp && *cp>='0')
6328 broken_f |= 1<<(*cp++ -'0');
6333 case 'O':/* for Output file */
6337 case 'c':/* add cr code */
6340 case 'd':/* delete cr code */
6343 case 'I': /* ISO-2022-JP output */
6346 case 'L': /* line mode */
6347 if (*cp=='u') { /* unix */
6348 eolmode_f = LF; cp++;
6349 } else if (*cp=='m') { /* mac */
6350 eolmode_f = CR; cp++;
6351 } else if (*cp=='w') { /* windows */
6352 eolmode_f = CRLF; cp++;
6353 } else if (*cp=='0') { /* no conversion */
6354 eolmode_f = 0; cp++;
6359 if ('2' <= *cp && *cp <= '9') {
6362 } else if (*cp == '0' || *cp == '1') {
6371 /* multiple options in a string are allowed for the Perl module */
6372 while(*cp && *cp++!='-');
6375 #if !defined(PERL_XS) && !defined(WIN32DLL)
6376 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6378 /* bogus option but ignored */
6386 #include "nkf32dll.c"
6387 #elif defined(PERL_XS)
6388 #else /* WIN32DLL */
/*
 * Command-line entry point.
 *
 * Consumes the leading arguments that start with '-', then converts either
 * stdin or each named input file with kanji_convert().  When file_out_f is
 * set, output goes to a temporary file next to the input; its permissions
 * (and, with preserve_time_f, its timestamps) are copied from the original,
 * the original is optionally renamed to a backup name, and the temporary
 * file is renamed over the original.
 */
6390 main(int argc, char **argv)
6395 char *outfname = NULL;
6398 #ifdef EASYWIN /*Easy Win */
6399 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6401 #ifdef DEFAULT_CODE_LOCALE
6402 setlocale(LC_CTYPE, "");
/* every leading argument that begins with '-' is treated as options */
6404 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6405 cp = (unsigned char *)*argv;
6410 if (pipe(fds) < 0 || (pid = fork()) < 0){
6421 execvp(argv[1], &argv[1]);
/* these flags are saved here and restored below */
6438 int debug_f_back = debug_f;
6441 int exec_f_back = exec_f;
6444 int x0212_f_back = x0212_f;
6446 int x0213_f_back = x0213_f;
6447 int guess_f_back = guess_f;
6449 guess_f = guess_f_back;
6452 debug_f = debug_f_back;
6455 exec_f = exec_f_back;
6457 x0212_f = x0212_f_back;
6458 x0213_f = x0213_f_back;
6461 if (binmode_f == TRUE)
6462 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6463 if (freopen("","wb",stdout) == NULL)
6470 setbuf(stdout, (char *) NULL);
6472 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6475 if (binmode_f == TRUE)
6476 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6477 if (freopen("","rb",stdin) == NULL) return (-1);
6481 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6485 kanji_convert(stdin);
6486 if (guess_f) print_guessed_code(NULL);
6490 int is_argument_error = FALSE;
6492 input_codename = NULL;
6495 iconv_for_check = 0;
6497 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6499 is_argument_error = TRUE;
6507 /* reopen file for stdout */
6508 if (file_out_f == TRUE) {
/* the temporary name is built from the input name plus a template suffix */
6511 outfname = nkf_malloc(strlen(origfname)
6512 + strlen(".nkftmpXXXXXX")
6514 strcpy(outfname, origfname);
/* scan backwards for the last '/' or '\\' in the name */
6518 for (i = strlen(outfname); i; --i){
6519 if (outfname[i - 1] == '/'
6520 || outfname[i - 1] == '\\'){
6526 strcat(outfname, "ntXXXXXX");
6528 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6529 S_IREAD | S_IWRITE);
6531 strcat(outfname, ".nkftmpXXXXXX");
6532 fd = mkstemp(outfname);
/* keep a duplicate of stdout, then point stdout at the temporary file */
6535 || (fd_backup = dup(fileno(stdout))) < 0
6536 || dup2(fd, fileno(stdout)) < 0
6547 outfname = "nkf.out";
6550 if(freopen(outfname, "w", stdout) == NULL) {
6554 if (binmode_f == TRUE) {
6555 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6556 if (freopen("","wb",stdout) == NULL)
6563 if (binmode_f == TRUE)
6564 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6565 if (freopen("","rb",fin) == NULL)
6570 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6574 char *filename = NULL;
/* the file name is passed to print_guessed_code() only when several files are given */
6576 if (nfiles > 1) filename = origfname;
6577 if (guess_f) print_guessed_code(filename);
6583 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
/* put the saved stdout descriptor back */
6591 if (dup2(fd_backup, fileno(stdout)) < 0){
6594 if (stat(origfname, &sb)) {
6595 fprintf(stderr, "Can't stat %s\n", origfname);
6597 /* restore the permissions */
6598 if (chmod(outfname, sb.st_mode)) {
6599 fprintf(stderr, "Can't set permission %s\n", outfname);
6602 /* restore the timestamps */
6603 if(preserve_time_f){
6604 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6605 tb[0] = tb[1] = sb.st_mtime;
6606 if (utime(outfname, tb)) {
6607 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6610 tb.actime = sb.st_atime;
6611 tb.modtime = sb.st_mtime;
6612 if (utime(outfname, &tb)) {
6613 fprintf(stderr, "Can't set timestamp %s\n", outfname);
/* move the original aside under its backup name */
6618 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6620 unlink(backup_filename);
6622 if (rename(origfname, backup_filename)) {
6623 perror(backup_filename);
6624 fprintf(stderr, "Can't rename %s to %s\n",
6625 origfname, backup_filename);
6627 nkf_free(backup_filename);
6630 if (unlink(origfname)){
/* the converted temporary file takes the original's name */
6635 if (rename(outfname, origfname)) {
6637 fprintf(stderr, "Can't rename %s to %s\n",
6638 outfname, origfname);
6645 if (is_argument_error)
6648 #ifdef EASYWIN /*Easy Win */
6649 if (file_out_f == FALSE)
6650 scanf("%d",&end_check);
6653 #else /* for Other OS */
6654 if (file_out_f == TRUE)
6656 #endif /*Easy Win */
6659 #endif /* WIN32DLL */