1 /** Network Kanji Filter. (PDS Version)
2 ** -*- coding: ISO-2022-JP -*-
3 ************************************************************************
4 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
5 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
6 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
7 ** Copyright (C) 1996,1998
9 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
10 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
11 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
12 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
14 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
15 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
16 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
17 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
18 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
19 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
21 ** Everyone is permitted to do anything on this program
22 ** including copying, modifying, improving,
23 ** as long as you don't try to pretend that you wrote it.
24 ** i.e., the above copyright notice has to appear in all copies.
25 ** Binary distribution requires original version messages.
26 ** You don't have to ask before copying, redistribution or publishing.
27 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
28 ***********************************************************************/
30 /***********************************************************************
31 *
\e$B8=:_!"
\e(Bnkf
\e$B$O
\e(B SourceForge
\e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#
\e(B
32 * http://sourceforge.jp/projects/nkf/
33 ***********************************************************************/
34 #define NKF_IDENT "$Id: nkf.c,v 1.191 2008/11/09 21:10:04 naruse Exp $"
35 #define NKF_VERSION "2.0.8"
36 #define NKF_RELEASE_DATE "2008-11-10"
38 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
39 "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
50 # define INCL_DOSERRORS
55 /* state of output_mode and input_mode
134 NKF_ENCODING_TABLE_SIZE,
135 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
136 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
137 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
138 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
139 JIS_X_0208 = 0x1168, /* @B */
140 JIS_X_0212 = 0x1159, /* D */
141 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
142 JIS_X_0213_2 = 0x1229, /* P */
143 JIS_X_0213_1 = 0x1233, /* Q */
/*
 * Forward declarations of the per-encoding converters.
 *
 * Each *_iconv function takes three nkf_char values (c2, c1, c0) and
 * returns an nkf_char; each *_oconv function takes (c2, c1) and returns
 * nothing.  They are declared up front so that the NkfEncoding*
 * descriptors defined right after can store pointers to them.
 */
146 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
147 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
148 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
149 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
150 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
151 static void j_oconv(nkf_char c2, nkf_char c1);
152 static void s_oconv(nkf_char c2, nkf_char c1);
153 static void e_oconv(nkf_char c2, nkf_char c1);
154 static void w_oconv(nkf_char c2, nkf_char c1);
155 static void w_oconv16(nkf_char c2, nkf_char c1);
156 static void w_oconv32(nkf_char c2, nkf_char c1);
160 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
161 void (*oconv)(nkf_char c2, nkf_char c1);
162 } nkf_native_encoding;
/*
 * Base ("native") encoding descriptors.  Each initializer lists a name
 * string, then the input converter (iconv), then the output converter
 * (oconv).
 *
 * Note that ASCII reuses the EUC-JP converters on both sides, and
 * ISO-2022-JP reuses e_iconv for input but has its own j_oconv for output.
 * nkf_encoding_table entries point at these objects as their base encoding.
 */
164 nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv };
165 nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv };
166 nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv };
167 nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv };
168 nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv };
169 nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 };
170 nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 };
175 const nkf_native_encoding *base_encoding;
178 nkf_encoding nkf_encoding_table[] = {
179 {ASCII, "US-ASCII", &NkfEncodingASCII},
180 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
181 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
182 {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
183 {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
184 {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
185 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
186 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
187 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
188 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
189 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
190 {CP10001, "CP10001", &NkfEncodingShift_JIS},
191 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
192 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
193 {CP51932, "CP51932", &NkfEncodingEUC_JP},
194 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
195 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
196 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
197 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
198 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
199 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
200 {UTF_8, "UTF-8", &NkfEncodingUTF_8},
201 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
202 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
203 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
204 {UTF_16, "UTF-16", &NkfEncodingUTF_16},
205 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
206 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
207 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
208 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
209 {UTF_32, "UTF-32", &NkfEncodingUTF_32},
210 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
211 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
212 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
213 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
214 {BINARY, "BINARY", &NkfEncodingASCII},
221 } encoding_name_to_id_table[] = {
224 {"ISO-2022-JP", ISO_2022_JP},
225 {"ISO2022JP-CP932", CP50220},
226 {"CP50220", CP50220},
227 {"CP50221", CP50221},
228 {"CSISO2022JP", CP50221},
229 {"CP50222", CP50222},
230 {"ISO-2022-JP-1", ISO_2022_JP_1},
231 {"ISO-2022-JP-3", ISO_2022_JP_3},
232 {"ISO-2022-JP-2004", ISO_2022_JP_2004},
233 {"SHIFT_JIS", SHIFT_JIS},
235 {"WINDOWS-31J", WINDOWS_31J},
236 {"CSWINDOWS31J", WINDOWS_31J},
237 {"CP932", WINDOWS_31J},
238 {"MS932", WINDOWS_31J},
239 {"CP10001", CP10001},
242 {"EUCJP-NKF", EUCJP_NKF},
243 {"CP51932", CP51932},
244 {"EUC-JP-MS", EUCJP_MS},
245 {"EUCJP-MS", EUCJP_MS},
246 {"EUCJPMS", EUCJP_MS},
247 {"EUC-JP-ASCII", EUCJP_ASCII},
248 {"EUCJP-ASCII", EUCJP_ASCII},
249 {"SHIFT_JISX0213", SHIFT_JISX0213},
250 {"SHIFT_JIS-2004", SHIFT_JIS_2004},
251 {"EUC-JISX0213", EUC_JISX0213},
252 {"EUC-JIS-2004", EUC_JIS_2004},
255 {"UTF-8-BOM", UTF_8_BOM},
256 {"UTF8-MAC", UTF8_MAC},
257 {"UTF-8-MAC", UTF8_MAC},
259 {"UTF-16BE", UTF_16BE},
260 {"UTF-16BE-BOM", UTF_16BE_BOM},
261 {"UTF-16LE", UTF_16LE},
262 {"UTF-16LE-BOM", UTF_16LE_BOM},
264 {"UTF-32BE", UTF_32BE},
265 {"UTF-32BE-BOM", UTF_32BE_BOM},
266 {"UTF-32LE", UTF_32LE},
267 {"UTF-32LE-BOM", UTF_32LE_BOM},
272 #if defined(DEFAULT_CODE_JIS)
273 #define DEFAULT_ENCIDX ISO_2022_JP
274 #elif defined(DEFAULT_CODE_SJIS)
275 #define DEFAULT_ENCIDX SHIFT_JIS
276 #elif defined(DEFAULT_CODE_WINDOWS_31J)
277 #define DEFAULT_ENCIDX WINDOWS_31J
278 #elif defined(DEFAULT_CODE_EUC)
279 #define DEFAULT_ENCIDX EUC_JP
280 #elif defined(DEFAULT_CODE_UTF8)
281 #define DEFAULT_ENCIDX UTF_8
/*
 * Character classification and small conversion helpers.
 *
 * All of these compare against character literals only, so they do not
 * depend on <ctype.h> or the current locale.
 *
 * They are function-like macros.  Every use of a parameter is
 * parenthesized so that an expression argument (e.g. `a | b` or
 * `x ? y : z`) is treated as a single value.  A parameter may still be
 * evaluated more than once, so never pass an argument with side effects
 * (c++, a getc() call, ...).
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit character; any non-hex character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hex digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (taken as unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and for characters strictly between SP and DEL other
   than ? = _ ( ) . and the double quote (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
/* True for SP <= c < 0x60, i.e. below (0xE0 & 0x7F). */
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) < (0xE0&0x7F))
311 #define HOLD_SIZE 1024
312 #if defined(INT_IS_SHORT)
313 #define IOBUF_SIZE 2048
315 #define IOBUF_SIZE 16384
318 #define DEFAULT_J 'B'
319 #define DEFAULT_R 'B'
326 /* MIME preprocessor */
328 #ifdef EASYWIN /*Easy Win */
329 extern POINT _BufferSize;
338 void (*status_func)(struct input_code *, nkf_char);
339 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
343 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
344 static nkf_encoding *input_encoding = NULL;
345 static nkf_encoding *output_encoding = NULL;
347 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
349 * 0: Shift_JIS, eucJP-ascii
354 #define UCS_MAP_ASCII 0
356 #define UCS_MAP_CP932 2
357 #define UCS_MAP_CP10001 3
358 static int ms_ucs_map_f = UCS_MAP_ASCII;
360 #ifdef UTF8_INPUT_ENABLE
361 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
362 static int no_cp932ext_f = FALSE;
363 /* ignore ZERO WIDTH NO-BREAK SPACE */
364 static int no_best_fit_chars_f = FALSE;
365 static int input_endian = ENDIAN_BIG;
366 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
367 static void (*encode_fallback)(nkf_char c) = NULL;
368 static void w_status(struct input_code *, nkf_char);
370 #ifdef UTF8_OUTPUT_ENABLE
371 static int output_bom_f = FALSE;
372 static int output_endian = ENDIAN_BIG;
375 static void std_putc(nkf_char c);
376 static nkf_char std_getc(FILE *f);
377 static nkf_char std_ungetc(nkf_char c,FILE *f);
379 static nkf_char broken_getc(FILE *f);
380 static nkf_char broken_ungetc(nkf_char c,FILE *f);
382 static nkf_char mime_getc(FILE *f);
384 static void mime_putc(nkf_char c);
388 #if !defined(PERL_XS) && !defined(WIN32DLL)
389 static unsigned char stdibuf[IOBUF_SIZE];
390 static unsigned char stdobuf[IOBUF_SIZE];
/*
 * Global option flags.  Each one starts at the default shown here;
 * NOTE(review): presumably they are updated while command-line options are
 * parsed -- the parser is not visible in this part of the file.
 */
394 static int unbuf_f = FALSE; /* presumably: unbuffered output requested -- TODO confirm */
395 static int estab_f = FALSE; /* presumably: input encoding has been established -- TODO confirm */
396 static int nop_f = FALSE; /* presumably: no-conversion (pass-through) mode -- TODO confirm */
397 static int binmode_f = TRUE; /* binary mode */
398 static int rot_f = FALSE; /* ROT13/47 mode */
399 static int hira_f = FALSE; /* hiragana/katakana conversion */
400 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
401 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
402 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
403 static int mimebuf_f = FALSE; /* MIME buffered input */
404 static int broken_f = FALSE; /* convert ESC-less broken JIS */
405 static int iso8859_f = FALSE; /* ISO8859 through */
406 static int mimeout_f = FALSE; /* base64 mode */
407 static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
408 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
410 #ifdef UNICODE_NORMALIZATION
411 static int nfc_f = FALSE;
412 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
413 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
417 static int cap_f = FALSE;
418 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
419 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
421 static int url_f = FALSE;
422 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
423 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
/*
 * nkf_char tagging helpers.
 *
 * The top byte of an nkf_char (CLASS_MASK) holds a class tag and the low
 * 24 bits (VALUE_MASK) hold the value.  CLASS_UNICODE tags a value as a
 * Unicode code point; PREFIX_EUCG3 is OR-ed into a code to mark it with
 * the 0x8F prefix byte.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * such as `a | b` is masked as a whole (`&` binds tighter than `|`).
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
/* Constructors: OR the tag into the given code. */
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
/* True when the class byte of c is exactly CLASS_UNICODE. */
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
/* True when the value part of c is at most U+FFFF. */
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
/* True when the value part of c is at most U+10FFFF. */
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
438 #ifdef NUMCHAR_OPTION
439 static int numchar_f = FALSE;
440 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
441 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
445 static int noout_f = FALSE;
446 static void no_putc(nkf_char c);
447 static int debug_f = FALSE;
448 static void debug(const char *str);
449 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
452 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
453 static void set_input_codename(const char *codename);
456 static int exec_f = 0;
459 #ifdef SHIFTJIS_CP932
460 /* invert IBM extended characters to others */
461 static int cp51932_f = FALSE;
463 /* invert NEC-selected IBM extended characters to IBM extended characters */
464 static int cp932inv_f = TRUE;
466 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
467 #endif /* SHIFTJIS_CP932 */
469 static int x0212_f = FALSE;
470 static int x0213_f = FALSE;
472 static unsigned char prefix_table[256];
474 static void e_status(struct input_code *, nkf_char);
475 static void s_status(struct input_code *, nkf_char);
477 struct input_code input_code_list[] = {
478 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
479 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
480 #ifdef UTF8_INPUT_ENABLE
481 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
486 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487 static int base64_count = 0;
489 /* X0208 -> ASCII converter */
492 static int f_line = 0; /* chars in line */
493 static int f_prev = 0;
494 static int fold_preserve_f = FALSE; /* preserve new lines */
495 static int fold_f = FALSE;
496 static int fold_len = 0;
499 static unsigned char kanji_intro = DEFAULT_J;
500 static unsigned char ascii_intro = DEFAULT_R;
504 #define FOLD_MARGIN 10
505 #define DEFAULT_FOLD 60
507 static int fold_margin = FOLD_MARGIN;
509 /* process default */
512 no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
514 fprintf(stderr,"nkf internal module connection failure.\n");
520 no_connection(nkf_char c2, nkf_char c1)
522 no_connection2(c2,c1,0);
525 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
534 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
536 /* static redirections */
538 static void (*o_putc)(nkf_char c) = std_putc;
540 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
541 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
543 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
544 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
546 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
548 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
549 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
551 /* for strict mime */
552 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
553 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
556 static int output_mode = ASCII; /* output kanji mode */
557 static int input_mode = ASCII; /* input kanji mode */
558 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
560 /* X0201 / X0208 conversion tables */
562 /* X0201 kana conversion table */
564 static const unsigned char cv[]= {
565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
584 /* X0201 kana conversion table for dakuten */
586 static const unsigned char dv[]= {
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605 /* X0201 kana conversion table for han-dakuten */
607 static const unsigned char ev[]= {
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
627 /* X0208 kigou conversion table */
628 /* 0x8140 - 0x819e */
629 static const unsigned char fv[] = {
631 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
632 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
633 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
634 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
635 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
636 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
637 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
638 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
639 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
647 static int option_mode = 0;
648 static int file_out_f = FALSE;
650 static int overwrite_f = FALSE;
651 static int preserve_time_f = FALSE;
652 static int backup_f = FALSE;
653 static char *backup_suffix = "";
656 static int eolmode_f = 0; /* CR, LF, CRLF */
657 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
658 static nkf_char prev_cr = 0; /* CR or 0 */
659 #ifdef EASYWIN /*Easy Win */
660 static int end_check;
663 #define STD_GC_BUFSIZE (256)
664 nkf_char std_gc_buf[STD_GC_BUFSIZE];
668 nkf_malloc(size_t size)
672 if (size == 0) size = 1;
676 perror("can't malloc");
684 nkf_realloc(void *ptr, size_t size)
686 if (size == 0) size = 1;
688 ptr = realloc(ptr, size);
690 perror("can't realloc");
697 #define nkf_free(ptr) free(ptr)
700 nkf_str_caseeql(const char *src, const char *target)
703 for (i = 0; src[i] && target[i]; i++) {
704 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
706 if (src[i] || target[i]) return FALSE;
711 nkf_enc_from_index(int idx)
713 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
716 return &nkf_encoding_table[idx];
720 nkf_enc_find_index(const char *name)
723 if (name[0] == 'X' && *(name+1) == '-') name += 2;
724 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
725 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
726 return encoding_name_to_id_table[i].id;
733 nkf_enc_find(const char *name)
736 idx = nkf_enc_find_index(name);
737 if (idx < 0) return 0;
738 return nkf_enc_from_index(idx);
/*
 * Accessors and predicates for a pointer to an nkf_encoding entry.
 *
 * The first three read the entry's own fields; the iconv/oconv accessors
 * go through the entry's base encoding.  The predicates classify an entry
 * either by which base-encoding descriptor it points at or by its id.
 * The argument is evaluated more than once in the predicates, so it must
 * not have side effects.
 */
#define nkf_enc_name(e)             ((e)->name)
#define nkf_enc_to_index(e)         ((e)->id)
#define nkf_enc_to_base_encoding(e) ((e)->base_encoding)
#define nkf_enc_to_iconv(e)         ((e)->base_encoding->iconv)
#define nkf_enc_to_oconv(e)         ((e)->base_encoding->oconv)

/* Base encoding is the ASCII or the ISO-2022-JP descriptor. */
#define nkf_enc_asciicompat(e) \
    ((e)->base_encoding == &NkfEncodingASCII || \
     (e)->base_encoding == &NkfEncodingISO_2022_JP)

/* Base encoding is one of the UTF-8 / UTF-16 / UTF-32 descriptors. */
#define nkf_enc_unicode_p(e) \
    ((e)->base_encoding == &NkfEncodingUTF_8 || \
     (e)->base_encoding == &NkfEncodingUTF_16 || \
     (e)->base_encoding == &NkfEncodingUTF_32)

/* Entry id is CP50220, CP50221 or CP50222. */
#define nkf_enc_cp5022x_p(e) \
    ((e)->id == CP50220 || \
     (e)->id == CP50221 || \
     (e)->id == CP50222)
758 #ifdef DEFAULT_CODE_LOCALE
762 #ifdef HAVE_LANGINFO_H
763 return nl_langinfo(CODESET);
764 #elif defined(__WIN32__)
767 int len = sprintf(buf, "CP%d", GetACP());
769 str = nkf_malloc(len + 1);
775 #elif defined(__OS2__)
776 # if defined(INT_IS_SHORT)
782 ULONG ulCP[1], ulncp;
783 DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
784 if (ulCP[0] == 932 || ulCP[0] == 943)
785 strcpy(buf, "Shift_JIS");
787 sprintf(buf, "CP%lu", ulCP[0]);
796 nkf_locale_encoding()
798 nkf_encoding *enc = 0;
799 char *encname = nkf_locale_charmap();
801 enc = nkf_enc_find(encname);
804 #endif /* DEFAULT_CODE_LOCALE */
809 return &nkf_encoding_table[UTF_8];
813 nkf_default_encoding()
815 nkf_encoding *enc = 0;
816 #ifdef DEFAULT_CODE_LOCALE
817 enc = nkf_locale_encoding();
818 #elif defined(DEFAULT_ENCIDX)
819 enc = nkf_enc_from_index(DEFAULT_ENCIDX);
821 if (!enc) enc = nkf_utf8_encoding();
827 #define fprintf dllprintf
833 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
840 "USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
842 "b,u Output is buffered (DEFAULT),Output is unbuffered\n"
843 "j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
844 #ifdef UTF8_OUTPUT_ENABLE
845 " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
847 "J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
848 #ifdef UTF8_INPUT_ENABLE
849 " After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
852 "i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
853 "o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"
854 "r {de/en}crypt ROT13/47\n"
855 "h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
856 "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
857 "M[BQ] MIME encode [B:base64 Q:quoted]\n"
858 "l ISO8859-1 (Latin-1) support\n"
859 "f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
860 "Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
861 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
862 " 4: JISX0208 Katakana to JISX0201 Katakana\n"
863 "X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
864 "B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
866 "T Text mode output\n"
868 "O Output to File (DEFAULT 'nkf.out')\n"
869 "I Convert non ISO-2022-JP charactor to GETA\n"
870 "d,c Convert line breaks -d: LF -c: CRLF\n"
871 "-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
872 "v, V Show this usage. V: show configuration\n"
874 "Long name options\n"
875 " --ic=<input codeset> --oc=<output codeset>\n"
876 " Specify the input or output codeset\n"
877 " --fj --unix --mac --windows\n"
878 " --jis --euc --sjis --utf8 --utf16 --mime --base64\n"
879 " Convert for the system or code\n"
880 " --hiragana --katakana --katakana-hiragana\n"
881 " To Hiragana/Katakana Conversion\n"
882 " --prefix= Insert escape before troublesome characters of Shift_JIS\n"
884 " --cap-input, --url-input Convert hex after ':' or '%%'\n"
886 #ifdef NUMCHAR_OPTION
887 " --numchar-input Convert Unicode Character Reference\n"
889 #ifdef UTF8_INPUT_ENABLE
890 " --fb-{skip, html, xml, perl, java, subchar}\n"
891 " Specify how nkf handles unassigned characters\n"
894 " --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"
895 " Overwrite original listed files by filtered result\n"
896 " --overwrite preserves timestamp of original files\n"
898 " -g --guess Guess the input code\n"
899 " --help --version Show this help/the version\n"
900 " For more information, see also man nkf\n"
906 show_configuration(void)
909 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
912 " Compile-time options:\n"
913 " Compiled at: " __DATE__ " " __TIME__ "\n"
916 " Default output encoding: "
917 #ifdef DEFAULT_CODE_LOCALE
918 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
919 #elif defined(DEFAULT_ENCIDX)
920 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
926 " Default output end of line: "
927 #if DEFAULT_NEWLINE == CR
929 #elif DEFAULT_NEWLINE == CRLF
935 " Decode MIME encoded string: "
936 #if MIME_DECODE_DEFAULT
942 " Convert JIS X 0201 Katakana: "
949 " --help, --version output: "
950 #if HELP_OUTPUT_HELP_OUTPUT
961 get_backup_filename(const char *suffix, const char *filename)
963 char *backup_filename;
964 int asterisk_count = 0;
966 int filename_length = strlen(filename);
968 for(i = 0; suffix[i]; i++){
969 if(suffix[i] == '*') asterisk_count++;
973 backup_filename = nkf_malloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1);
974 for(i = 0, j = 0; suffix[i];){
975 if(suffix[i] == '*'){
976 backup_filename[j] = '\0';
977 strncat(backup_filename, filename, filename_length);
979 j += filename_length;
981 backup_filename[j++] = suffix[i++];
984 backup_filename[j] = '\0';
986 j = filename_length + strlen(suffix);
987 backup_filename = nkf_malloc(j + 1);
988 strcpy(backup_filename, filename);
989 strcat(backup_filename, suffix);
990 backup_filename[j] = '\0';
992 return backup_filename;
996 #ifdef UTF8_INPUT_ENABLE
998 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1005 (*f)(0, bin2hex(c>>shift));
1016 encode_fallback_html(nkf_char c)
1021 if(c >= NKF_INT32_C(1000000))
1022 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1023 if(c >= NKF_INT32_C(100000))
1024 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1026 (*oconv)(0, 0x30+(c/10000 )%10);
1028 (*oconv)(0, 0x30+(c/1000 )%10);
1030 (*oconv)(0, 0x30+(c/100 )%10);
1032 (*oconv)(0, 0x30+(c/10 )%10);
1034 (*oconv)(0, 0x30+ c %10);
1040 encode_fallback_xml(nkf_char c)
1045 nkf_each_char_to_hex(oconv, c);
1051 encode_fallback_java(nkf_char c)
1055 if(!nkf_char_unicode_bmp_p(c)){
1059 (*oconv)(0, bin2hex(c>>20));
1060 (*oconv)(0, bin2hex(c>>16));
1064 (*oconv)(0, bin2hex(c>>12));
1065 (*oconv)(0, bin2hex(c>> 8));
1066 (*oconv)(0, bin2hex(c>> 4));
1067 (*oconv)(0, bin2hex(c ));
1072 encode_fallback_perl(nkf_char c)
1077 nkf_each_char_to_hex(oconv, c);
1083 encode_fallback_subchar(nkf_char c)
1085 c = unicode_subchar;
1086 (*oconv)((c>>8)&0xFF, c&0xFF);
1091 static const struct {
1115 {"katakana-hiragana","h3"},
1123 #ifdef UTF8_OUTPUT_ENABLE
1133 {"fb-subchar=", ""},
1135 #ifdef UTF8_INPUT_ENABLE
1136 {"utf8-input", "W"},
1137 {"utf16-input", "W16"},
1138 {"no-cp932ext", ""},
1139 {"no-best-fit-chars",""},
1141 #ifdef UNICODE_NORMALIZATION
1142 {"utf8mac-input", ""},
1154 #ifdef NUMCHAR_OPTION
1155 {"numchar-input", ""},
1161 #ifdef SHIFTJIS_CP932
1172 set_input_encoding(nkf_encoding *enc)
1174 switch (nkf_enc_to_index(enc)) {
1181 #ifdef SHIFTJIS_CP932
1184 #ifdef UTF8_OUTPUT_ENABLE
1185 ms_ucs_map_f = UCS_MAP_CP932;
1195 case ISO_2022_JP_2004:
1202 #ifdef SHIFTJIS_CP932
1205 #ifdef UTF8_OUTPUT_ENABLE
1206 ms_ucs_map_f = UCS_MAP_CP932;
1211 #ifdef SHIFTJIS_CP932
1214 #ifdef UTF8_OUTPUT_ENABLE
1215 ms_ucs_map_f = UCS_MAP_CP10001;
1223 #ifdef SHIFTJIS_CP932
1226 #ifdef UTF8_OUTPUT_ENABLE
1227 ms_ucs_map_f = UCS_MAP_CP932;
1231 #ifdef SHIFTJIS_CP932
1234 #ifdef UTF8_OUTPUT_ENABLE
1235 ms_ucs_map_f = UCS_MAP_MS;
1239 #ifdef SHIFTJIS_CP932
1242 #ifdef UTF8_OUTPUT_ENABLE
1243 ms_ucs_map_f = UCS_MAP_ASCII;
1246 case SHIFT_JISX0213:
1247 case SHIFT_JIS_2004:
1249 #ifdef SHIFTJIS_CP932
1256 #ifdef SHIFTJIS_CP932
1260 #ifdef UTF8_INPUT_ENABLE
1261 #ifdef UNICODE_NORMALIZATION
1269 input_endian = ENDIAN_BIG;
1273 input_endian = ENDIAN_LITTLE;
1278 input_endian = ENDIAN_BIG;
1282 input_endian = ENDIAN_LITTLE;
1289 set_output_encoding(nkf_encoding *enc)
1291 switch (nkf_enc_to_index(enc)) {
1294 #ifdef SHIFTJIS_CP932
1295 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1297 #ifdef UTF8_OUTPUT_ENABLE
1298 ms_ucs_map_f = UCS_MAP_CP932;
1302 #ifdef SHIFTJIS_CP932
1303 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1305 #ifdef UTF8_OUTPUT_ENABLE
1306 ms_ucs_map_f = UCS_MAP_CP932;
1311 #ifdef SHIFTJIS_CP932
1312 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1318 #ifdef SHIFTJIS_CP932
1319 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1325 #ifdef UTF8_OUTPUT_ENABLE
1326 ms_ucs_map_f = UCS_MAP_CP932;
1330 #ifdef UTF8_OUTPUT_ENABLE
1331 ms_ucs_map_f = UCS_MAP_CP10001;
1336 #ifdef SHIFTJIS_CP932
1337 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1339 #ifdef UTF8_OUTPUT_ENABLE
1340 ms_ucs_map_f = UCS_MAP_ASCII;
1345 #ifdef SHIFTJIS_CP932
1346 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1348 #ifdef UTF8_OUTPUT_ENABLE
1349 ms_ucs_map_f = UCS_MAP_ASCII;
1353 #ifdef SHIFTJIS_CP932
1354 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1356 #ifdef UTF8_OUTPUT_ENABLE
1357 ms_ucs_map_f = UCS_MAP_CP932;
1362 #ifdef UTF8_OUTPUT_ENABLE
1363 ms_ucs_map_f = UCS_MAP_MS;
1368 #ifdef UTF8_OUTPUT_ENABLE
1369 ms_ucs_map_f = UCS_MAP_ASCII;
1372 case SHIFT_JISX0213:
1373 case SHIFT_JIS_2004:
1375 #ifdef SHIFTJIS_CP932
1376 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1383 #ifdef SHIFTJIS_CP932
1384 if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1387 #ifdef UTF8_OUTPUT_ENABLE
1389 output_bom_f = TRUE;
1393 output_bom_f = TRUE;
1396 output_endian = ENDIAN_LITTLE;
1397 output_bom_f = FALSE;
1400 output_endian = ENDIAN_LITTLE;
1401 output_bom_f = TRUE;
1404 output_bom_f = TRUE;
1407 output_endian = ENDIAN_LITTLE;
1408 output_bom_f = FALSE;
1411 output_endian = ENDIAN_LITTLE;
1412 output_bom_f = TRUE;
1418 static struct input_code*
1419 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1422 struct input_code *p = input_code_list;
1424 if (iconv_func == p->iconv_func){
1434 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1436 #ifdef INPUT_CODE_FIX
1437 if (f || !input_encoding)
1444 #ifdef INPUT_CODE_FIX
1445 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1451 if (estab_f && iconv_for_check != iconv){
1452 struct input_code *p = find_inputcode_byfunc(iconv);
1454 set_input_codename(p->name);
1457 iconv_for_check = iconv;
1464 x0212_shift(nkf_char c)
1469 if (0x75 <= c && c <= 0x7f){
1470 ret = c + (0x109 - 0x75);
1473 if (0x75 <= c && c <= 0x7f){
1474 ret = c + (0x113 - 0x75);
1482 x0212_unshift(nkf_char c)
1485 if (0x7f <= c && c <= 0x88){
1486 ret = c + (0x75 - 0x7f);
1487 }else if (0x89 <= c && c <= 0x92){
1488 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1492 #endif /* X0212_ENABLE */
1495 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1501 if((0x21 <= ndx && ndx <= 0x2F)){
1502 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1503 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1505 }else if(0x6E <= ndx && ndx <= 0x7E){
1506 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1507 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1513 else if(nkf_isgraph(ndx)){
1515 const unsigned short *ptr;
1516 ptr = x0212_shiftjis[ndx - 0x21];
1518 val = ptr[(c1 & 0x7f) - 0x21];
1527 c2 = x0212_shift(c2);
1529 #endif /* X0212_ENABLE */
1531 if(0x7F < c2) return 1;
1532 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1533 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1538 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
1540 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1543 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1544 if (0xFC < c1) return 1;
1545 #ifdef SHIFTJIS_CP932
1546 if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1547 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1554 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1555 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1561 #endif /* SHIFTJIS_CP932 */
1563 if (!x0213_f && is_ibmext_in_sjis(c2)){
1564 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1567 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1580 if(x0213_f && c2 >= 0xF0){
1581 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1582 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1583 }else{ /* 78<=k<=94 */
1584 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1585 if (0x9E < c1) c2++;
1588 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1589 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1590 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1591 if (0x9E < c1) c2++;
1594 c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1601 c2 = x0212_unshift(c2);
1608 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1610 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1618 }else if (val < 0x800){
1619 *p1 = 0xc0 | (val >> 6);
1620 *p2 = 0x80 | (val & 0x3f);
1623 } else if (nkf_char_unicode_bmp_p(val)) {
1624 *p1 = 0xe0 | (val >> 12);
1625 *p2 = 0x80 | ((val >> 6) & 0x3f);
1626 *p3 = 0x80 | ( val & 0x3f);
1628 } else if (nkf_char_unicode_value_p(val)) {
1629 *p1 = 0xe0 | (val >> 16);
1630 *p2 = 0x80 | ((val >> 12) & 0x3f);
1631 *p3 = 0x80 | ((val >> 6) & 0x3f);
1632 *p4 = 0x80 | ( val & 0x3f);
1642 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
1649 else if (c1 <= 0xC3) {
1650 /* trail byte or invalid */
1653 else if (c1 <= 0xDF) {
1655 wc = (c1 & 0x1F) << 6;
1658 else if (c1 <= 0xEF) {
1660 wc = (c1 & 0x0F) << 12;
1661 wc |= (c2 & 0x3F) << 6;
1664 else if (c2 <= 0xF4) {
1666 wc = (c1 & 0x0F) << 18;
1667 wc |= (c2 & 0x3F) << 12;
1668 wc |= (c3 & 0x3F) << 6;
1678 #ifdef UTF8_INPUT_ENABLE
1680 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1681 const unsigned short *const *pp, nkf_char psize,
1682 nkf_char *p2, nkf_char *p1)
1685 const unsigned short *p;
1688 if (pp == 0) return 1;
1691 if (c1 < 0 || psize <= c1) return 1;
1693 if (p == 0) return 1;
1696 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1698 if (val == 0) return 1;
1699 if (no_cp932ext_f && (
1700 (val>>8) == 0x2D || /* NEC special characters */
1701 val > NKF_INT32_C(0xF300) /* IBM extended characters */
1709 if (c2 == SO) c2 = JIS_X_0201_1976_K;
1717 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1719 const unsigned short *const *pp;
1720 const unsigned short *const *const *ppp;
1721 static const char no_best_fit_chars_table_C2[] =
1722 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1723 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1724 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1725 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1726 static const char no_best_fit_chars_table_C2_ms[] =
1727 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1728 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1729 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1730 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1731 static const char no_best_fit_chars_table_932_C2[] =
1732 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1733 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1734 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1735 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1736 static const char no_best_fit_chars_table_932_C3[] =
1737 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1738 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1739 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1740 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1746 }else if(c2 < 0xe0){
1747 if(no_best_fit_chars_f){
1748 if(ms_ucs_map_f == UCS_MAP_CP932){
1751 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1754 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1757 }else if(!cp932inv_f){
1760 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1763 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1766 }else if(ms_ucs_map_f == UCS_MAP_MS){
1767 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1768 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1786 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1787 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1788 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1790 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1791 }else if(c0 < 0xF0){
1792 if(no_best_fit_chars_f){
1793 if(ms_ucs_map_f == UCS_MAP_CP932){
1794 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1795 }else if(ms_ucs_map_f == UCS_MAP_MS){
1800 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1803 if(c0 == 0x92) return 1;
1808 if(c1 == 0x80 || c0 == 0x9C) return 1;
1811 }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1816 if(c0 == 0x94) return 1;
1819 if(c0 == 0xBB) return 1;
1829 if(c0 == 0x95) return 1;
1832 if(c0 == 0xA5) return 1;
1839 if(c0 == 0x8D) return 1;
1842 if(c0 == 0x9E && !cp932inv_f) return 1;
1845 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1853 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1854 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1855 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1857 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1859 #ifdef SHIFTJIS_CP932
1860 if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1862 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1863 s2e_conv(s2, s1, p2, p1);
1872 #ifdef UTF8_OUTPUT_ENABLE
1874 e2w_conv(nkf_char c2, nkf_char c1)
1876 const unsigned short *p;
1878 if (c2 == JIS_X_0201_1976_K) {
1879 if (ms_ucs_map_f == UCS_MAP_CP10001) {
1887 p = euc_to_utf8_1byte;
1889 } else if (is_eucg3(c2)){
1890 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1893 c2 = (c2&0x7f) - 0x21;
1894 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1895 p = x0212_to_utf8_2bytes[c2];
1901 c2 = (c2&0x7f) - 0x21;
1902 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1904 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
1905 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
1906 euc_to_utf8_2bytes_ms[c2];
1911 c1 = (c1 & 0x7f) - 0x21;
1912 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1919 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1926 }else if (0xc0 <= c2 && c2 <= 0xef) {
1927 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1928 #ifdef NUMCHAR_OPTION
1931 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1939 #ifdef UTF8_INPUT_ENABLE
1941 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
1943 nkf_char c1, c2, c3, c4;
1950 else if (nkf_char_unicode_bmp_p(val)){
1951 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
1952 ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
1955 *p1 = nkf_char_unicode_new(val);
1961 *p1 = nkf_char_unicode_new(val);
1968 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
1970 if (c2 == JIS_X_0201_1976_K || c2 == SS2){
1971 if (iso2022jp_f && !x0201_f) {
1972 c2 = GETA1; c1 = GETA2;
1974 c2 = JIS_X_0201_1976_K;
1978 }else if (c2 == 0x8f){
1982 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
1983 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
1984 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
1987 c2 = (c2 << 8) | (c1 & 0x7f);
1989 #ifdef SHIFTJIS_CP932
1992 if (e2s_conv(c2, c1, &s2, &s1) == 0){
1993 s2e_conv(s2, s1, &c2, &c1);
2000 #endif /* SHIFTJIS_CP932 */
2002 #endif /* X0212_ENABLE */
2003 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2006 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2007 /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2008 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2013 #ifdef SHIFTJIS_CP932
2014 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2016 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2017 s2e_conv(s2, s1, &c2, &c1);
2024 #endif /* SHIFTJIS_CP932 */
2032 s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2034 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2035 if (iso2022jp_f && !x0201_f) {
2036 c2 = GETA1; c1 = GETA2;
2040 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2042 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2044 if(c1 == 0x7F) return 0;
2045 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2048 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2049 if (ret) return ret;
2056 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2058 nkf_char ret = 0, c4 = 0;
2059 static const char w_iconv_utf8_1st_byte[] =
2061 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2062 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2063 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2064 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2071 if (c1 < 0 || 0xff < c1) {
2072 }else if (c1 == 0) { /* 0 : 1 byte*/
2074 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2077 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2079 if (c2 < 0x80 || 0xBF < c2) return 0;
2082 if (c3 == 0) return -1;
2083 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2088 if (c3 == 0) return -1;
2089 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2093 if (c3 == 0) return -1;
2094 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2098 if (c3 == 0) return -2;
2099 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2103 if (c3 == 0) return -2;
2104 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2108 if (c3 == 0) return -2;
2109 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2117 if (c1 == 0 || c1 == EOF){
2118 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2119 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2122 ret = w2e_conv(c1, c2, c3, &c1, &c2);
2130 #define NKF_ICONV_INVALID_CODE_RANGE -13
2132 unicode_iconv(nkf_char wc)
2140 }else if ((wc>>11) == 27) {
2141 /* unpaired surrogate */
2142 return NKF_ICONV_INVALID_CODE_RANGE;
2143 }else if (wc < 0xFFFF) {
2144 ret = w16e_conv(wc, &c2, &c1);
2145 if (ret) return ret;
2146 }else if (wc < 0x10FFFF) {
2148 c1 = nkf_char_unicode_new(wc);
2150 return NKF_ICONV_INVALID_CODE_RANGE;
2156 #define NKF_ICONV_NEED_ONE_MORE_BYTE -1
2157 #define NKF_ICONV_NEED_TWO_MORE_BYTES -2
2158 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
2160 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2169 if (input_endian == ENDIAN_BIG) {
2170 if (0xD8 <= c1 && c1 <= 0xDB) {
2171 if (0xDC <= c3 && c3 <= 0xDF) {
2172 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2173 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2178 if (0xD8 <= c2 && c2 <= 0xDB) {
2179 if (0xDC <= c4 && c4 <= 0xDF) {
2180 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2181 } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2187 return (*unicode_iconv)(wc);
2191 w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
2197 w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0)
2203 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2212 switch(input_endian){
2214 wc = c2 << 16 | c3 << 8 | c4;
2217 wc = c3 << 16 | c2 << 8 | c1;
2220 wc = c1 << 16 | c4 << 8 | c3;
2223 wc = c4 << 16 | c1 << 8 | c2;
2226 return NKF_ICONV_INVALID_CODE_RANGE;
2229 return (*unicode_iconv)(wc);
2233 #define output_ascii_escape_sequence(mode) do { \
2234 if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2237 (*o_putc)(ascii_intro); \
2238 output_mode = mode; \
2243 output_escape_sequence(int mode)
2245 if (output_mode == mode)
2253 case JIS_X_0201_1976_K:
2261 (*o_putc)(kanji_intro);
2286 j_oconv(nkf_char c2, nkf_char c1)
2288 #ifdef NUMCHAR_OPTION
2289 if (c2 == 0 && nkf_char_unicode_p(c1)){
2290 w16e_conv(c1, &c2, &c1);
2291 if (c2 == 0 && nkf_char_unicode_p(c1)){
2292 c2 = c1 & VALUE_MASK;
2293 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2296 c2 = 0x7F + c1 / 94;
2297 c1 = 0x21 + c1 % 94;
2299 if (encode_fallback) (*encode_fallback)(c1);
2306 output_ascii_escape_sequence(ASCII);
2309 else if (c2 == EOF) {
2310 output_ascii_escape_sequence(ASCII);
2313 else if (c2 == ISO_8859_1) {
2314 output_ascii_escape_sequence(ISO_8859_1);
2317 else if (c2 == JIS_X_0201_1976_K) {
2318 output_escape_sequence(JIS_X_0201_1976_K);
2321 } else if (is_eucg3(c2)){
2322 output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2323 (*o_putc)(c2 & 0x7f);
2328 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2329 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2330 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2337 e_oconv(nkf_char c2, nkf_char c1)
2339 if (c2 == 0 && nkf_char_unicode_p(c1)){
2340 w16e_conv(c1, &c2, &c1);
2341 if (c2 == 0 && nkf_char_unicode_p(c1)){
2342 c2 = c1 & VALUE_MASK;
2343 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2347 c2 += c2 < 10 ? 0x75 : 0x8FEB;
2348 c1 = 0x21 + c1 % 94;
2351 (*o_putc)((c2 & 0x7f) | 0x080);
2352 (*o_putc)(c1 | 0x080);
2354 (*o_putc)((c2 & 0x7f) | 0x080);
2355 (*o_putc)(c1 | 0x080);
2359 if (encode_fallback) (*encode_fallback)(c1);
2367 } else if (c2 == 0) {
2368 output_mode = ASCII;
2370 } else if (c2 == JIS_X_0201_1976_K) {
2371 output_mode = EUC_JP;
2372 (*o_putc)(SS2); (*o_putc)(c1|0x80);
2373 } else if (c2 == ISO_8859_1) {
2374 output_mode = ISO_8859_1;
2375 (*o_putc)(c1 | 0x080);
2377 } else if (is_eucg3(c2)){
2378 output_mode = EUC_JP;
2379 #ifdef SHIFTJIS_CP932
2382 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2383 s2e_conv(s2, s1, &c2, &c1);
2388 output_mode = ASCII;
2390 }else if (is_eucg3(c2)){
2393 (*o_putc)((c2 & 0x7f) | 0x080);
2394 (*o_putc)(c1 | 0x080);
2397 (*o_putc)((c2 & 0x7f) | 0x080);
2398 (*o_putc)(c1 | 0x080);
2402 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2403 set_iconv(FALSE, 0);
2404 return; /* too late to rescue this char */
2406 output_mode = EUC_JP;
2407 (*o_putc)(c2 | 0x080);
2408 (*o_putc)(c1 | 0x080);
2413 s_oconv(nkf_char c2, nkf_char c1)
2415 #ifdef NUMCHAR_OPTION
2416 if (c2 == 0 && nkf_char_unicode_p(c1)){
2417 w16e_conv(c1, &c2, &c1);
2418 if (c2 == 0 && nkf_char_unicode_p(c1)){
2419 c2 = c1 & VALUE_MASK;
2420 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2423 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2425 c1 += 0x40 + (c1 > 0x3e);
2430 if(encode_fallback)(*encode_fallback)(c1);
2439 } else if (c2 == 0) {
2440 output_mode = ASCII;
2442 } else if (c2 == JIS_X_0201_1976_K) {
2443 output_mode = SHIFT_JIS;
2445 } else if (c2 == ISO_8859_1) {
2446 output_mode = ISO_8859_1;
2447 (*o_putc)(c1 | 0x080);
2449 } else if (is_eucg3(c2)){
2450 output_mode = SHIFT_JIS;
2451 if (e2s_conv(c2, c1, &c2, &c1) == 0){
2457 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2458 set_iconv(FALSE, 0);
2459 return; /* too late to rescue this char */
2461 output_mode = SHIFT_JIS;
2462 e2s_conv(c2, c1, &c2, &c1);
2464 #ifdef SHIFTJIS_CP932
2466 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2467 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2473 #endif /* SHIFTJIS_CP932 */
2476 if (prefix_table[(unsigned char)c1]){
2477 (*o_putc)(prefix_table[(unsigned char)c1]);
2483 #ifdef UTF8_OUTPUT_ENABLE
2485 w_oconv(nkf_char c2, nkf_char c1)
2491 output_bom_f = FALSE;
2502 if (c2 == 0 && nkf_char_unicode_p(c1)){
2503 val = c1 & VALUE_MASK;
2504 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2506 if (c2) (*o_putc)(c2);
2507 if (c3) (*o_putc)(c3);
2508 if (c4) (*o_putc)(c4);
2515 val = e2w_conv(c2, c1);
2517 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2519 if (c2) (*o_putc)(c2);
2520 if (c3) (*o_putc)(c3);
2521 if (c4) (*o_putc)(c4);
2527 w_oconv16(nkf_char c2, nkf_char c1)
2530 output_bom_f = FALSE;
2531 if (output_endian == ENDIAN_LITTLE){
2545 if (c2 == 0 && nkf_char_unicode_p(c1)) {
2546 if (nkf_char_unicode_bmp_p(c1)) {
2547 c2 = (c1 >> 8) & 0xff;
2551 if (c1 <= UNICODE_MAX) {
2552 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2553 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2554 if (output_endian == ENDIAN_LITTLE){
2555 (*o_putc)(c2 & 0xff);
2556 (*o_putc)((c2 >> 8) & 0xff);
2557 (*o_putc)(c1 & 0xff);
2558 (*o_putc)((c1 >> 8) & 0xff);
2560 (*o_putc)((c2 >> 8) & 0xff);
2561 (*o_putc)(c2 & 0xff);
2562 (*o_putc)((c1 >> 8) & 0xff);
2563 (*o_putc)(c1 & 0xff);
2569 nkf_char val = e2w_conv(c2, c1);
2570 c2 = (val >> 8) & 0xff;
2575 if (output_endian == ENDIAN_LITTLE){
2585 w_oconv32(nkf_char c2, nkf_char c1)
2588 output_bom_f = FALSE;
2589 if (output_endian == ENDIAN_LITTLE){
2607 if (c2 == ISO_8859_1) {
2609 } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2612 c1 = e2w_conv(c2, c1);
2615 if (output_endian == ENDIAN_LITTLE){
2616 (*o_putc)( c1 & 0xFF);
2617 (*o_putc)((c1 >> 8) & 0xFF);
2618 (*o_putc)((c1 >> 16) & 0xFF);
2622 (*o_putc)((c1 >> 16) & 0xFF);
2623 (*o_putc)((c1 >> 8) & 0xFF);
2624 (*o_putc)( c1 & 0xFF);
2629 #define SCORE_L2 (1) /* JIS level-2 kanji */
2630 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
2631 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
2632 #define SCORE_CP932 (SCORE_DEPEND << 1) /* remapped via CP932 (IBM extended characters) */
2633 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2634 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* nonexistent character */
2635 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
2636 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
2638 #define SCORE_INIT (SCORE_iMIME)
2640 static const char score_table_A0[] = {
2643 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2644 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
2647 static const char score_table_F0[] = {
2648 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2649 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
2650 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2651 SCORE_CP932, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
2655 set_code_score(struct input_code *ptr, nkf_char score)
2658 ptr->score |= score;
2663 clr_code_score(struct input_code *ptr, nkf_char score)
2666 ptr->score &= ~score;
2671 code_score(struct input_code *ptr)
2673 nkf_char c2 = ptr->buf[0];
2674 #ifdef UTF8_OUTPUT_ENABLE
2675 nkf_char c1 = ptr->buf[1];
2678 set_code_score(ptr, SCORE_ERROR);
2679 }else if (c2 == SS2){
2680 set_code_score(ptr, SCORE_KANA);
2681 }else if (c2 == 0x8f){
2682 set_code_score(ptr, SCORE_X0212);
2683 #ifdef UTF8_OUTPUT_ENABLE
2684 }else if (!e2w_conv(c2, c1)){
2685 set_code_score(ptr, SCORE_NO_EXIST);
2687 }else if ((c2 & 0x70) == 0x20){
2688 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2689 }else if ((c2 & 0x70) == 0x70){
2690 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2691 }else if ((c2 & 0x70) >= 0x50){
2692 set_code_score(ptr, SCORE_L2);
2697 status_disable(struct input_code *ptr)
2702 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2706 status_push_ch(struct input_code *ptr, nkf_char c)
2708 ptr->buf[ptr->index++] = c;
2712 status_clear(struct input_code *ptr)
2719 status_reset(struct input_code *ptr)
2722 ptr->score = SCORE_INIT;
2726 status_reinit(struct input_code *ptr)
2729 ptr->_file_stat = 0;
2733 status_check(struct input_code *ptr, nkf_char c)
2735 if (c <= DEL && estab_f){
2741 s_status(struct input_code *ptr, nkf_char c)
2745 status_check(ptr, c);
2750 }else if (nkf_char_unicode_p(c)){
2752 }else if (0xa1 <= c && c <= 0xdf){
2753 status_push_ch(ptr, SS2);
2754 status_push_ch(ptr, c);
2757 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2759 status_push_ch(ptr, c);
2760 }else if (0xed <= c && c <= 0xee){
2762 status_push_ch(ptr, c);
2763 #ifdef SHIFTJIS_CP932
2764 }else if (is_ibmext_in_sjis(c)){
2766 status_push_ch(ptr, c);
2767 #endif /* SHIFTJIS_CP932 */
2769 }else if (0xf0 <= c && c <= 0xfc){
2771 status_push_ch(ptr, c);
2772 #endif /* X0212_ENABLE */
2774 status_disable(ptr);
2778 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2779 status_push_ch(ptr, c);
2780 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2784 status_disable(ptr);
2788 #ifdef SHIFTJIS_CP932
2789 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2790 status_push_ch(ptr, c);
2791 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2792 set_code_score(ptr, SCORE_CP932);
2797 #endif /* SHIFTJIS_CP932 */
2798 status_disable(ptr);
2801 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2802 status_push_ch(ptr, c);
2803 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2804 set_code_score(ptr, SCORE_CP932);
2807 status_disable(ptr);
2814 e_status(struct input_code *ptr, nkf_char c)
2818 status_check(ptr, c);
2823 }else if (nkf_char_unicode_p(c)){
2825 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2827 status_push_ch(ptr, c);
2829 }else if (0x8f == c){
2831 status_push_ch(ptr, c);
2832 #endif /* X0212_ENABLE */
2834 status_disable(ptr);
2838 if (0xa1 <= c && c <= 0xfe){
2839 status_push_ch(ptr, c);
2843 status_disable(ptr);
2848 if (0xa1 <= c && c <= 0xfe){
2850 status_push_ch(ptr, c);
2852 status_disable(ptr);
2854 #endif /* X0212_ENABLE */
2858 #ifdef UTF8_INPUT_ENABLE
2860 w_status(struct input_code *ptr, nkf_char c)
2864 status_check(ptr, c);
2869 }else if (nkf_char_unicode_p(c)){
2871 }else if (0xc0 <= c && c <= 0xdf){
2873 status_push_ch(ptr, c);
2874 }else if (0xe0 <= c && c <= 0xef){
2876 status_push_ch(ptr, c);
2877 }else if (0xf0 <= c && c <= 0xf4){
2879 status_push_ch(ptr, c);
2881 status_disable(ptr);
2886 if (0x80 <= c && c <= 0xbf){
2887 status_push_ch(ptr, c);
2888 if (ptr->index > ptr->stat){
2889 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2890 && ptr->buf[2] == 0xbf);
2891 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2892 &ptr->buf[0], &ptr->buf[1]);
2899 status_disable(ptr);
2903 if (0x80 <= c && c <= 0xbf){
2904 if (ptr->index < ptr->stat){
2905 status_push_ch(ptr, c);
2910 status_disable(ptr);
2918 code_status(nkf_char c)
2920 int action_flag = 1;
2921 struct input_code *result = 0;
2922 struct input_code *p = input_code_list;
2924 if (!p->status_func) {
2928 if (!p->status_func)
2930 (p->status_func)(p, c);
2933 }else if(p->stat == 0){
2944 if (result && !estab_f){
2945 set_iconv(TRUE, result->iconv_func);
2946 }else if (c <= DEL){
2947 struct input_code *ptr = input_code_list;
2961 return std_gc_buf[--std_gc_ndx];
2968 std_ungetc(nkf_char c, FILE *f)
2970 if (std_gc_ndx == STD_GC_BUFSIZE){
2973 std_gc_buf[std_gc_ndx++] = c;
2979 std_putc(nkf_char c)
2986 static unsigned char hold_buf[HOLD_SIZE*2];
2987 static int hold_count = 0;
2989 push_hold_buf(nkf_char c2)
2991 if (hold_count >= HOLD_SIZE*2)
2993 hold_buf[hold_count++] = (unsigned char)c2;
2994 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2998 h_conv(FILE *f, int c1, int c2)
3004 /** it must NOT be in the kanji shift sequence */
3005 /** it must NOT be written in JIS7 */
3006 /** and it must be after 2 byte 8bit code */
3012 while ((c2 = (*i_getc)(f)) != EOF) {
3018 if (push_hold_buf(c2) == EOF || estab_f) {
3024 struct input_code *p = input_code_list;
3025 struct input_code *result = p;
3030 if (p->status_func && p->score < result->score) {
3035 set_iconv(TRUE, result->iconv_func);
3040 ** 1) EOF is detected, or
3041 ** 2) Code is established, or
3042 ** 3) Buffer is FULL (but last word is pushed)
3044 ** in 1) and 3) cases, we continue to use
3045 ** Kanji codes by oconv and leave estab_f unchanged.
3050 while (hold_index < hold_count){
3051 c1 = hold_buf[hold_index++];
3055 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3056 (*iconv)(JIS_X_0201_1976_K, c1, 0);
3059 if (hold_index < hold_count){
3060 c2 = hold_buf[hold_index++];
3070 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3073 if (hold_index < hold_count){
3074 c3 = hold_buf[hold_index++];
3075 } else if ((c3 = (*i_getc)(f)) == EOF) {
3080 if (hold_index < hold_count){
3081 c4 = hold_buf[hold_index++];
3082 } else if ((c4 = (*i_getc)(f)) == EOF) {
3087 (*iconv)(c1, c2, (c3<<8)|c4);
3092 /* 3 bytes EUC or UTF-8 */
3093 if (hold_index < hold_count){
3094 c3 = hold_buf[hold_index++];
3095 } else if ((c3 = (*i_getc)(f)) == EOF) {
3101 (*iconv)(c1, c2, c3);
3104 if (c3 == EOF) break;
3110 * Check and Ignore BOM
3116 switch(c2 = (*i_getc)(f)){
3118 if((c2 = (*i_getc)(f)) == 0x00){
3119 if((c2 = (*i_getc)(f)) == 0xFE){
3120 if((c2 = (*i_getc)(f)) == 0xFF){
3121 if(!input_encoding){
3122 set_iconv(TRUE, w_iconv32);
3124 if (iconv == w_iconv32) {
3125 input_endian = ENDIAN_BIG;
3128 (*i_ungetc)(0xFF,f);
3129 }else (*i_ungetc)(c2,f);
3130 (*i_ungetc)(0xFE,f);
3131 }else if(c2 == 0xFF){
3132 if((c2 = (*i_getc)(f)) == 0xFE){
3133 if(!input_encoding){
3134 set_iconv(TRUE, w_iconv32);
3136 if (iconv == w_iconv32) {
3137 input_endian = ENDIAN_2143;
3140 (*i_ungetc)(0xFF,f);
3141 }else (*i_ungetc)(c2,f);
3142 (*i_ungetc)(0xFF,f);
3143 }else (*i_ungetc)(c2,f);
3144 (*i_ungetc)(0x00,f);
3145 }else (*i_ungetc)(c2,f);
3146 (*i_ungetc)(0x00,f);
3149 if((c2 = (*i_getc)(f)) == 0xBB){
3150 if((c2 = (*i_getc)(f)) == 0xBF){
3151 if(!input_encoding){
3152 set_iconv(TRUE, w_iconv);
3154 if (iconv == w_iconv) {
3157 (*i_ungetc)(0xBF,f);
3158 }else (*i_ungetc)(c2,f);
3159 (*i_ungetc)(0xBB,f);
3160 }else (*i_ungetc)(c2,f);
3161 (*i_ungetc)(0xEF,f);
3164 if((c2 = (*i_getc)(f)) == 0xFF){
3165 if((c2 = (*i_getc)(f)) == 0x00){
3166 if((c2 = (*i_getc)(f)) == 0x00){
3167 if(!input_encoding){
3168 set_iconv(TRUE, w_iconv32);
3170 if (iconv == w_iconv32) {
3171 input_endian = ENDIAN_3412;
3174 (*i_ungetc)(0x00,f);
3175 }else (*i_ungetc)(c2,f);
3176 (*i_ungetc)(0x00,f);
3177 }else (*i_ungetc)(c2,f);
3178 if(!input_encoding){
3179 set_iconv(TRUE, w_iconv16);
3181 if (iconv == w_iconv16) {
3182 input_endian = ENDIAN_BIG;
3185 (*i_ungetc)(0xFF,f);
3186 }else (*i_ungetc)(c2,f);
3187 (*i_ungetc)(0xFE,f);
3190 if((c2 = (*i_getc)(f)) == 0xFE){
3191 if((c2 = (*i_getc)(f)) == 0x00){
3192 if((c2 = (*i_getc)(f)) == 0x00){
3193 if(!input_encoding){
3194 set_iconv(TRUE, w_iconv32);
3196 if (iconv == w_iconv32) {
3197 input_endian = ENDIAN_LITTLE;
3200 (*i_ungetc)(0x00,f);
3201 }else (*i_ungetc)(c2,f);
3202 (*i_ungetc)(0x00,f);
3203 }else (*i_ungetc)(c2,f);
3204 if(!input_encoding){
3205 set_iconv(TRUE, w_iconv16);
3207 if (iconv == w_iconv16) {
3208 input_endian = ENDIAN_LITTLE;
3211 (*i_ungetc)(0xFE,f);
3212 }else (*i_ungetc)(c2,f);
3213 (*i_ungetc)(0xFF,f);
3228 init_broken_state(void)
3230 memset(&broken_state, 0, sizeof(broken_state));
3236 broken_state.buf[broken_state.count++] = c;
3240 pop_broken_buf(void)
3242 return broken_state.buf[--broken_state.count];
3246 broken_getc(FILE *f)
3250 if (broken_state.count > 0) {
3251 return pop_broken_buf();
3254 if (c=='$' && broken_state.status != ESC
3255 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3257 broken_state.status = 0;
3258 if (c1=='@'|| c1=='B') {
3259 push_broken_buf(c1);
3266 } else if (c=='(' && broken_state.status != ESC
3267 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3269 broken_state.status = 0;
3270 if (c1=='J'|| c1=='B') {
3271 push_broken_buf(c1);
3279 broken_state.status = c;
3285 broken_ungetc(nkf_char c, FILE *f)
3287 if (broken_state.count < 2)
3293 eol_conv(nkf_char c2, nkf_char c1)
3295 if (guess_f && input_eol != EOF) {
3296 if (c2 == 0 && c1 == LF) {
3297 if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3298 else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3299 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3301 else if (!input_eol) input_eol = CR;
3302 else if (input_eol != CR) input_eol = EOF;
3304 if (prev_cr || (c2 == 0 && c1 == LF)) {
3306 if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3307 if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3309 if (c2 == 0 && c1 == CR) prev_cr = CR;
3310 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3314 Return value of fold_conv()
3316 LF add newline and output char
3317 CR add newline and output nothing
3320 1 (or else) normal output
3322 fold state in prev (previous character)
3324 >0x80 Japanese (X0208/X0201)
3329 This fold algorithm does not preserve heading space in a line.
3330 This is the main difference from fmt.
3333 #define char_size(c2,c1) (c2?2:1)
3336 fold_conv(nkf_char c2, nkf_char c1)
3339 nkf_char fold_state;
3341 if (c1== CR && !fold_preserve_f) {
3342 fold_state=0; /* ignore cr */
3343 }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3345 fold_state=0; /* ignore cr */
3346 } else if (c1== BS) {
3347 if (f_line>0) f_line--;
3349 } else if (c2==EOF && f_line != 0) { /* close open last line */
3351 } else if ((c1==LF && !fold_preserve_f)
3352 || ((c1==CR||(c1==LF&&f_prev!=CR))
3353 && fold_preserve_f)) {
3355 if (fold_preserve_f) {
3359 } else if ((f_prev == c1 && !fold_preserve_f)
3360 || (f_prev == LF && fold_preserve_f)
3361 ) { /* duplicate newline */
3364 fold_state = LF; /* output two newline */
3370 if (f_prev&0x80) { /* Japanese? */
3372 fold_state = 0; /* ignore given single newline */
3373 } else if (f_prev==SP) {
3377 if (++f_line<=fold_len)
3381 fold_state = CR; /* fold and output nothing */
3385 } else if (c1=='\f') {
3388 fold_state = LF; /* output newline and clear */
3389 } else if ( (c2==0 && c1==SP)||
3390 (c2==0 && c1==TAB)||
3391 (c2=='!'&& c1=='!')) {
3392 /* X0208 kankaku or ascii space */
3394 fold_state = 0; /* remove duplicate spaces */
3397 if (++f_line<=fold_len)
3398 fold_state = SP; /* output ASCII space only */
3400 f_prev = SP; f_line = 0;
3401 fold_state = CR; /* fold and output nothing */
3405 prev0 = f_prev; /* we still need this one... , but almost done */
3407 if (c2 || c2 == JIS_X_0201_1976_K)
3408 f_prev |= 0x80; /* this is Japanese */
3409 f_line += char_size(c2,c1);
3410 if (f_line<=fold_len) { /* normal case */
3413 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3414 f_line = char_size(c2,c1);
3415 fold_state = LF; /* We can't wait, do fold now */
3416 } else if (c2 == JIS_X_0201_1976_K) {
3417 /* simple kinsoku rules return 1 means no folding */
3418 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
3419 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
3420 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
3421 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
3422 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
3423 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
3424 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3426 fold_state = LF;/* add one new f_line before this character */
3429 fold_state = LF;/* add one new f_line before this character */
3432 /* kinsoku point in ASCII */
3433 if ( c1==')'|| /* { [ ( */
3444 /* just after special */
3445 } else if (!is_alnum(prev0)) {
3446 f_line = char_size(c2,c1);
3448 } else if ((prev0==SP) || /* ignored new f_line */
3449 (prev0==LF)|| /* ignored new f_line */
3450 (prev0&0x80)) { /* X0208 - ASCII */
3451 f_line = char_size(c2,c1);
3452 fold_state = LF;/* add one new f_line before this character */
3454 fold_state = 1; /* default no fold in ASCII */
3458 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
3459 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
3460 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
3461 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
3462 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
3463 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
3464 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
3465 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
3466 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
3467 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
3468 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
3469 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
3470 /* default no fold in kinsoku */
3473 f_line = char_size(c2,c1);
3474 /* add one new f_line before this character */
3477 f_line = char_size(c2,c1);
3479 /* add one new f_line before this character */
3484 /* terminator process */
3485 switch(fold_state) {
3487 OCONV_NEWLINE((*o_fconv));
3493 OCONV_NEWLINE((*o_fconv));
3504 static nkf_char z_prev2=0,z_prev1=0;
3507 z_conv(nkf_char c2, nkf_char c1)
3510 /* if (c2) c1 &= 0x7f; assertion */
3512 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3518 if (z_prev2 == JIS_X_0201_1976_K) {
3519 if (c2 == JIS_X_0201_1976_K) {
3520 if (c1 == (0xde&0x7f)) { /*
\e$BByE@
\e(B */
3522 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3524 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /*
\e$BH>ByE@
\e(B */
3526 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3531 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3533 if (c2 == JIS_X_0201_1976_K) {
3534 if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3535 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
3540 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3551 if (alpha_f&1 && c2 == 0x23) {
3552 /* JISX0208 Alphabet */
3554 } else if (c2 == 0x21) {
3555 /* JISX0208 Kigou */
3560 } else if (alpha_f&4) {
3565 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3571 if (alpha_f&8 && c2 == 0) {
3573 const char *entity = 0;
/* Replace HTML-special ASCII characters with their named character
   references.  The selected string is then emitted one byte at a time
   through o_zconv.  The entity text had been decoded to the bare
   characters, which made the replacement a no-op (and the quote case an
   invalid string literal). */
3575 case '>': entity = "&gt;"; break;
3576 case '<': entity = "&lt;"; break;
3577 case '\"': entity = "&quot;"; break;
3578 case '&': entity = "&amp;"; break;
3581 while (*entity) (*o_zconv)(0, *entity++);
3587 /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3592 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3596 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3600 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3604 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3608 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3612 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3616 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3620 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3625 (*o_zconv)(JIS_X_0201_1976_K, c);
3628 } else if (c2 == 0x25) {
3629 /* JISX0208 Katakana */
3630 static const int fullwidth_to_halfwidth[] =
3632 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3633 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3634 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3635 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3636 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3637 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3638 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3639 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3640 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3641 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3642 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3643 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3645 if (fullwidth_to_halfwidth[c1-0x20]){
3646 c2 = fullwidth_to_halfwidth[c1-0x20];
3647 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3649 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
3659 #define rot13(c) ( \
3661 (c <= 'M') ? (c + 13): \
3662 (c <= 'Z') ? (c - 13): \
3664 (c <= 'm') ? (c + 13): \
3665 (c <= 'z') ? (c - 13): \
3669 #define rot47(c) ( \
3671 ( c <= 'O') ? (c + 47) : \
3672 ( c <= '~') ? (c - 47) : \
3677 rot_conv(nkf_char c2, nkf_char c1)
3679 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3685 (*o_rot_conv)(c2,c1);
3689 hira_conv(nkf_char c2, nkf_char c1)
3693 if (0x20 < c1 && c1 < 0x74) {
3695 (*o_hira_conv)(c2,c1);
3697 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3699 c1 = nkf_char_unicode_new(0x3094);
3700 (*o_hira_conv)(c2,c1);
3703 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3705 (*o_hira_conv)(c2,c1);
3710 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3713 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3715 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3719 (*o_hira_conv)(c2,c1);
3724 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
3726 #define RANGE_NUM_MAX 18
3727 static const nkf_char range[RANGE_NUM_MAX][2] = {
3748 nkf_char start, end, c;
3750 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3754 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3759 for (i = 0; i < RANGE_NUM_MAX; i++) {
3760 start = range[i][0];
3763 if (c >= start && c <= end) {
3768 (*o_iso2022jp_check_conv)(c2,c1);
3772 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3774 static const unsigned char *mime_pattern[] = {
3775 (const unsigned char *)"\075?EUC-JP?B?",
3776 (const unsigned char *)"\075?SHIFT_JIS?B?",
3777 (const unsigned char *)"\075?ISO-8859-1?Q?",
3778 (const unsigned char *)"\075?ISO-8859-1?B?",
3779 (const unsigned char *)"\075?ISO-2022-JP?B?",
3780 (const unsigned char *)"\075?ISO-2022-JP?Q?",
3781 #if defined(UTF8_INPUT_ENABLE)
3782 (const unsigned char *)"\075?UTF-8?B?",
3783 (const unsigned char *)"\075?UTF-8?Q?",
3785 (const unsigned char *)"\075?US-ASCII?Q?",
3790 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
3791 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
3792 e_iconv, s_iconv, 0, 0, 0, 0,
3793 #if defined(UTF8_INPUT_ENABLE)
3799 static const nkf_char mime_encode[] = {
3800 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K,
3801 #if defined(UTF8_INPUT_ENABLE)
3808 static const nkf_char mime_encode_method[] = {
3809 'B', 'B','Q', 'B', 'B', 'Q',
3810 #if defined(UTF8_INPUT_ENABLE)
3818 /* MIME preprocessor fifo */
3820 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3821 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3822 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
3824 unsigned char buf[MIME_BUF_SIZE];
3826 unsigned int last; /* decoded */
3827 unsigned int input; /* undecoded */
3829 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
3831 #define MAXRECOVER 20
3834 mime_input_buf_unshift(nkf_char c)
3836 mime_input_buf(--mime_input_state.top) = (unsigned char)c;
3840 mime_ungetc(nkf_char c, FILE *f)
3842 mime_input_buf_unshift(c);
3847 mime_ungetc_buf(nkf_char c, FILE *f)
3850 (*i_mungetc_buf)(c,f);
3852 mime_input_buf(--mime_input_state.input) = (unsigned char)c;
3857 mime_getc_buf(FILE *f)
3859 /* we don't keep eof of mime_input_buf, because it contains ?= as
3860 a terminator. It was checked in mime_integrity. */
3861 return ((mimebuf_f)?
3862 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
3866 switch_mime_getc(void)
3868 if (i_getc!=mime_getc) {
3869 i_mgetc = i_getc; i_getc = mime_getc;
3870 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
3871 if(mime_f==STRICT_MIME) {
3872 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
3873 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
3879 unswitch_mime_getc(void)
3881 if(mime_f==STRICT_MIME) {
3882 i_mgetc = i_mgetc_buf;
3883 i_mungetc = i_mungetc_buf;
3886 i_ungetc = i_mungetc;
3887 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
3888 mime_iconv_back = NULL;
3892 mime_integrity(FILE *f, const unsigned char *p)
3896 /* In buffered mode, read until =? or NL or buffer full
3898 mime_input_state.input = mime_input_state.top;
3899 mime_input_state.last = mime_input_state.top;
3901 while(*p) mime_input_buf(mime_input_state.input++) = *p++;
3903 q = mime_input_state.input;
3904 while((c=(*i_getc)(f))!=EOF) {
3905 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3906 break; /* buffer full */
3908 if (c=='=' && d=='?') {
3909 /* checked. skip header, start decode */
3910 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3911 /* mime_last_input = mime_input_state.input; */
3912 mime_input_state.input = q;
3916 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3918 /* Should we check length mod 4? */
3919 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3922 /* In case of Incomplete MIME, no MIME decode */
3923 mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3924 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
3925 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
3926 switch_mime_getc(); /* anyway we need buffered getc */
3931 mime_begin_strict(FILE *f)
3935 const unsigned char *p,*q;
3936 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
3938 mime_decode_mode = FALSE;
3939 /* =? has been checked */
3941 p = mime_pattern[j];
3944 for(i=2;p[i]>SP;i++) { /* start at =? */
3945 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
3946 /* pattern fails, try next one */
3948 while (mime_pattern[++j]) {
3949 p = mime_pattern[j];
3950 for(k=2;k<i;k++) /* assume length(p) > i */
3951 if (p[k]!=q[k]) break;
3952 if (k==i && nkf_toupper(c1)==p[k]) break;
3954 p = mime_pattern[j];
3955 if (p) continue; /* found next one, continue */
3956 /* all fails, output from recovery buffer */
3964 mime_decode_mode = p[i-2];
3966 mime_iconv_back = iconv;
3967 set_iconv(FALSE, mime_priority_func[j]);
3968 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
3970 if (mime_decode_mode=='B') {
3971 mimebuf_f = unbuf_f;
3973 /* do MIME integrity check */
3974 return mime_integrity(f,mime_pattern[j]);
3988 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
3989 /* re-read and convert again from mime_buffer. */
3991 /* =? has been checked */
3992 k = mime_input_state.last;
3993 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
3994 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
3995 /* We accept any character type even if it is broken by new lines */
3996 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
3997 if (c1==LF||c1==SP||c1==CR||
3998 c1=='-'||c1=='_'||is_alnum(c1)) continue;
4000 /* Failed. But this could be another MIME preamble */
4002 mime_input_state.last--;
4008 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4009 if (!(++i<MAXRECOVER) || c1==EOF) break;
4010 if (c1=='b'||c1=='B') {
4011 mime_decode_mode = 'B';
4012 } else if (c1=='q'||c1=='Q') {
4013 mime_decode_mode = 'Q';
4017 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4018 if (!(++i<MAXRECOVER) || c1==EOF) break;
4020 mime_decode_mode = FALSE;
4026 if (!mime_decode_mode) {
4027 /* false MIME preamble, restart from mime_buffer */
4028 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4029 /* Since we are in MIME mode until buffer becomes empty, */
4030 /* we never go into mime_begin again for a while. */
4033 /* discard MIME preamble, and go to MIME mode */
4034 mime_input_state.last = k;
4035 /* do no MIME integrity check */
4036 return c1; /* used only for checking EOF */
4047 debug(const char *str)
4050 fprintf(stderr, "%s\n", str ? str : "NULL");
/*
 * Record the name of the input encoding in the global input_codename.
 *
 * codename: encoding name reported by the caller.
 *
 * The first name reported is stored as-is.  If a later call reports a
 * name that differs (strcmp) from the stored one, the stored name is
 * replaced by the empty string "", which get_guessed_code() tests for
 * separately from NULL.
 */
4056 set_input_codename(const char *codename)
4058 if (!input_codename) {
4059 input_codename = codename; /* nothing recorded yet: take the first name */
4060 } else if (strcmp(codename, input_codename) != 0) {
4061 input_codename = ""; /* conflicting names: mark with the empty string */
/*
 * Return the final name for the guessed input encoding, refining and
 * updating the global input_codename.
 *
 *  - empty string recorded        -> "BINARY"
 *  - nothing recorded (NULL)      -> "ASCII"
 *  - "Shift_JIS"/"EUC-JP"/"ISO-2022-JP" are narrowed to a more specific
 *    code page name according to the score flags of the input_code entry
 *    looked up for the current iconv function.
 * Any other recorded name is returned unchanged.
 */
4066 get_guessed_code(void)
4068 if (input_codename && !*input_codename) {
4069 input_codename = "BINARY";
4071 struct input_code *p = find_inputcode_byfunc(iconv);
4072 if (!input_codename) {
4073 input_codename = "ASCII";
4074 } else if (strcmp(input_codename, "Shift_JIS") == 0) {
/* Shift_JIS with DEPEND or CP932 score bits is reported as CP932 */
4075 if (p->score & (SCORE_DEPEND|SCORE_CP932))
4076 input_codename = "CP932";
4077 } else if (strcmp(input_codename, "EUC-JP") == 0) {
/* the X0212 score bit is checked first, then DEPEND/CP932 */
4078 if (p->score & (SCORE_X0212))
4079 input_codename = "EUCJP-MS";
4080 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4081 input_codename = "CP51932";
4082 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
/* the KANA score bit is checked first, then DEPEND/CP932 */
4083 if (p->score & (SCORE_KANA))
4084 input_codename = "CP50221";
4085 else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4086 input_codename = "CP50220";
4089 return input_codename;
4092 #if !defined(PERL_XS) && !defined(WIN32DLL)
4094 print_guessed_code(char *filename)
4096 if (filename != NULL) printf("%s: ", filename);
4097 if (input_codename && !*input_codename) {
4100 input_codename = get_guessed_code();
4102 printf("%s\n", input_codename);
4106 input_eol == CR ? " (CR)" :
4107 input_eol == LF ? " (LF)" :
4108 input_eol == CRLF ? " (CRLF)" :
4109 input_eol == EOF ? " (MIXED NL)" :
4119 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4121 nkf_char c1, c2, c3;
4127 if (!nkf_isxdigit(c2)){
4132 if (!nkf_isxdigit(c3)){
4137 return (hex2bin(c2) << 4) | hex2bin(c3);
4143 return hex_getc(':', f, i_cgetc, i_cungetc);
4147 cap_ungetc(nkf_char c, FILE *f)
4149 return (*i_cungetc)(c, f);
4155 return hex_getc('%', f, i_ugetc, i_uungetc);
4159 url_ungetc(nkf_char c, FILE *f)
4161 return (*i_uungetc)(c, f);
4165 #ifdef NUMCHAR_OPTION
4167 numchar_getc(FILE *f)
4169 nkf_char (*g)(FILE *) = i_ngetc;
4170 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4181 if (buf[i] == 'x' || buf[i] == 'X'){
4182 for (j = 0; j < 7; j++){
4184 if (!nkf_isxdigit(buf[i])){
4191 c |= hex2bin(buf[i]);
4194 for (j = 0; j < 8; j++){
4198 if (!nkf_isdigit(buf[i])){
4205 c += hex2bin(buf[i]);
4211 return nkf_char_unicode_new(c);
4221 numchar_ungetc(nkf_char c, FILE *f)
4223 return (*i_nungetc)(c, f);
4227 #ifdef UNICODE_NORMALIZATION
4236 nkf_ary_new(int length)
4238 nkf_ary *ary = nkf_malloc(sizeof(nkf_ary));
4239 ary->ary = nkf_malloc(length);
4240 ary->max_length = length;
4246 nkf_ary_dispose(nkf_ary *ary)
4252 #define nkf_ary_length(ary) ((ary)->count)
4253 #define nkf_ary_empty_p(ary) ((ary)->count == 0)
/*
 * Return the element stored at position `index` of `ary`.
 *
 * nkf_ary_push() stores at ary[count++] and nkf_ary_pop() reads
 * ary[--count], so the valid elements are ary[0] .. ary[count-1].
 * The assertion therefore rejects index == count (one past the last
 * element) as well as negative indices.
 */
4255 static unsigned char
4256 nkf_ary_at(nkf_ary *ary, int index)
4258 assert(0 <= index && index < ary->count);
4259 return ary->ary[index];
4263 nkf_ary_clear(nkf_ary *ary)
4268 static unsigned char
4269 nkf_ary_push(nkf_ary *ary, nkf_char c)
4271 assert(ary->max_length > ary->count);
4272 ary->ary[ary->count++] = c;
4276 static unsigned char
4277 nkf_ary_pop(nkf_ary *ary)
4279 assert(0 < ary->count);
4280 return ary->ary[--ary->count];
4283 /* Normalization Form C */
4287 nkf_char (*g)(FILE *f) = i_nfc_getc;
4288 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4289 nkf_ary *buf = nkf_ary_new(9);
4290 const unsigned char *array;
4291 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4292 nkf_char c = (*g)(f);
4294 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4296 nkf_ary_push(buf, (unsigned char)c);
4298 while (lower <= upper) {
4299 int mid = (lower+upper) / 2;
4301 array = normalization_table[mid].nfd;
4302 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4303 if (len >= nkf_ary_length(buf)) {
4307 lower = 1, upper = 0;
4310 nkf_ary_push(buf, c);
4312 if (array[len] != nkf_ary_at(buf, len)) {
4313 if (array[len] < nkf_ary_at(buf, len)) lower = mid + 1;
4314 else upper = mid - 1;
4321 array = normalization_table[mid].nfc;
4323 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4324 nkf_ary_push(buf, array[i]);
4328 } while (lower <= upper);
4330 while (nkf_ary_length(buf) > 1) (*u)(nkf_ary_pop(buf), f);
4331 c = nkf_ary_pop(buf);
4332 nkf_ary_dispose(buf);
4338 nfc_ungetc(nkf_char c, FILE *f)
4340 return (*i_nfc_ungetc)(c, f);
4342 #endif /* UNICODE_NORMALIZATION */
/*
 * Map one Base64 alphabet character to its 6-bit value.
 *
 * As the per-branch comments show, the branches cover
 *   'A'..'Z' -> 0..25,  'a'..'z' -> 26..51,  '0'..'9' -> 52..61,
 *   '+' and '-' -> 62,  '/' and '_' -> 63,
 * so '-' and '_' are accepted alongside '+' and '/'.
 * The results are written as character constants whose ASCII codes equal
 * the intended numbers ('>' is 62, '?' is 63, '4' is 52, 'G' is 'a' - 26).
 */
4346 base64decode(nkf_char c)
4351 i = c - 'A'; /* A..Z 0-25 */
4352 } else if (c == '_') {
4353 i = '?' /* 63 */ ; /* _ 63 */
4355 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4357 } else if (c > '/') {
4358 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4359 } else if (c == '+' || c == '-') {
4360 i = '>' /* 62 */ ; /* + and - 62 */
4362 i = '?' /* 63 */ ; /* / 63 */
4370 nkf_char c1, c2, c3, c4, cc;
4371 nkf_char t1, t2, t3, t4, mode, exit_mode;
4372 nkf_char lwsp_count;
4375 nkf_char lwsp_size = 128;
4377 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4378 return mime_input_buf(mime_input_state.top++);
4380 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4381 mime_decode_mode=FALSE;
4382 unswitch_mime_getc();
4383 return (*i_getc)(f);
4386 if (mimebuf_f == FIXED_MIME)
4387 exit_mode = mime_decode_mode;
4390 if (mime_decode_mode == 'Q') {
4391 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4393 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4394 if (c1<=SP || DEL<=c1) {
4395 mime_decode_mode = exit_mode; /* prepare for quit */
4398 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4402 mime_decode_mode = exit_mode; /* prepare for quit */
4403 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4404 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4405 /* end Q encoding */
4406 input_mode = exit_mode;
4408 lwsp_buf = nkf_malloc((lwsp_size+5)*sizeof(char));
4409 while ((c1=(*i_getc)(f))!=EOF) {
4414 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4422 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4423 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4438 lwsp_buf[lwsp_count] = (unsigned char)c1;
4439 if (lwsp_count++>lwsp_size){
4441 lwsp_buf_new = nkf_realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4442 lwsp_buf = lwsp_buf_new;
4448 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4450 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4451 i_ungetc(lwsp_buf[lwsp_count],f);
4457 if (c1=='='&&c2<SP) { /* this is soft wrap */
4458 while((c1 = (*i_mgetc)(f)) <=SP) {
4459 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4461 mime_decode_mode = 'Q'; /* still in MIME */
4462 goto restart_mime_q;
4465 mime_decode_mode = 'Q'; /* still in MIME */
4469 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4470 if (c2<=SP) return c2;
4471 mime_decode_mode = 'Q'; /* still in MIME */
4472 return ((hex2bin(c2)<<4) + hex2bin(c3));
4475 if (mime_decode_mode != 'B') {
4476 mime_decode_mode = FALSE;
4477 return (*i_mgetc)(f);
4481 /* Base64 encoding */
4483 MIME allows line break in the middle of
4484 Base64, but we are very pessimistic in decoding
4485 in unbuf mode because MIME encoded code may be broken by
4486 less or an editor's control sequence (such as ESC-[-K) in unbuffered
4487 mode. Ignore incomplete MIME.
4489 mode = mime_decode_mode;
4490 mime_decode_mode = exit_mode; /* prepare for quit */
4492 while ((c1 = (*i_mgetc)(f))<=SP) {
4497 if ((c2 = (*i_mgetc)(f))<=SP) {
4500 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4501 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4504 if ((c1 == '?') && (c2 == '=')) {
4507 lwsp_buf = nkf_malloc((lwsp_size+5)*sizeof(char));
4508 while ((c1=(*i_getc)(f))!=EOF) {
4513 if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4521 if ((c1=(*i_getc)(f))!=EOF) {
4525 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SP||c1==TAB)) {
4540 lwsp_buf[lwsp_count] = (unsigned char)c1;
4541 if (lwsp_count++>lwsp_size){
4543 lwsp_buf_new = nkf_realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4544 lwsp_buf = lwsp_buf_new;
4550 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4552 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4553 i_ungetc(lwsp_buf[lwsp_count],f);
4560 if ((c3 = (*i_mgetc)(f))<=SP) {
4563 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4564 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4568 if ((c4 = (*i_mgetc)(f))<=SP) {
4571 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4572 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4576 mime_decode_mode = mode; /* still in MIME sigh... */
4578 /* BASE 64 decoding */
4580 t1 = 0x3f & base64decode(c1);
4581 t2 = 0x3f & base64decode(c2);
4582 t3 = 0x3f & base64decode(c3);
4583 t4 = 0x3f & base64decode(c4);
4584 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4586 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4587 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4589 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4590 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4592 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4597 return mime_input_buf(mime_input_state.top++);
4600 static const char basis_64[] =
4601 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4603 #define MIMEOUT_BUF_LENGTH (60)
4605 char buf[MIMEOUT_BUF_LENGTH+1];
4610 /*nkf_char mime_lastchar2, mime_lastchar1;*/
4613 open_mime(nkf_char mode)
4615 const unsigned char *p;
4618 p = mime_pattern[0];
4619 for(i=0;mime_pattern[i];i++) {
4620 if (mode == mime_encode[i]) {
4621 p = mime_pattern[i];
4625 mimeout_mode = mime_encode_method[i];
4627 if (base64_count>45) {
4628 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4629 (*o_mputc)(mimeout_state.buf[i]);
4632 PUT_NEWLINE((*o_mputc));
4635 if (mimeout_state.count>0
4636 && (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4637 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)) {
4641 for (;i<mimeout_state.count;i++) {
4642 if (mimeout_state.buf[i]==SP || mimeout_state.buf[i]==TAB
4643 || mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF) {
4644 (*o_mputc)(mimeout_state.buf[i]);
4654 j = mimeout_state.count;
4655 mimeout_state.count = 0;
4657 mime_putc(mimeout_state.buf[i]);
4662 mime_prechar(nkf_char c2, nkf_char c1)
4664 if (mimeout_mode > 0){
4666 if (base64_count + mimeout_state.count/3*4> 73){
4667 (*o_base64conv)(EOF,0);
4668 OCONV_NEWLINE((*o_base64conv));
4669 (*o_base64conv)(0,SP);
4673 if (base64_count + mimeout_state.count/3*4> 66) {
4674 (*o_base64conv)(EOF,0);
4675 OCONV_NEWLINE((*o_base64conv));
4676 (*o_base64conv)(0,SP);
4682 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4683 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4684 open_mime(output_mode);
4685 (*o_base64conv)(EOF,0);
4686 OCONV_NEWLINE((*o_base64conv));
4687 (*o_base64conv)(0,SP);
4706 switch(mimeout_mode) {
4711 (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4)]);
4717 (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2)]);
4722 if (mimeout_mode > 0) {
4723 if (mimeout_f!=FIXED_MIME) {
4725 } else if (mimeout_mode != 'Q')
4731 mimeout_addchar(nkf_char c)
4733 switch(mimeout_mode) {
4738 } else if(!nkf_isalnum(c)) {
4740 (*o_mputc)(bin2hex(((c>>4)&0xf)));
4741 (*o_mputc)(bin2hex((c&0xf)));
4749 mimeout_state.state=c;
4750 (*o_mputc)(basis_64[c>>2]);
4755 (*o_mputc)(basis_64[((mimeout_state.state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4756 mimeout_state.state=c;
4761 (*o_mputc)(basis_64[((mimeout_state.state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4762 (*o_mputc)(basis_64[c & 0x3F]);
4774 mime_putc(nkf_char c)
4779 if (mimeout_f == FIXED_MIME){
4780 if (mimeout_mode == 'Q'){
4781 if (base64_count > 71){
4782 if (c!=CR && c!=LF) {
4784 PUT_NEWLINE((*o_mputc));
4789 if (base64_count > 71){
4791 PUT_NEWLINE((*o_mputc));
4794 if (c == EOF) { /* c==EOF */
4798 if (c != EOF) { /* c!=EOF */
4804 /* mimeout_f != FIXED_MIME */
4806 if (c == EOF) { /* c==EOF */
4807 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4808 j = mimeout_state.count;
4809 mimeout_state.count = 0;
4811 if (mimeout_mode > 0) {
4812 if (!nkf_isblank(mimeout_state.buf[j-1])) {
4814 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4817 mimeout_addchar(mimeout_state.buf[i]);
4821 mimeout_addchar(mimeout_state.buf[i]);
4825 mimeout_addchar(mimeout_state.buf[i]);
4831 mimeout_addchar(mimeout_state.buf[i]);
4837 if (mimeout_state.count > 0){
4838 lastchar = mimeout_state.buf[mimeout_state.count - 1];
4843 if (mimeout_mode=='Q') {
4844 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4845 if (c == CR || c == LF) {
4850 } else if (c <= SP) {
4852 if (base64_count > 70) {
4853 PUT_NEWLINE((*o_mputc));
4856 if (!nkf_isblank(c)) {
4861 if (base64_count > 70) {
4863 PUT_NEWLINE((*o_mputc));
4866 open_mime(output_mode);
4868 if (!nkf_noescape_mime(c)) {
4879 if (mimeout_mode <= 0) {
4880 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4881 if (nkf_isspace(c)) {
4883 if (mimeout_mode == -1) {
4886 if (c==CR || c==LF) {
4888 open_mime(output_mode);
4894 for (i=0;i<mimeout_state.count;i++) {
4895 (*o_mputc)(mimeout_state.buf[i]);
4896 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4907 mimeout_state.buf[0] = (char)c;
4908 mimeout_state.count = 1;
4910 if (base64_count > 1
4911 && base64_count + mimeout_state.count > 76
4912 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
4913 PUT_NEWLINE((*o_mputc));
4915 if (!nkf_isspace(mimeout_state.buf[0])){
4920 mimeout_state.buf[mimeout_state.count++] = (char)c;
4921 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4922 open_mime(output_mode);
4927 if (lastchar==CR || lastchar == LF){
4928 for (i=0;i<mimeout_state.count;i++) {
4929 (*o_mputc)(mimeout_state.buf[i]);
4932 mimeout_state.count = 0;
4935 for (i=0;i<mimeout_state.count-1;i++) {
4936 (*o_mputc)(mimeout_state.buf[i]);
4939 mimeout_state.buf[0] = SP;
4940 mimeout_state.count = 1;
4942 open_mime(output_mode);
4945 /* mimeout_mode == 'B', 1, 2 */
4946 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4947 if (lastchar == CR || lastchar == LF){
4948 if (nkf_isblank(c)) {
4949 for (i=0;i<mimeout_state.count;i++) {
4950 mimeout_addchar(mimeout_state.buf[i]);
4952 mimeout_state.count = 0;
4953 } else if (SP<c && c<DEL) {
4955 for (i=0;i<mimeout_state.count;i++) {
4956 (*o_mputc)(mimeout_state.buf[i]);
4959 mimeout_state.count = 0;
4961 mimeout_state.buf[mimeout_state.count++] = (char)c;
4964 if (c==SP || c==TAB || c==CR || c==LF) {
4965 for (i=0;i<mimeout_state.count;i++) {
4966 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
4968 for (i=0;i<mimeout_state.count;i++) {
4969 (*o_mputc)(mimeout_state.buf[i]);
4972 mimeout_state.count = 0;
4975 mimeout_state.buf[mimeout_state.count++] = (char)c;
4976 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4978 for (i=0;i<mimeout_state.count;i++) {
4979 (*o_mputc)(mimeout_state.buf[i]);
4982 mimeout_state.count = 0;
4986 if (mimeout_state.count>0 && SP<c && c!='=') {
4987 mimeout_state.buf[mimeout_state.count++] = (char)c;
4988 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4989 j = mimeout_state.count;
4990 mimeout_state.count = 0;
4992 mimeout_addchar(mimeout_state.buf[i]);
4999 if (mimeout_state.count>0) {
5000 j = mimeout_state.count;
5001 mimeout_state.count = 0;
5003 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5005 mimeout_addchar(mimeout_state.buf[i]);
5011 (*o_mputc)(mimeout_state.buf[i]);
5013 open_mime(output_mode);
5020 base64_conv(nkf_char c2, nkf_char c1)
5022 mime_prechar(c2, c1);
5023 (*o_base64conv)(c2,c1);
5027 typedef struct nkf_iconv_t {
5030 size_t input_buffer_size;
5031 char *output_buffer;
5032 size_t output_buffer_size;
/*
 * Set up an iconv-based converter from `fromcode` to `tocode`.
 *
 * The input buffer is sized IOBUF_SIZE and the output buffer twice that;
 * both come from nkf_malloc().  The conversion descriptor comes from
 * iconv_open(); when it returns (iconv_t)-1 a diagnostic is printed.
 *
 * NOTE(review): `converter` is declared as nkf_iconv_t and dereferenced
 * with `->` with no allocation visible here -- confirm how it is meant to
 * be initialised.
 */
5036 nkf_iconv_new(char *tocode, char *fromcode)
5038 nkf_iconv_t converter;
5040 converter->input_buffer_size = IOBUF_SIZE;
5041 converter->input_buffer = nkf_malloc(converter->input_buffer_size);
5042 converter->output_buffer_size = IOBUF_SIZE * 2;
5043 converter->output_buffer = nkf_malloc(converter->output_buffer_size);
5044 converter->cd = iconv_open(tocode, fromcode);
5045 if (converter->cd == (iconv_t)-1)
/* was perror(fprintf("...")): fprintf needs a FILE * first argument and
   returns an int, while perror expects a string -- print to stderr */
5049 fprintf(stderr, "iconv doesn't support %s to %s conversion.\n", fromcode, tocode);
5052 perror("can't iconv_open");
5058 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5060 size_t invalid = (size_t)0;
5061 char *input_buffer = converter->input_buffer;
5062 size_t input_length = (size_t)0;
5063 char *output_buffer = converter->output_buffer;
5064 size_t output_length = converter->output_buffer_size;
5069 while ((c = (*i_getc)(f)) != EOF) {
5070 input_buffer[input_length++] = c;
5071 if (input_length < converter->input_buffer_size) break;
5075 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5076 while (output_length-- > 0) {
5077 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5079 if (ret == (size_t) - 1) {
5082 if (input_buffer != converter->input_buffer)
5083 memmove(converter->input_buffer, input_buffer, input_length);
5086 converter->output_buffer_size *= 2;
5087 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5088 if (output_buffer == NULL) {
5089 perror("can't realloc");
5092 converter->output_buffer = output_buffer;
5095 perror("can't iconv");
/*
 * Release a converter: free its input and output buffers and close the
 * iconv descriptor.
 *
 * The body previously referred to an undeclared `converter` and to fields
 * `inbuf`/`outbuf`; the parameter is named `convert` and the buffers are
 * the `input_buffer`/`output_buffer` members set up by nkf_iconv_new().
 */
5108 nkf_iconv_close(nkf_iconv_t *convert)
5110 nkf_free(convert->input_buffer);
5111 nkf_free(convert->output_buffer);
5112 iconv_close(convert->cd);
5121 struct input_code *p = input_code_list;
5133 mime_f = MIME_DECODE_DEFAULT;
5134 mime_decode_f = FALSE;
5139 x0201_f = X0201_DEFAULT;
5140 iso2022jp_f = FALSE;
5141 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5142 ms_ucs_map_f = UCS_MAP_ASCII;
5144 #ifdef UTF8_INPUT_ENABLE
5145 no_cp932ext_f = FALSE;
5146 no_best_fit_chars_f = FALSE;
5147 encode_fallback = NULL;
5148 unicode_subchar = '?';
5149 input_endian = ENDIAN_BIG;
5151 #ifdef UTF8_OUTPUT_ENABLE
5152 output_bom_f = FALSE;
5153 output_endian = ENDIAN_BIG;
5155 #ifdef UNICODE_NORMALIZATION
5171 #ifdef SHIFTJIS_CP932
5181 for (i = 0; i < 256; i++){
5182 prefix_table[i] = 0;
5186 mimeout_state.count = 0;
5191 fold_preserve_f = FALSE;
5194 kanji_intro = DEFAULT_J;
5195 ascii_intro = DEFAULT_R;
5196 fold_margin = FOLD_MARGIN;
5197 o_zconv = no_connection;
5198 o_fconv = no_connection;
5199 o_eol_conv = no_connection;
5200 o_rot_conv = no_connection;
5201 o_hira_conv = no_connection;
5202 o_base64conv = no_connection;
5203 o_iso2022jp_check_conv = no_connection;
5206 i_ungetc = std_ungetc;
5208 i_bungetc = std_ungetc;
5211 i_mungetc = std_ungetc;
5212 i_mgetc_buf = std_getc;
5213 i_mungetc_buf = std_ungetc;
5214 output_mode = ASCII;
5216 mime_decode_mode = FALSE;
5222 init_broken_state();
5223 z_prev2=0,z_prev1=0;
5225 iconv_for_check = 0;
5227 input_codename = NULL;
5228 input_encoding = NULL;
5229 output_encoding = NULL;
5236 module_connection(void)
5238 if (input_encoding) set_input_encoding(input_encoding);
5239 if (!output_encoding) {
5240 output_encoding = nkf_default_encoding();
5242 if (!output_encoding) {
5243 if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5246 set_output_encoding(output_encoding);
5247 oconv = nkf_enc_to_oconv(output_encoding);
5250 /* replace continuation module, from output side */
5252 /* output redirection */
5254 if (noout_f || guess_f){
5261 if (mimeout_f == TRUE) {
5262 o_base64conv = oconv; oconv = base64_conv;
5264 /* base64_count = 0; */
5267 if (eolmode_f || guess_f) {
5268 o_eol_conv = oconv; oconv = eol_conv;
5271 o_rot_conv = oconv; oconv = rot_conv;
5274 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5277 o_hira_conv = oconv; oconv = hira_conv;
5280 o_fconv = oconv; oconv = fold_conv;
5283 if (alpha_f || x0201_f) {
5284 o_zconv = oconv; oconv = z_conv;
5288 i_ungetc = std_ungetc;
5289 /* input redicrection */
5292 i_cgetc = i_getc; i_getc = cap_getc;
5293 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5296 i_ugetc = i_getc; i_getc = url_getc;
5297 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5300 #ifdef NUMCHAR_OPTION
5302 i_ngetc = i_getc; i_getc = numchar_getc;
5303 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5306 #ifdef UNICODE_NORMALIZATION
5308 i_nfc_getc = i_getc; i_getc = nfc_getc;
5309 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5312 if (mime_f && mimebuf_f==FIXED_MIME) {
5313 i_mgetc = i_getc; i_getc = mime_getc;
5314 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5317 i_bgetc = i_getc; i_getc = broken_getc;
5318 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5320 if (input_encoding) {
5321 set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5323 set_iconv(FALSE, e_iconv);
5327 struct input_code *p = input_code_list;
5336 Conversion main loop. Code detection only.
5339 #if !defined(PERL_XS) && !defined(WIN32DLL)
5346 module_connection();
5347 while ((c = (*i_getc)(f)) != EOF)
/*
 * Loop-control shorthands for the byte-reading loop (LAST and
 * set_input_mode are used in kanji_convert below).  NEXT, SKIP, MORE and
 * LAST expand to bare statements ending in continue/break so that they act
 * on the loop enclosing the use site; they cannot be wrapped in
 * do { } while (0), because that wrapper would capture the continue/break.
 * SKIP and MORE expand to two statements, so use them only inside a braced
 * block.
 */
5354 #define NEXT continue /* no output, get next */
5355 #define SKIP c2=0;continue /* clear the pending byte c2, no output, get next */
5356 #define MORE c2=c1;continue /* keep c1 as pending byte c2, need one more byte */
5357 #define SEND ; /* output c1 and c2, get next */
5358 #define LAST break /* end of loop, go closing */
/* set_input_mode(mode): stores mode in input_mode and reports the input
   code name "ISO-2022-JP" via set_input_codename() and debug().  Part of
   the macro body is not visible in this excerpt. */
5359 #define set_input_mode(mode) do { \
5360 input_mode = mode; \
5362 set_input_codename("ISO-2022-JP"); \
5363 debug("ISO-2022-JP"); \
/*
 * kanji_convert: convert the whole stream f.
 *
 * Connects the pipeline with module_connection(), then reads bytes through
 * (*i_getc)(f).  UTF-32 and UTF-16 input are handled by dedicated loops;
 * everything else goes through the general loop, which interprets 8-bit
 * bytes, SI/SO, ESC sequences (ISO-2022 designations), MIME "=?" starts and
 * line ends, and passes characters on through (*iconv) / (*oconv).
 * After the loop (*iconv)(EOF, 0, 0) is called and, if no input code name
 * has been recorded yet, the best-scoring candidate is recorded.
 *
 * NOTE(review): return type and return values are not visible in this
 * excerpt.
 */
5367 kanji_convert(FILE *f)
5369 nkf_char c1=0, c2=0, c3=0, c4=0;
5370 int shift_mode = 0; /* 0, 1, 2, 3 */
5372 int is_8bit = FALSE;
5374 if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5379 output_mode = ASCII;
5381 if (module_connection() < 0) {
5382 #if !defined(PERL_XS) && !defined(WIN32DLL)
5383 fprintf(stderr, "no output encoding given\n");
5389 #ifdef UTF8_INPUT_ENABLE
/* UTF-32 input: consume four bytes per call until any read hits EOF */
5390 if(iconv == w_iconv32){
5391 while ((c1 = (*i_getc)(f)) != EOF &&
5392 (c2 = (*i_getc)(f)) != EOF &&
5393 (c3 = (*i_getc)(f)) != EOF &&
5394 (c4 = (*i_getc)(f)) != EOF) {
5395 nkf_iconv_utf_32(c1, c2, c3, c4);
5397 (*i_ungetc)(EOF, f);
5399 else if (iconv == w_iconv16) { /* UTF-16 input: two bytes per call; a -2 result makes the loop read two more bytes and retry with all four */
5400 while ((c1 = (*i_getc)(f)) != EOF &&
5401 (c2 = (*i_getc)(f)) != EOF) {
5402 if (nkf_iconv_utf_16(c1, c2, 0, 0) == -2 &&
5403 (c3 = (*i_getc)(f)) != EOF &&
5404 (c4 = (*i_getc)(f)) != EOF) {
5405 nkf_iconv_utf_16(c1, c2, c3, c4);
5408 (*i_ungetc)(EOF, f);
5412 while ((c1 = (*i_getc)(f)) != EOF) {
5413 #ifdef INPUT_CODE_FIX
5414 if (!input_encoding)
5420 /* in case of 8th bit is on */
5421 if (!estab_f&&!mime_decode_mode) {
5422 /* in case of not established yet */
5423 /* It is still ambiguous */
5424 if (h_conv(f, c2, c1)==EOF) {
5432 /* in case of already established */
5434 /* ignore bogus code */
5442 /* 2nd byte of 7 bit code or SJIS */
5446 else if (nkf_char_unicode_p(c1)) {
5452 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5455 } else if (c1 > DEL) {
5457 if (!estab_f && !iso8859_f) {
5458 /* not established yet */
5460 } else { /* estab_f==TRUE */
5466 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5467 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5469 c2 = JIS_X_0201_1976_K;
5474 /* already established */
5478 } else if (SP < c1 && c1 < DEL) {
5479 /* in case of Roman characters */
5481 /* output 1 shifted byte */
5485 } else if (nkf_byte_jisx0201_katakana_p(c1)){
5486 /* output 1 shifted byte */
5487 c2 = JIS_X_0201_1976_K;
5490 /* looks like bogus code */
5493 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5494 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
5495 /* in case of Kanji shifted */
5497 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5498 /* Check MIME code */
5499 if ((c1 = (*i_getc)(f)) == EOF) {
5502 } else if (c1 == '?') {
5503 /* =? is mime conversion start sequence */
5504 if(mime_f == STRICT_MIME) {
5505 /* check in real detail */
5506 if (mime_begin_strict(f) == EOF)
5509 } else if (mime_begin(f) == EOF)
5518 /* normal ASCII code */
5521 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5524 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5527 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
5528 if ((c1 = (*i_getc)(f)) == EOF) {
5529 /* (*oconv)(0, ESC); don't send bogus code */
5532 else if (c1 == '&') {
5534 if ((c1 = (*i_getc)(f)) == EOF) {
5540 else if (c1 == '$') {
5542 if ((c1 = (*i_getc)(f)) == EOF) {
5543 /* don't send bogus code
5545 (*oconv)(0, '$'); */
5547 } else if (c1 == '@' || c1 == 'B') {
5549 set_input_mode(JIS_X_0208);
5551 } else if (c1 == '(') {
5553 if ((c1 = (*i_getc)(f)) == EOF) {
5554 /* don't send bogus code
5560 } else if (c1 == '@'|| c1 == 'B') {
5562 set_input_mode(JIS_X_0208);
5565 } else if (c1 == 'D'){
5566 set_input_mode(JIS_X_0212);
5568 #endif /* X0212_ENABLE */
5569 } else if (c1 == 'O' || c1 == 'Q'){
5570 set_input_mode(JIS_X_0213_1);
5572 } else if (c1 == 'P'){
5573 set_input_mode(JIS_X_0213_2);
5576 /* could be some special code */
5583 } else if (broken_f&0x2) {
5584 /* accept any ESC-(-x as broken code ... */
5585 input_mode = JIS_X_0208;
5594 } else if (c1 == '(') {
5596 if ((c1 = (*i_getc)(f)) == EOF) {
5597 /* don't send bogus code
5599 (*oconv)(0, '('); */
5602 else if (c1 == 'I') {
5603 /* JIS X 0201 Katakana */
5604 set_input_mode(JIS_X_0201_1976_K);
5607 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5608 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5609 set_input_mode(ASCII);
5612 else if (broken_f&0x2) {
5613 set_input_mode(ASCII);
5622 else if (c1 == '.') {
5624 if ((c1 = (*i_getc)(f)) == EOF) {
5627 else if (c1 == 'A') {
5638 else if (c1 == 'N') {
5641 if (g2 == ISO_8859_1) {
5656 } else if (c1 == ESC && iconv == s_iconv) {
5657 /* ESC in Shift_JIS */
5658 if ((c1 = (*i_getc)(f)) == EOF) {
5659 /* (*oconv)(0, ESC); don't send bogus code */
5661 } else if (c1 == '$') {
5663 if ((c1 = (*i_getc)(f)) == EOF) {
5665 } else if (('E' <= c1 && c1 <= 'G') ||
5666 ('O' <= c1 && c1 <= 'Q')) {
5674 static const nkf_char jphone_emoji_first_table[7] =
5675 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5676 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5677 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5678 while (SP <= c1 && c1 <= 'z') {
5679 (*oconv)(0, c1 + c3);
5680 if ((c1 = (*i_getc)(f)) == EOF) LAST;
5695 } else if (c1 == LF || c1 == CR) {
5697 input_mode = ASCII; set_iconv(FALSE, 0);
5699 } else if (mime_decode_f && !mime_decode_mode){
5701 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5709 } else { /* if (c1 == CR)*/
5710 if ((c1=(*i_getc)(f))!=EOF) {
5714 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5734 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5737 if ((c3 = (*i_getc)(f)) != EOF) {
5740 if ((c4 = (*i_getc)(f)) != EOF) {
5742 (*iconv)(c2, c1, c3|c4);
5747 /* 3 bytes EUC or UTF-8 */
5748 if ((c3 = (*i_getc)(f)) != EOF) {
5750 (*iconv)(c2, c1, c3);
5758 0x7F <= c2 && c2 <= 0x92 &&
5759 0x21 <= c1 && c1 <= 0x7E) {
5761 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5764 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5768 (*oconv)(PREFIX_EUCG3 | c2, c1);
5770 #endif /* X0212_ENABLE */
5772 (*oconv)(PREFIX_EUCG3 | c2, c1);
5775 (*oconv)(input_mode, c1); /* other special case */
5781 /* goto next_word */
5785 (*iconv)(EOF, 0, 0);
/* No input code name recorded during conversion: walk input_code_list and
   record the name of the entry with the lowest score. */
5786 if (!input_codename)
5789 struct input_code *p = input_code_list;
5790 struct input_code *result = p;
5792 if (p->score < result->score) result = p;
5795 set_input_codename(result->name);
5797 debug(result->name);
5805 * int options(unsigned char *cp)
/*
 * options: parse one option string and update the global option state.
 *
 * cp points at an argument such as "-sE" or "--ic=UTF-8".  Short options
 * are single letters handled by the switch, several of which read a
 * trailing modifier character or digits straight from cp.  Long options
 * ("--name" or "--name=value") are looked up in long_option[]; an entry
 * with a non-empty alias is replaced by that alias string, the others are
 * handled by name below.
 *
 * NOTE(review): return type and return values are not visible in this
 * excerpt.
 */
5812 options(unsigned char *cp)
5816 unsigned char *cp_back = NULL;
/* skip everything up to and including the first '-' */
5821 while(*cp && *cp++!='-');
5822 while (*cp || cp_back) {
5830 case '-': /* literal options */
5831 if (!*cp || *cp == SP) { /* ignore the rest of arguments */
/* compare cp against each table name, stopping at a '=' in the table name */
5835 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
5836 p = (unsigned char *)long_option[i].name;
5837 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5838 if (*p == cp[j] || cp[j] == SP){
5845 #if !defined(PERL_XS) && !defined(WIN32DLL)
5846 fprintf(stderr, "unknown long option: --%s\n", cp);
5850 while(*cp && *cp != SP && cp++);
5851 if (long_option[i].alias[0]){
5853 cp = (unsigned char *)long_option[i].alias;
5855 if (strcmp(long_option[i].name, "ic=") == 0){
5856 enc = nkf_enc_find((char *)p);
5858 input_encoding = enc;
5861 if (strcmp(long_option[i].name, "oc=") == 0){
5862 enc = nkf_enc_find((char *)p);
5863 /* if (enc <= 0) continue; */
5865 output_encoding = enc;
5868 if (strcmp(long_option[i].name, "guess=") == 0){
5869 if (p[0] == '0' || p[0] == '1') {
5877 if (strcmp(long_option[i].name, "overwrite") == 0){
5880 preserve_time_f = TRUE;
5883 if (strcmp(long_option[i].name, "overwrite=") == 0){
5886 preserve_time_f = TRUE;
5888 backup_suffix = nkf_malloc(strlen((char *) p) + 1);
5889 strcpy(backup_suffix, (char *) p);
5892 if (strcmp(long_option[i].name, "in-place") == 0){
5895 preserve_time_f = FALSE;
5898 if (strcmp(long_option[i].name, "in-place=") == 0){
5901 preserve_time_f = FALSE;
5903 backup_suffix = nkf_malloc(strlen((char *) p) + 1);
5904 strcpy(backup_suffix, (char *) p);
5909 if (strcmp(long_option[i].name, "cap-input") == 0){
5913 if (strcmp(long_option[i].name, "url-input") == 0){
5918 #ifdef NUMCHAR_OPTION
5919 if (strcmp(long_option[i].name, "numchar-input") == 0){
5925 if (strcmp(long_option[i].name, "no-output") == 0){
5929 if (strcmp(long_option[i].name, "debug") == 0){
5934 if (strcmp(long_option[i].name, "cp932") == 0){
5935 #ifdef SHIFTJIS_CP932
5939 #ifdef UTF8_OUTPUT_ENABLE
5940 ms_ucs_map_f = UCS_MAP_CP932;
5944 if (strcmp(long_option[i].name, "no-cp932") == 0){
5945 #ifdef SHIFTJIS_CP932
5949 #ifdef UTF8_OUTPUT_ENABLE
5950 ms_ucs_map_f = UCS_MAP_ASCII;
5954 #ifdef SHIFTJIS_CP932
5955 if (strcmp(long_option[i].name, "cp932inv") == 0){
5962 if (strcmp(long_option[i].name, "x0212") == 0){
5969 if (strcmp(long_option[i].name, "exec-in") == 0){
5973 if (strcmp(long_option[i].name, "exec-out") == 0){
5978 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
5979 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
5980 no_cp932ext_f = TRUE;
5983 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
5984 no_best_fit_chars_f = TRUE;
5987 if (strcmp(long_option[i].name, "fb-skip") == 0){
5988 encode_fallback = NULL;
5991 if (strcmp(long_option[i].name, "fb-html") == 0){
5992 encode_fallback = encode_fallback_html;
5995 if (strcmp(long_option[i].name, "fb-xml") == 0){
5996 encode_fallback = encode_fallback_xml;
5999 if (strcmp(long_option[i].name, "fb-java") == 0){
6000 encode_fallback = encode_fallback_java;
6003 if (strcmp(long_option[i].name, "fb-perl") == 0){
6004 encode_fallback = encode_fallback_perl;
6007 if (strcmp(long_option[i].name, "fb-subchar") == 0){
6008 encode_fallback = encode_fallback_subchar;
6011 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6012 encode_fallback = encode_fallback_subchar;
6013 unicode_subchar = 0;
6015 /* decimal number */
6016 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6017 unicode_subchar *= 10;
6018 unicode_subchar += hex2bin(p[i]);
6020 }else if(p[1] == 'x' || p[1] == 'X'){
6021 /* hexadecimal number */
6022 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6023 unicode_subchar <<= 4;
6024 unicode_subchar |= hex2bin(p[i]);
6028 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6029 unicode_subchar *= 8;
6030 unicode_subchar += hex2bin(p[i]);
6033 w16e_conv(unicode_subchar, &i, &j);
6034 unicode_subchar = i<<8 | j;
6038 #ifdef UTF8_OUTPUT_ENABLE
6039 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6040 ms_ucs_map_f = UCS_MAP_MS;
6044 #ifdef UNICODE_NORMALIZATION
6045 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6050 if (strcmp(long_option[i].name, "prefix=") == 0){
6051 if (nkf_isgraph(p[0])){
6052 for (i = 1; nkf_isgraph(p[i]); i++){
6053 prefix_table[p[i]] = p[0];
6058 #if !defined(PERL_XS) && !defined(WIN32DLL)
6059 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6064 case 'b': /* buffered mode */
6067 case 'u': /* non buffered mode */
6070 case 't': /* transparent mode */
6075 } else if (*cp=='2') {
6079 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6087 case 'j': /* JIS output */
6089 output_encoding = nkf_enc_from_index(ISO_2022_JP);
6091 case 'e': /* AT&T EUC output */
6092 output_encoding = nkf_enc_from_index(EUCJP_NKF);
6094 case 's': /* SJIS output */
6095 output_encoding = nkf_enc_from_index(WINDOWS_31J);
6097 case 'l': /* ISO8859 Latin-1 support, no conversion */
6098 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6099 input_encoding = nkf_enc_from_index(ISO_8859_1);
6101 case 'i': /* Kanji IN ESC-$-@/B */
6102 if (*cp=='@'||*cp=='B')
6103 kanji_intro = *cp++;
6105 case 'o': /* ASCII IN ESC-(-J/B */
6106 if (*cp=='J'||*cp=='B'||*cp=='H')
6107 ascii_intro = *cp++;
6111 bit:1 katakana->hiragana
6112 bit:2 hiragana->katakana
6114 if ('9'>= *cp && *cp>='0')
6115 hira_f |= (*cp++ -'0');
6122 #if defined(MSDOS) || defined(__OS2__)
6129 show_configuration();
6137 #ifdef UTF8_OUTPUT_ENABLE
6138 case 'w': /* UTF-8 output */
6143 output_encoding = nkf_enc_from_index(UTF_8N);
6145 output_bom_f = TRUE;
6146 output_encoding = nkf_enc_from_index(UTF_8_BOM);
6150 if ('1'== cp[0] && '6'==cp[1]) {
6153 } else if ('3'== cp[0] && '2'==cp[1]) {
6157 output_encoding = nkf_enc_from_index(UTF_8);
6162 output_endian = ENDIAN_LITTLE;
6163 } else if (cp[0] == 'B') {
6166 output_encoding = nkf_enc_from_index(enc_idx);
6171 enc_idx = enc_idx == UTF_16
6172 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6173 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6175 output_bom_f = TRUE;
6176 enc_idx = enc_idx == UTF_16
6177 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6178 : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6180 output_encoding = nkf_enc_from_index(enc_idx);
6184 #ifdef UTF8_INPUT_ENABLE
6185 case 'W': /* UTF input */
6188 input_encoding = nkf_enc_from_index(UTF_8);
6191 if ('1'== cp[0] && '6'==cp[1]) {
6193 input_endian = ENDIAN_BIG;
6195 } else if ('3'== cp[0] && '2'==cp[1]) {
6197 input_endian = ENDIAN_BIG;
6200 input_encoding = nkf_enc_from_index(UTF_8);
6205 input_endian = ENDIAN_LITTLE;
6206 } else if (cp[0] == 'B') {
6208 input_endian = ENDIAN_BIG;
6210 enc_idx = (enc_idx == UTF_16
6211 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6212 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6213 input_encoding = nkf_enc_from_index(enc_idx);
6217 /* Input code assumption */
6218 case 'J': /* ISO-2022-JP input */
6219 input_encoding = nkf_enc_from_index(ISO_2022_JP);
6221 case 'E': /* EUC-JP input */
6222 input_encoding = nkf_enc_from_index(EUCJP_NKF);
6224 case 'S': /* Windows-31J input */
6225 input_encoding = nkf_enc_from_index(WINDOWS_31J);
6227 case 'Z': /* Convert X0208 alphabet to ASCII */
6229 bit:0 Convert JIS X 0208 Alphabet to ASCII
6230 bit:1 Convert Kankaku to one space
6231 bit:2 Convert Kankaku to two spaces
6232 bit:3 Convert HTML Entity
6233 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6235 while ('0'<= *cp && *cp <='9') {
6236 alpha_f |= 1 << (*cp++ - '0');
6238 if (!alpha_f) alpha_f = 1;
6240 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6241 x0201_f = FALSE; /* No X0201->X0208 conversion */
6243 ESC-(-I in JIS, EUC, MS Kanji
6244 SI/SO in JIS, EUC, MS Kanji
6245 SS2 in EUC, JIS, not in MS Kanji
6246 MS Kanji (0xa0-0xdf)
6248 ESC-(-I in JIS (0x20-0x5f)
6249 SS2 in EUC (0xa0-0xdf)
6250 0xa0-0xd in MS Kanji (0xa0-0xdf)
6253 case 'X': /* Convert X0201 kana to X0208 */
6256 case 'F': /* preserve new lines */
6257 fold_preserve_f = TRUE; /* no break: falls through into 'f' */
6258 case 'f': /* folding -f60 or -f */
6261 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6263 fold_len += *cp++ - '0';
6265 if (!(0<fold_len && fold_len<BUFSIZ))
6266 fold_len = DEFAULT_FOLD;
6270 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6272 fold_margin += *cp++ - '0';
6276 case 'm': /* MIME support */
6277 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6278 if (*cp=='B'||*cp=='Q') {
6279 mime_decode_mode = *cp++;
6280 mimebuf_f = FIXED_MIME;
6281 } else if (*cp=='N') {
6282 mime_f = TRUE; cp++;
6283 } else if (*cp=='S') {
6284 mime_f = STRICT_MIME; cp++;
6285 } else if (*cp=='0') {
6286 mime_decode_f = FALSE;
6287 mime_f = FALSE; cp++;
6289 mime_f = STRICT_MIME;
6292 case 'M': /* MIME output */
6295 mimeout_f = FIXED_MIME; cp++;
6296 } else if (*cp=='Q') {
6298 mimeout_f = FIXED_MIME; cp++;
6303 case 'B': /* Broken JIS support */
6305 bit:1 allow any x on ESC-(-x or ESC-$-x
6306 bit:2 reset to ascii on NL
6308 if ('9'>= *cp && *cp>='0')
6309 broken_f |= 1<<(*cp++ -'0');
6314 case 'O':/* for Output file */
6318 case 'c':/* add cr code */
6321 case 'd':/* delete cr code */
6324 case 'I': /* ISO-2022-JP output */
6327 case 'L': /* line mode */
6328 if (*cp=='u') { /* unix */
6329 eolmode_f = LF; cp++;
6330 } else if (*cp=='m') { /* mac */
6331 eolmode_f = CR; cp++;
6332 } else if (*cp=='w') { /* windows */
6333 eolmode_f = CRLF; cp++;
6334 } else if (*cp=='0') { /* no conversion */
6335 eolmode_f = 0; cp++;
6340 if ('2' <= *cp && *cp <= '9') {
6343 } else if (*cp == '0' || *cp == '1') {
6352 /* multiple options in a string are allowed for the Perl module */
6353 while(*cp && *cp++!='-');
6356 #if !defined(PERL_XS) && !defined(WIN32DLL)
6357 fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6359 /* bogus option but ignored */
6367 #include "nkf32dll.c"
6368 #elif defined(PERL_XS)
6369 #else /* WIN32DLL */
/*
 * main: command-line entry point (compiled only when neither WIN32DLL nor
 * PERL_XS is defined).
 *
 * Leading arguments that start with '-' are consumed as option strings.
 * With no file arguments left, stdin is converted to stdout.  Otherwise
 * each named file is opened and converted in turn; when file_out_f is set
 * the output is redirected either to a temporary file that later replaces
 * the original (optionally keeping a backup and the original timestamp)
 * or to "nkf.out".
 */
6371 main(int argc, char **argv)
6376 char *outfname = NULL;
6379 #ifdef EASYWIN /*Easy Win */
6380 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6382 #ifdef DEFAULT_CODE_LOCALE
6383 setlocale(LC_CTYPE, "");
6385 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6386 cp = (unsigned char *)*argv;
6391 if (pipe(fds) < 0 || (pid = fork()) < 0){
6402 execvp(argv[1], &argv[1]);
/* These flags are copied to *_back locals and written back further down,
   presumably around a reset of the option state -- confirm in the full file. */
6419 int debug_f_back = debug_f;
6422 int exec_f_back = exec_f;
6425 int x0212_f_back = x0212_f;
6427 int x0213_f_back = x0213_f;
6428 int guess_f_back = guess_f;
6430 guess_f = guess_f_back;
6433 debug_f = debug_f_back;
6436 exec_f = exec_f_back;
6438 x0212_f = x0212_f_back;
6439 x0213_f = x0213_f_back;
6442 if (binmode_f == TRUE)
6443 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6444 if (freopen("","wb",stdout) == NULL)
6451 setbuf(stdout, (char *) NULL);
6453 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6456 if (binmode_f == TRUE)
6457 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6458 if (freopen("","rb",stdin) == NULL) return (-1);
6462 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6466 kanji_convert(stdin);
6467 if (guess_f) print_guessed_code(NULL);
6471 int is_argument_error = FALSE;
6473 input_codename = NULL;
6476 iconv_for_check = 0;
6478 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6480 is_argument_error = TRUE;
6488 /* reopen file for stdout */
6489 if (file_out_f == TRUE) {
/* build the temporary output name from the original file name plus a
   template suffix */
6492 outfname = nkf_malloc(strlen(origfname)
6493 + strlen(".nkftmpXXXXXX")
6495 strcpy(outfname, origfname);
6499 for (i = strlen(outfname); i; --i){
6500 if (outfname[i - 1] == '/'
6501 || outfname[i - 1] == '\\'){
6507 strcat(outfname, "ntXXXXXX");
6509 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6510 S_IREAD | S_IWRITE);
6512 strcat(outfname, ".nkftmpXXXXXX");
6513 fd = mkstemp(outfname);
6516 || (fd_backup = dup(fileno(stdout))) < 0
6517 || dup2(fd, fileno(stdout)) < 0
6528 outfname = "nkf.out";
6531 if(freopen(outfname, "w", stdout) == NULL) {
6535 if (binmode_f == TRUE) {
6536 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6537 if (freopen("","wb",stdout) == NULL)
6544 if (binmode_f == TRUE)
6545 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6546 if (freopen("","rb",fin) == NULL)
6551 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6555 char *filename = NULL;
6557 if (nfiles > 1) filename = origfname;
6558 if (guess_f) print_guessed_code(filename);
6564 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
/* put the saved descriptor back on stdout */
6572 if (dup2(fd_backup, fileno(stdout)) < 0){
6575 if (stat(origfname, &sb)) {
6576 fprintf(stderr, "Can't stat %s\n", origfname);
6578 /* restore the permissions */
6579 if (chmod(outfname, sb.st_mode)) {
6580 fprintf(stderr, "Can't set permission %s\n", outfname);
6583 /* restore the timestamp */
6584 if(preserve_time_f){
6585 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6586 tb[0] = tb[1] = sb.st_mtime;
6587 if (utime(outfname, tb)) {
6588 fprintf(stderr, "Can't set timestamp %s\n", outfname);
6591 tb.actime = sb.st_atime;
6592 tb.modtime = sb.st_mtime;
6593 if (utime(outfname, &tb)) {
6594 fprintf(stderr, "Can't set timestamp %s\n", outfname);
/* move the original aside under its backup name, then move the converted
   file into its place */
6599 char *backup_filename = get_backup_filename(backup_suffix, origfname);
6601 unlink(backup_filename);
6603 if (rename(origfname, backup_filename)) {
6604 perror(backup_filename);
6605 fprintf(stderr, "Can't rename %s to %s\n",
6606 origfname, backup_filename);
6608 nkf_free(backup_filename);
6611 if (unlink(origfname)){
6616 if (rename(outfname, origfname)) {
6618 fprintf(stderr, "Can't rename %s to %s\n",
6619 outfname, origfname);
6626 if (is_argument_error)
6629 #ifdef EASYWIN /*Easy Win */
6630 if (file_out_f == FALSE)
6631 scanf("%d",&end_check);
6634 #else /* for Other OS */
6635 if (file_out_f == TRUE)
6637 #endif /*Easy Win */
6640 #endif /* WIN32DLL */