1 /** Network Kanji Filter. (PDS Version)
2 ************************************************************************
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
4 **
\e$BO"Mm@h!'
\e(B
\e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j
\e(B
5 **
\e$B!J
\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp
\e$B!K
\e(B
6 ** Copyright (C) 1996,1998
8 **
\e$BO"Mm@h!'
\e(B
\e$BN05eBg3X>pJs9)3X2J
\e(B
\e$B2OLn
\e(B
\e$B??<#
\e(B mime/X0208 support
9 **
\e$B!J
\e(BE-Mail Address: kono@ie.u-ryukyu.ac.jp
\e$B!K
\e(B
10 **
\e$BO"Mm@h!'
\e(B COW for DOS & Win16 & Win32 & OS/2
11 **
\e$B!J
\e(BE-Mail Address: GHG00637@niftyserve.or.p
\e$B!K
\e(B
13 **
\e$B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"
\e(B
14 **
\e$B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#
\e(B
15 **
\e$B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#
\e(B
16 **
\e$B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#
\e(B
17 **
\e$B%P%$%J%j$NG[I[$N:]$K$O
\e(Bversion message
\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#
\e(B
18 **
\e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#
\e(B
20 ** Everyone is permitted to do anything on this program
21 ** including copying, modifying, improving,
22 ** as long as you don't try to pretend that you wrote it.
23 ** i.e., the above copyright notice has to appear in all copies.
24 ** Binary distribution requires original version messages.
25 ** You don't have to ask before copying, redistribution or publishing.
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
27 ***********************************************************************/
29 /***********************************************************************
30 ** About UTF-8 support
31 ** This version can be used as a drop-in replacement for the previous nkf.
32 ** When it is started as e.g. "nkf -e" and auto-detection judges the
33 ** input to be UTF-8, the input is converted to euc-jp as is.
35 ** It is still quite likely that bugs remain
36 ** (especially in auto-detection, mixed encodings and error handling).
38 ** If you find any problem, please contact
39 ** E-Mail: furukawa@tcp-ip.or.jp
41 ***********************************************************************/
42 /* $Id: nkf.c,v 1.102 2006/06/12 16:34:42 naruse Exp $ */
43 #define NKF_VERSION "2.0.7"
44 #define NKF_RELEASE_DATE "2006-06-13"
49 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
50 "Copyright (C) 2002-2006 Kono, Furukawa, Naruse, mastodon"
57 ** USAGE: nkf [flags] [file]
60 ** b Output is buffered (DEFAULT)
61 ** u Output is unbuffered
65 ** j Output code is JIS 7 bit (DEFAULT SELECT)
66 ** s Output code is MS Kanji (DEFAULT SELECT)
67 ** e Output code is AT&T JIS (DEFAULT SELECT)
68 ** w Output code is UTF-8 (DEFAULT SELECT)
69 ** l Output code is JIS 7bit and ISO8859-1 Latin-1
71 ** m MIME conversion for ISO-2022-JP
72 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net>
73 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
74 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
75 ** M MIME output conversion
77 ** r {de/en}crypt ROT13/47
81 ** T Text mode output (for MS-DOS)
83 ** x Do not convert X0201 kana into X0208
84 ** Z Convert X0208 alphabet to ASCII
89 ** B try to fix broken JIS, missing Escape
90 ** B[1-9] broken level
92 ** O Output to 'nkf.out' file or last file name
93 ** d Delete \r in line feed
94 ** c Add \r in line feed
95 ** -- other long option
96 ** -- ignore following option (don't use with -O )
100 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
102 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
118 #if defined(MSDOS) || defined(__OS2__)
121 #if defined(_MSC_VER) || defined(__WATCOMC__)
122 #define mktemp _mktemp
128 #define setbinmode(fp) fsetbin(fp)
129 #elif defined(__DJGPP__)
130 #include <libc/dosio.h>
131 #define setbinmode(fp) djgpp_setbinmode(fp)
132 #else /* Microsoft C, Turbo C */
133 #define setbinmode(fp) setmode(fileno(fp), O_BINARY)
136 #define setbinmode(fp)
139 #if defined(__DJGPP__)
140 void djgpp_setbinmode(FILE *fp)
142 /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
145 m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
146 __file_handle_set(fd, m);
150 #ifdef _IOFBF /* SysV and MSDOS, Windows */
151 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
153 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
156 /*Borland C++ 4.5 EasyWin*/
157 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
166 /* added by satoru@isoternet.org */
168 #include <sys/types.h>
170 #include <sys/stat.h>
171 #if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
173 #if defined(__WATCOMC__)
174 #include <sys/utime.h>
178 #else /* defined(MSDOS) */
180 #ifdef __BORLANDC__ /* BCC32 */
182 #else /* !defined(__BORLANDC__) */
183 #include <sys/utime.h>
184 #endif /* (__BORLANDC__) */
185 #else /* !defined(__WIN32__) */
186 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__) /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
187 #include <sys/utime.h>
188 #elif defined(__TURBOC__) /* BCC */
190 #elif defined(LSI_C) /* LSI C */
191 #endif /* (__WIN32__) */
199 /* state of output_mode and input_mode
216 #define X0213_1 0x284F
217 #define X0213_2 0x2850
219 /* Input Assumption */
223 #define LATIN1_INPUT 6
225 #define STRICT_MIME 8
230 #define JAPANESE_EUC 10
234 #define UTF8_INPUT 13
235 #define UTF16BE_INPUT 14
236 #define UTF16LE_INPUT 15
/*
 * ASCII-only character classification and conversion helpers.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. `hex2bin(flag ? a : b)`) expands with the intended grouping.
 * The argument may still be evaluated more than once, so never pass an
 * expression with side effects (such as `*p++`).
 */
#define is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (' '<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of one hexadecimal digit; any non-hex character yields 0. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0 )
/* True when the high byte of c2 (taken as unsigned short) equals SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)

/* Lead-byte ranges compared against c2 by the CP932 handling. */
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END   0xFC
#define CP932INV_TABLE_BEGIN 0xED
#define CP932INV_TABLE_END   0xEE
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
281 #define HOLD_SIZE 1024
282 #if defined(INT_IS_SHORT)
283 #define IOBUF_SIZE 2048
285 #define IOBUF_SIZE 16384
288 #define DEFAULT_J 'B'
289 #define DEFAULT_R 'B'
291 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
292 #define SJ6394 0x0161 /* 63 - 94 ku offset */
294 #define RANGE_NUM_MAX 18
299 #if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
300 #define sizeof_euc_to_utf8_1byte 94
301 #define sizeof_euc_to_utf8_2bytes 94
302 #define sizeof_utf8_to_euc_C2 64
303 #define sizeof_utf8_to_euc_E5B8 64
304 #define sizeof_utf8_to_euc_2bytes 112
305 #define sizeof_utf8_to_euc_3bytes 16
308 /* MIME preprocessor */
310 #ifdef EASYWIN /*Easy Win */
311 extern POINT _BufferSize;
320 void (*status_func)(struct input_code *, nkf_char);
321 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
325 static char *input_codename = "";
328 static const char *CopyRight = COPY_RIGHT;
330 #if !defined(PERL_XS) && !defined(WIN32DLL)
331 static nkf_char noconvert(FILE *f);
333 static void module_connection(void);
334 static nkf_char kanji_convert(FILE *f);
335 static nkf_char h_conv(FILE *f,nkf_char c2,nkf_char c1);
336 static nkf_char push_hold_buf(nkf_char c2);
337 static void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0));
338 static nkf_char s_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
339 static nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
340 static nkf_char e_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
341 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
343 * 0: Shift_JIS, eucJP-ascii
347 #define UCS_MAP_ASCII 0
349 #define UCS_MAP_CP932 2
350 static int ms_ucs_map_f = UCS_MAP_ASCII;
352 #ifdef UTF8_INPUT_ENABLE
353 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
354 static int no_cp932ext_f = FALSE;
355 /* ignore ZERO WIDTH NO-BREAK SPACE */
356 static int ignore_zwnbsp_f = TRUE;
357 static int no_best_fit_chars_f = FALSE;
358 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
359 static void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c);
360 static void encode_fallback_html(nkf_char c);
361 static void encode_fallback_xml(nkf_char c);
362 static void encode_fallback_java(nkf_char c);
363 static void encode_fallback_perl(nkf_char c);
364 static void encode_fallback_subchar(nkf_char c);
365 static void (*encode_fallback)(nkf_char c) = NULL;
366 static nkf_char w2e_conv(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
367 static nkf_char w_iconv(nkf_char c2,nkf_char c1,nkf_char c0);
368 static nkf_char w_iconv16(nkf_char c2,nkf_char c1,nkf_char c0);
369 static nkf_char unicode_to_jis_common(nkf_char c2,nkf_char c1,nkf_char c0,nkf_char *p2,nkf_char *p1);
370 static nkf_char w_iconv_common(nkf_char c1,nkf_char c0,const unsigned short *const *pp,nkf_char psize,nkf_char *p2,nkf_char *p1);
371 static void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0);
372 static nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0);
373 static nkf_char w16e_conv(nkf_char val,nkf_char *p2,nkf_char *p1);
375 #ifdef UTF8_OUTPUT_ENABLE
376 static int unicode_bom_f= 0; /* Output Unicode BOM */
377 static int w_oconv16_LE = 0; /* utf-16 little endian */
378 static nkf_char e2w_conv(nkf_char c2,nkf_char c1);
379 static void w_oconv(nkf_char c2,nkf_char c1);
380 static void w_oconv16(nkf_char c2,nkf_char c1);
382 static void e_oconv(nkf_char c2,nkf_char c1);
383 static nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1);
384 static void s_oconv(nkf_char c2,nkf_char c1);
385 static void j_oconv(nkf_char c2,nkf_char c1);
386 static void fold_conv(nkf_char c2,nkf_char c1);
387 static void cr_conv(nkf_char c2,nkf_char c1);
388 static void z_conv(nkf_char c2,nkf_char c1);
389 static void rot_conv(nkf_char c2,nkf_char c1);
390 static void hira_conv(nkf_char c2,nkf_char c1);
391 static void base64_conv(nkf_char c2,nkf_char c1);
392 static void iso2022jp_check_conv(nkf_char c2,nkf_char c1);
393 static void no_connection(nkf_char c2,nkf_char c1);
394 static nkf_char no_connection2(nkf_char c2,nkf_char c1,nkf_char c0);
396 static void code_score(struct input_code *ptr);
397 static void code_status(nkf_char c);
399 static void std_putc(nkf_char c);
400 static nkf_char std_getc(FILE *f);
401 static nkf_char std_ungetc(nkf_char c,FILE *f);
403 static nkf_char broken_getc(FILE *f);
404 static nkf_char broken_ungetc(nkf_char c,FILE *f);
406 static nkf_char mime_begin(FILE *f);
407 static nkf_char mime_getc(FILE *f);
408 static nkf_char mime_ungetc(nkf_char c,FILE *f);
410 static void switch_mime_getc(void);
411 static void unswitch_mime_getc(void);
412 static nkf_char mime_begin_strict(FILE *f);
413 static nkf_char mime_getc_buf(FILE *f);
414 static nkf_char mime_ungetc_buf(nkf_char c,FILE *f);
415 static nkf_char mime_integrity(FILE *f,const unsigned char *p);
417 static nkf_char base64decode(nkf_char c);
418 static void mime_prechar(nkf_char c2, nkf_char c1);
419 static void mime_putc(nkf_char c);
420 static void open_mime(nkf_char c);
421 static void close_mime(void);
422 static void eof_mime(void);
423 static void mimeout_addchar(nkf_char c);
425 static void usage(void);
426 static void version(void);
428 static void options(unsigned char *c);
429 #if defined(PERL_XS) || defined(WIN32DLL)
430 static void reinit(void);
435 #if !defined(PERL_XS) && !defined(WIN32DLL)
436 static unsigned char stdibuf[IOBUF_SIZE];
437 static unsigned char stdobuf[IOBUF_SIZE];
439 static unsigned char hold_buf[HOLD_SIZE*2];
440 static int hold_count;
442 /* MIME preprocessor fifo */
444 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
445 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
446 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
447 static unsigned char mime_buf[MIME_BUF_SIZE];
448 static unsigned int mime_top = 0;
449 static unsigned int mime_last = 0; /* decoded */
450 static unsigned int mime_input = 0; /* undecoded */
451 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
/* File-scope option flags and their initial values. */
454 static int unbuf_f = FALSE;
455 static int estab_f = FALSE;
456 static int nop_f = FALSE;
457 static int binmode_f = TRUE; /* binary mode */
458 static int rot_f = FALSE; /* rot13/47 mode */
459 static int hira_f = FALSE; /* hira/kata henkan (hiragana/katakana conversion) */
460 static int input_f = FALSE; /* non fixed input code */
461 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */
462 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
463 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
464 static int mimebuf_f = FALSE; /* MIME buffered input */
465 static int broken_f = FALSE; /* convert ESC-less broken JIS */
466 static int iso8859_f = FALSE; /* ISO8859 through */
467 static int mimeout_f = FALSE; /* base64 mode */
468 #if defined(MSDOS) || defined(__OS2__)
469 static int x0201_f = TRUE; /* Assume JISX0201 kana */
471 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
473 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */
/* Optional input filters: each has an enable flag, a pair of getc/ungetc
 * function pointers (initially std_getc/std_ungetc), and prototypes for the
 * filter's own getc/ungetc. */
475 #ifdef UNICODE_NORMALIZATION
476 static int nfc_f = FALSE;
477 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of nfc_getc */
478 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
479 static nkf_char nfc_getc(FILE *f);
480 static nkf_char nfc_ungetc(nkf_char c,FILE *f);
484 static int cap_f = FALSE;
485 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
486 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
487 static nkf_char cap_getc(FILE *f);
488 static nkf_char cap_ungetc(nkf_char c,FILE *f);
490 static int url_f = FALSE;
491 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
492 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
493 static nkf_char url_getc(FILE *f);
494 static nkf_char url_ungetc(nkf_char c,FILE *f);
497 #if defined(INT_IS_SHORT)
498 #define NKF_INT32_C(n) (n##L)
500 #define NKF_INT32_C(n) (n)
502 #define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
503 #define CLASS_MASK NKF_INT32_C(0xFF000000)
504 #define CLASS_UTF16 NKF_INT32_C(0x01000000)
505 #define VALUE_MASK NKF_INT32_C(0x00FFFFFF)
506 #define UNICODE_MAX NKF_INT32_C(0x0010FFFF)
507 #define is_unicode_capsule(c) ((c & CLASS_MASK) == CLASS_UTF16)
508 #define is_unicode_bmp(c) ((c & VALUE_MASK) <= NKF_INT32_C(0xFFFF))
/* Numeric-character input filter: enable flag, getc/ungetc function pointers
 * (initially std_getc/std_ungetc) and the filter's own prototypes. */
510 #ifdef NUMCHAR_OPTION
511 static int numchar_f = FALSE;
512 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of numchar_getc */
513 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
514 static nkf_char numchar_getc(FILE *f);
515 static nkf_char numchar_ungetc(nkf_char c,FILE *f);
519 static int noout_f = FALSE;
520 static void no_putc(nkf_char c);
521 static nkf_char debug_f = FALSE;
522 static void debug(const char *str);
523 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
526 static int guess_f = FALSE;
528 static void print_guessed_code(char *filename);
530 static void set_input_codename(char *codename);
531 static int is_inputcode_mixed = FALSE;
532 static int is_inputcode_set = FALSE;
535 static int exec_f = 0;
538 #ifdef SHIFTJIS_CP932
539 /* invert IBM extended characters to others */
540 static int cp51932_f = TRUE;
542 /* invert NEC-selected IBM extended characters to IBM extended characters */
543 static int cp932inv_f = TRUE;
545 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
546 #endif /* SHIFTJIS_CP932 */
549 static int x0212_f = FALSE;
550 static nkf_char x0212_shift(nkf_char c);
551 static nkf_char x0212_unshift(nkf_char c);
553 static int x0213_f = FALSE;
555 static unsigned char prefix_table[256];
557 static void set_code_score(struct input_code *ptr, nkf_char score);
558 static void clr_code_score(struct input_code *ptr, nkf_char score);
559 static void status_disable(struct input_code *ptr);
560 static void status_push_ch(struct input_code *ptr, nkf_char c);
561 static void status_clear(struct input_code *ptr);
562 static void status_reset(struct input_code *ptr);
563 static void status_reinit(struct input_code *ptr);
564 static void status_check(struct input_code *ptr, nkf_char c);
565 static void e_status(struct input_code *, nkf_char);
566 static void s_status(struct input_code *, nkf_char);
568 #ifdef UTF8_INPUT_ENABLE
569 static void w_status(struct input_code *, nkf_char);
570 static void w16_status(struct input_code *, nkf_char);
571 static int utf16_mode = UTF16BE_INPUT;
574 struct input_code input_code_list[] = {
575 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
576 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
577 #ifdef UTF8_INPUT_ENABLE
578 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
579 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0},
584 static int mimeout_mode = 0;
585 static int base64_count = 0;
587 /* X0208 -> ASCII converter */
590 static int f_line = 0; /* chars in line */
591 static int f_prev = 0;
592 static int fold_preserve_f = FALSE; /* preserve new lines */
593 static int fold_f = FALSE;
594 static int fold_len = 0;
597 static unsigned char kanji_intro = DEFAULT_J;
598 static unsigned char ascii_intro = DEFAULT_R;
602 #define FOLD_MARGIN 10
603 #define DEFAULT_FOLD 60
605 static int fold_margin = FOLD_MARGIN;
609 #ifdef DEFAULT_CODE_JIS
610 # define DEFAULT_CONV j_oconv
612 #ifdef DEFAULT_CODE_SJIS
613 # define DEFAULT_CONV s_oconv
615 #ifdef DEFAULT_CODE_EUC
616 # define DEFAULT_CONV e_oconv
618 #ifdef DEFAULT_CODE_UTF8
619 # define DEFAULT_CONV w_oconv
622 /* process default */
623 static void (*output_conv)(nkf_char c2,nkf_char c1) = DEFAULT_CONV;
625 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
626 /* s_iconv or oconv */
627 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
629 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
630 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
631 static void (*o_crconv)(nkf_char c2,nkf_char c1) = no_connection;
632 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
633 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
634 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
635 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
637 /* static redirections: function pointers through which input and output
 * are routed; all start at the std_putc/std_getc/std_ungetc defaults */
639 static void (*o_putc)(nkf_char c) = std_putc;
641 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
642 static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
644 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of bgetc (presumably broken_getc -- confirm) */
645 static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
647 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
649 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
650 static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
652 /* for strict mime */
653 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
654 static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
657 static int output_mode = ASCII, /* output kanji mode */
658 input_mode = ASCII, /* input kanji mode */
659 shift_mode = FALSE; /* TRUE shift out, or X0201 */
660 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
662 /* X0201 / X0208 conversion tables */
664 /* X0201 kana conversion table */
667 unsigned char cv[]= {
668 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
669 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
670 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
671 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
672 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
673 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
674 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
675 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
676 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
677 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
678 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
679 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
680 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
681 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
682 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
683 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
687 /* X0201 kana conversion table for dakuten */
690 unsigned char dv[]= {
691 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
692 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
693 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
694 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
695 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
696 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
697 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
698 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
699 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
700 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
701 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
702 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
703 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
704 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
705 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
706 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
709 /* X0201 kana conversion table for han-dakuten */
712 unsigned char ev[]= {
713 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
714 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
715 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
716 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
717 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
718 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
719 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
720 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
721 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
724 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
725 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
726 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
732 /* X0208 kigou conversion table */
733 /* 0x8140 - 0x819e */
735 unsigned char fv[] = {
737 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
738 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
739 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
740 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
741 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
742 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
743 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
744 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
745 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
746 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
747 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
748 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
754 static int file_out_f = FALSE;
756 static int overwrite_f = FALSE;
757 static int preserve_time_f = FALSE;
758 static int backup_f = FALSE;
759 static char *backup_suffix = "";
760 static char *get_backup_filename(const char *suffix, const char *filename);
763 static int crmode_f = 0; /* CR, NL, CRLF */
764 #ifdef EASYWIN /*Easy Win */
765 static int end_check;
768 #define STD_GC_BUFSIZE (256)
769 nkf_char std_gc_buf[STD_GC_BUFSIZE];
773 #include "nkf32dll.c"
774 #elif defined(PERL_XS)
776 int main(int argc, char **argv)
781 char *outfname = NULL;
784 #ifdef EASYWIN /*Easy Win */
785 _BufferSize.y = 400;/*Set Scroll Buffer Size*/
788 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
789 cp = (unsigned char *)*argv;
794 if (pipe(fds) < 0 || (pid = fork()) < 0){
805 execvp(argv[1], &argv[1]);
819 if(x0201_f == WISH_TRUE)
820 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201);
822 if (binmode_f == TRUE)
823 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
824 if (freopen("","wb",stdout) == NULL)
831 setbuf(stdout, (char *) NULL);
833 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
836 if (binmode_f == TRUE)
837 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
838 if (freopen("","rb",stdin) == NULL) return (-1);
842 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
846 kanji_convert(stdin);
847 if (guess_f) print_guessed_code(NULL);
852 is_inputcode_mixed = FALSE;
853 is_inputcode_set = FALSE;
858 if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
867 /* reopen file for stdout */
868 if (file_out_f == TRUE) {
871 outfname = malloc(strlen(origfname)
872 + strlen(".nkftmpXXXXXX")
878 strcpy(outfname, origfname);
882 for (i = strlen(outfname); i; --i){
883 if (outfname[i - 1] == '/'
884 || outfname[i - 1] == '\\'){
890 strcat(outfname, "ntXXXXXX");
892 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
895 strcat(outfname, ".nkftmpXXXXXX");
896 fd = mkstemp(outfname);
899 || (fd_backup = dup(fileno(stdout))) < 0
900 || dup2(fd, fileno(stdout)) < 0
911 outfname = "nkf.out";
914 if(freopen(outfname, "w", stdout) == NULL) {
918 if (binmode_f == TRUE) {
919 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
920 if (freopen("","wb",stdout) == NULL)
927 if (binmode_f == TRUE)
928 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
929 if (freopen("","rb",fin) == NULL)
934 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
938 char *filename = NULL;
940 if (nfiles > 1) filename = origfname;
941 if (guess_f) print_guessed_code(filename);
947 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
955 if (dup2(fd_backup, fileno(stdout)) < 0){
958 if (stat(origfname, &sb)) {
959 fprintf(stderr, "Can't stat %s\n", origfname);
961 /* restore the original file's permissions */
962 if (chmod(outfname, sb.st_mode)) {
963 fprintf(stderr, "Can't set permission %s\n", outfname);
966 /* restore the original file's timestamp */
968 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
969 tb[0] = tb[1] = sb.st_mtime;
970 if (utime(outfname, tb)) {
971 fprintf(stderr, "Can't set timestamp %s\n", outfname);
974 tb.actime = sb.st_atime;
975 tb.modtime = sb.st_mtime;
976 if (utime(outfname, &tb)) {
977 fprintf(stderr, "Can't set timestamp %s\n", outfname);
982 char *backup_filename = get_backup_filename(backup_suffix, origfname);
984 unlink(backup_filename);
986 if (rename(origfname, backup_filename)) {
987 perror(backup_filename);
988 fprintf(stderr, "Can't rename %s to %s\n",
989 origfname, backup_filename);
993 if (unlink(origfname)){
998 if (rename(outfname, origfname)) {
1000 fprintf(stderr, "Can't rename %s to %s\n",
1001 outfname, origfname);
1009 #ifdef EASYWIN /*Easy Win */
1010 if (file_out_f == FALSE)
1011 scanf("%d",&end_check);
1014 #else /* for Other OS */
1015 if (file_out_f == TRUE)
1017 #endif /*Easy Win */
1020 #endif /* WIN32DLL */
/*
 * Build the name of the backup file for `filename`.
 *
 * suffix   - backup pattern. If it contains one or more '*' characters,
 *            every '*' is replaced by `filename` and all other characters
 *            are copied literally (suffix "old_*.orig", filename "x"
 *            gives "old_x.orig"). Without any '*', the suffix is simply
 *            appended to `filename` (".bak", "a.txt" gives "a.txt.bak").
 * filename - name of the file being backed up.
 *
 * Returns a newly malloc()ed, NUL-terminated string owned by the caller,
 * or NULL (after reporting through perror) if memory cannot be allocated.
 */
char *get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t suffix_length = strlen(suffix);
    size_t filename_length = strlen(filename);
    size_t asterisk_count = 0;
    size_t total;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
        /* literal suffix characters + one copy of filename per '*' + NUL */
        total = (suffix_length - asterisk_count)
              + asterisk_count * filename_length + 1;
    } else {
        /* filename followed by the whole suffix + NUL */
        total = filename_length + suffix_length + 1;
    }

    backup_filename = malloc(total);
    if (!backup_filename) {
        perror("Can't malloc backup filename.");
        return NULL;
    }

    if (asterisk_count) {
        for (i = 0, j = 0; i < suffix_length; i++) {
            if (suffix[i] == '*') {
                memcpy(backup_filename + j, filename, filename_length);
                j += filename_length;
            } else {
                backup_filename[j++] = suffix[i];
            }
        }
        backup_filename[j] = '\0';
    } else {
        memcpy(backup_filename, filename, filename_length);
        /* suffix_length + 1 also copies the terminating NUL */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1088 {"katakana-hiragana","h3"},
1095 #ifdef UTF8_OUTPUT_ENABLE
1105 {"fb-subchar=", ""},
1107 #ifdef UTF8_INPUT_ENABLE
1108 {"utf8-input", "W"},
1109 {"utf16-input", "W16"},
1110 {"no-cp932ext", ""},
1111 {"no-best-fit-chars",""},
1113 #ifdef UNICODE_NORMALIZATION
1114 {"utf8mac-input", ""},
1126 #ifdef NUMCHAR_OPTION
1127 {"numchar-input", ""},
1133 #ifdef SHIFTJIS_CP932
1143 static int option_mode = 0;
1145 void options(unsigned char *cp)
1149 unsigned char *cp_back = NULL;
1154 while(*cp && *cp++!='-');
1155 while (*cp || cp_back) {
1163 case '-': /* literal options */
1164 if (!*cp || *cp == SPACE) { /* ignore the rest of arguments */
1168 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
1169 p = (unsigned char *)long_option[i].name;
1170 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
1171 if (*p == cp[j] || cp[j] == ' '){
1178 while(*cp && *cp != SPACE && cp++);
1179 if (long_option[i].alias[0]){
1181 cp = (unsigned char *)long_option[i].alias;
1183 if (strcmp(long_option[i].name, "ic=") == 0){
1184 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1185 codeset[i] = nkf_toupper(p[i]);
1188 if(strcmp(codeset, "ISO-2022-JP") == 0 ||
1189 strcmp(codeset, "X-ISO2022JP-CP932") == 0 ||
1190 strcmp(codeset, "CP50220") == 0 ||
1191 strcmp(codeset, "CP50221") == 0 ||
1192 strcmp(codeset, "CP50222") == 0 ||
1193 strcmp(codeset, "ISO-2022-JP-MS") == 0){
1194 input_f = JIS_INPUT;
1195 }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1196 input_f = JIS_INPUT;
1200 }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1201 input_f = JIS_INPUT;
1206 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1207 input_f = SJIS_INPUT;
1208 if (x0201_f==NO_X0201) x0201_f=TRUE;
1209 }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1210 strcmp(codeset, "CSWINDOWS31J") == 0 ||
1211 strcmp(codeset, "CP932") == 0 ||
1212 strcmp(codeset, "MS932") == 0){
1213 input_f = SJIS_INPUT;
1215 #ifdef SHIFTJIS_CP932
1218 #ifdef UTF8_OUTPUT_ENABLE
1219 ms_ucs_map_f = UCS_MAP_CP932;
1221 }else if(strcmp(codeset, "EUCJP") == 0 ||
1222 strcmp(codeset, "EUC-JP") == 0){
1223 input_f = JIS_INPUT;
1224 }else if(strcmp(codeset, "CP51932") == 0){
1225 input_f = JIS_INPUT;
1227 #ifdef SHIFTJIS_CP932
1230 #ifdef UTF8_OUTPUT_ENABLE
1231 ms_ucs_map_f = UCS_MAP_CP932;
1233 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1234 strcmp(codeset, "EUCJP-MS") == 0 ||
1235 strcmp(codeset, "EUCJPMS") == 0){
1236 input_f = JIS_INPUT;
1238 #ifdef SHIFTJIS_CP932
1241 #ifdef UTF8_OUTPUT_ENABLE
1242 ms_ucs_map_f = UCS_MAP_MS;
1244 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1245 strcmp(codeset, "EUCJP-ASCII") == 0){
1246 input_f = JIS_INPUT;
1248 #ifdef SHIFTJIS_CP932
1251 #ifdef UTF8_OUTPUT_ENABLE
1252 ms_ucs_map_f = UCS_MAP_ASCII;
1254 }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1255 strcmp(codeset, "SHIFT_JIS-2004") == 0){
1256 input_f = SJIS_INPUT;
1258 #ifdef SHIFTJIS_CP932
1262 if (x0201_f==NO_X0201) x0201_f=TRUE;
1263 }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1264 strcmp(codeset, "EUC-JIS-2004") == 0){
1265 input_f = JIS_INPUT;
1268 #ifdef SHIFTJIS_CP932
1272 #ifdef UTF8_INPUT_ENABLE
1273 }else if(strcmp(codeset, "UTF-8") == 0 ||
1274 strcmp(codeset, "UTF-8N") == 0 ||
1275 strcmp(codeset, "UTF-8-BOM") == 0){
1276 input_f = UTF8_INPUT;
1277 #ifdef UNICODE_NORMALIZATION
1278 }else if(strcmp(codeset, "UTF8-MAC") == 0 ||
1279 strcmp(codeset, "UTF-8-MAC") == 0){
1280 input_f = UTF8_INPUT;
1283 }else if(strcmp(codeset, "UTF-16") == 0){
1284 input_f = UTF16BE_INPUT;
1285 utf16_mode = UTF16BE_INPUT;
1286 }else if(strcmp(codeset, "UTF-16BE") == 0 ||
1287 strcmp(codeset, "UTF-16BE-BOM") == 0){
1288 input_f = UTF16BE_INPUT;
1289 utf16_mode = UTF16BE_INPUT;
1290 }else if(strcmp(codeset, "UTF-16LE") == 0 ||
1291 strcmp(codeset, "UTF-16LE-BOM") == 0){
1292 input_f = UTF16LE_INPUT;
1293 utf16_mode = UTF16LE_INPUT;
1298 if (strcmp(long_option[i].name, "oc=") == 0){
1299 for (i=0; i < 16 && SPACE < p[i] && p[i] < DEL; i++){
1300 codeset[i] = nkf_toupper(p[i]);
1303 if(strcmp(codeset, "ISO-2022-JP") == 0 ||
1304 strcmp(codeset, "CP50220") == 0){
1305 output_conv = j_oconv;
1306 }else if(strcmp(codeset, "X-ISO2022JP-CP932") == 0){
1307 output_conv = j_oconv;
1308 no_cp932ext_f = TRUE;
1309 }else if(strcmp(codeset, "CP50221") == 0 ||
1310 strcmp(codeset, "ISO-2022-JP-MS") == 0){
1311 output_conv = j_oconv;
1313 }else if(strcmp(codeset, "ISO-2022-JP-1") == 0){
1314 output_conv = j_oconv;
1318 #ifdef SHIFTJIS_CP932
1321 }else if(strcmp(codeset, "ISO-2022-JP-3") == 0){
1322 output_conv = j_oconv;
1327 #ifdef SHIFTJIS_CP932
1330 }else if(strcmp(codeset, "ISO-2022-JP-MS") == 0){
1331 output_conv = j_oconv;
1336 #ifdef SHIFTJIS_CP932
1339 }else if(strcmp(codeset, "SHIFT_JIS") == 0){
1340 output_conv = s_oconv;
1341 }else if(strcmp(codeset, "WINDOWS-31J") == 0 ||
1342 strcmp(codeset, "CSWINDOWS31J") == 0 ||
1343 strcmp(codeset, "CP932") == 0 ||
1344 strcmp(codeset, "MS932") == 0){
1345 output_conv = s_oconv;
1347 #ifdef SHIFTJIS_CP932
1351 #ifdef UTF8_OUTPUT_ENABLE
1352 ms_ucs_map_f = UCS_MAP_CP932;
1354 }else if(strcmp(codeset, "EUCJP") == 0 ||
1355 strcmp(codeset, "EUC-JP") == 0){
1356 output_conv = e_oconv;
1357 }else if(strcmp(codeset, "CP51932") == 0){
1358 output_conv = e_oconv;
1360 #ifdef SHIFTJIS_CP932
1363 #ifdef UTF8_OUTPUT_ENABLE
1364 ms_ucs_map_f = UCS_MAP_CP932;
1366 }else if(strcmp(codeset, "EUC-JP-MS") == 0 ||
1367 strcmp(codeset, "EUCJP-MS") == 0 ||
1368 strcmp(codeset, "EUCJPMS") == 0){
1369 output_conv = e_oconv;
1374 #ifdef SHIFTJIS_CP932
1377 #ifdef UTF8_OUTPUT_ENABLE
1378 ms_ucs_map_f = UCS_MAP_MS;
1380 }else if(strcmp(codeset, "EUC-JP-ASCII") == 0 ||
1381 strcmp(codeset, "EUCJP-ASCII") == 0){
1382 output_conv = e_oconv;
1387 #ifdef SHIFTJIS_CP932
1390 #ifdef UTF8_OUTPUT_ENABLE
1391 ms_ucs_map_f = UCS_MAP_ASCII;
1393 }else if(strcmp(codeset, "SHIFT_JISX0213") == 0 ||
1394 strcmp(codeset, "SHIFT_JIS-2004") == 0){
1395 output_conv = s_oconv;
1397 #ifdef SHIFTJIS_CP932
1400 }else if(strcmp(codeset, "EUC-JISX0213") == 0 ||
1401 strcmp(codeset, "EUC-JIS-2004") == 0){
1402 output_conv = e_oconv;
1407 #ifdef SHIFTJIS_CP932
1410 #ifdef UTF8_OUTPUT_ENABLE
1411 }else if(strcmp(codeset, "UTF-8") == 0){
1412 output_conv = w_oconv;
1413 }else if(strcmp(codeset, "UTF-8N") == 0){
1414 output_conv = w_oconv;
1416 }else if(strcmp(codeset, "UTF-8-BOM") == 0){
1417 output_conv = w_oconv;
1419 }else if(strcmp(codeset, "UTF-16BE") == 0){
1420 output_conv = w_oconv16;
1422 }else if(strcmp(codeset, "UTF-16") == 0 ||
1423 strcmp(codeset, "UTF-16BE-BOM") == 0){
1424 output_conv = w_oconv16;
1426 }else if(strcmp(codeset, "UTF-16LE") == 0){
1427 output_conv = w_oconv16;
1430 }else if(strcmp(codeset, "UTF-16LE-BOM") == 0){
1431 output_conv = w_oconv16;
1439 if (strcmp(long_option[i].name, "overwrite") == 0){
1442 preserve_time_f = TRUE;
1445 if (strcmp(long_option[i].name, "overwrite=") == 0){
1448 preserve_time_f = TRUE;
1450 backup_suffix = malloc(strlen((char *) p) + 1);
1451 strcpy(backup_suffix, (char *) p);
1454 if (strcmp(long_option[i].name, "in-place") == 0){
1457 preserve_time_f = FALSE;
1460 if (strcmp(long_option[i].name, "in-place=") == 0){
1463 preserve_time_f = FALSE;
1465 backup_suffix = malloc(strlen((char *) p) + 1);
1466 strcpy(backup_suffix, (char *) p);
1471 if (strcmp(long_option[i].name, "cap-input") == 0){
1475 if (strcmp(long_option[i].name, "url-input") == 0){
1480 #ifdef NUMCHAR_OPTION
1481 if (strcmp(long_option[i].name, "numchar-input") == 0){
1487 if (strcmp(long_option[i].name, "no-output") == 0){
1491 if (strcmp(long_option[i].name, "debug") == 0){
1496 if (strcmp(long_option[i].name, "cp932") == 0){
1497 #ifdef SHIFTJIS_CP932
1501 #ifdef UTF8_OUTPUT_ENABLE
1502 ms_ucs_map_f = UCS_MAP_CP932;
1506 if (strcmp(long_option[i].name, "no-cp932") == 0){
1507 #ifdef SHIFTJIS_CP932
1511 #ifdef UTF8_OUTPUT_ENABLE
1512 ms_ucs_map_f = UCS_MAP_ASCII;
1516 #ifdef SHIFTJIS_CP932
1517 if (strcmp(long_option[i].name, "cp932inv") == 0){
1524 if (strcmp(long_option[i].name, "x0212") == 0){
1531 if (strcmp(long_option[i].name, "exec-in") == 0){
1535 if (strcmp(long_option[i].name, "exec-out") == 0){
1540 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
1541 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
1542 no_cp932ext_f = TRUE;
1545 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
1546 no_best_fit_chars_f = TRUE;
1549 if (strcmp(long_option[i].name, "fb-skip") == 0){
1550 encode_fallback = NULL;
1553 if (strcmp(long_option[i].name, "fb-html") == 0){
1554 encode_fallback = encode_fallback_html;
1557 if (strcmp(long_option[i].name, "fb-xml" ) == 0){
1558 encode_fallback = encode_fallback_xml;
1561 if (strcmp(long_option[i].name, "fb-java") == 0){
1562 encode_fallback = encode_fallback_java;
1565 if (strcmp(long_option[i].name, "fb-perl") == 0){
1566 encode_fallback = encode_fallback_perl;
1569 if (strcmp(long_option[i].name, "fb-subchar") == 0){
1570 encode_fallback = encode_fallback_subchar;
1573 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
1574 encode_fallback = encode_fallback_subchar;
1575 unicode_subchar = 0;
1577 /* decimal number */
1578 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
1579 unicode_subchar *= 10;
1580 unicode_subchar += hex2bin(p[i]);
1582 }else if(p[1] == 'x' || p[1] == 'X'){
1583 /* hexadecimal number */
1584 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
1585 unicode_subchar <<= 4;
1586 unicode_subchar |= hex2bin(p[i]);
1590 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
1591 unicode_subchar *= 8;
1592 unicode_subchar += hex2bin(p[i]);
1595 w16e_conv(unicode_subchar, &i, &j);
1596 unicode_subchar = i<<8 | j;
1600 #ifdef UTF8_OUTPUT_ENABLE
1601 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
1602 ms_ucs_map_f = UCS_MAP_MS;
1606 #ifdef UNICODE_NORMALIZATION
1607 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
1608 input_f = UTF8_INPUT;
1613 if (strcmp(long_option[i].name, "prefix=") == 0){
1614 if (nkf_isgraph(p[0])){
1615 for (i = 1; nkf_isgraph(p[i]); i++){
1616 prefix_table[p[i]] = p[0];
1623 case 'b': /* buffered mode */
1626 case 'u': /* non-buffered mode */
1629 case 't': /* transparent mode */
1634 } else if (*cp=='2') {
1638 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
1646 case 'j': /* JIS output */
1648 output_conv = j_oconv;
1650 case 'e': /* AT&T EUC output */
1651 output_conv = e_oconv;
1653 case 's': /* SJIS output */
1654 output_conv = s_oconv;
1656 case 'l': /* ISO8859 Latin-1 support, no conversion */
1657 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
1658 input_f = LATIN1_INPUT;
1660 case 'i': /* Kanji IN ESC-$-@/B */
1661 if (*cp=='@'||*cp=='B')
1662 kanji_intro = *cp++;
1664 case 'o': /* ASCII IN ESC-(-J/B */
1665 if (*cp=='J'||*cp=='B'||*cp=='H')
1666 ascii_intro = *cp++;
1670 bit:1 katakana->hiragana
1671 bit:2 hiragana->katakana
1673 if ('9'>= *cp && *cp>='0')
1674 hira_f |= (*cp++ -'0');
1681 #if defined(MSDOS) || defined(__OS2__)
1696 #ifdef UTF8_OUTPUT_ENABLE
1697 case 'w': /* UTF-8 output */
1698 if ('1'== cp[0] && '6'==cp[1]) {
1699 output_conv = w_oconv16; cp+=2;
1701 unicode_bom_f=2; cp++;
1704 unicode_bom_f=1; cp++;
1706 } else if (cp[0] == 'B') {
1707 unicode_bom_f=2; cp++;
1709 unicode_bom_f=1; cp++;
1712 } else if (cp[0] == '8') {
1713 output_conv = w_oconv; cp++;
1716 unicode_bom_f=1; cp++;
1719 output_conv = w_oconv;
1722 #ifdef UTF8_INPUT_ENABLE
1723 case 'W': /* UTF-8 input */
1724 if ('1'== cp[0] && '6'==cp[1]) {
1725 input_f = UTF16BE_INPUT;
1726 utf16_mode = UTF16BE_INPUT;
1730 input_f = UTF16LE_INPUT;
1731 utf16_mode = UTF16LE_INPUT;
1732 } else if (cp[0] == 'B') {
1734 input_f = UTF16BE_INPUT;
1735 utf16_mode = UTF16BE_INPUT;
1737 } else if (cp[0] == '8') {
1739 input_f = UTF8_INPUT;
1741 input_f = UTF8_INPUT;
1744 /* Input code assumption */
1745 case 'J': /* JIS input */
1746 case 'E': /* AT&T EUC input */
1747 input_f = JIS_INPUT;
1749 case 'S': /* MS Kanji input */
1750 input_f = SJIS_INPUT;
1751 if (x0201_f==NO_X0201) x0201_f=TRUE;
1753 case 'Z': /* Convert X0208 alphabet to ascii */
1754 /* bit:0 Convert X0208
1755 bit:1 Convert Kankaku to one space
1756 bit:2 Convert Kankaku to two spaces
1757 bit:3 Convert HTML Entity
1759 if ('9'>= *cp && *cp>='0')
1760 alpha_f |= 1<<(*cp++ -'0');
1764 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
1765 x0201_f = FALSE; /* No X0201->X0208 conversion */
1767 ESC-(-I in JIS, EUC, MS Kanji
1768 SI/SO in JIS, EUC, MS Kanji
1769 SSO in EUC, JIS, not in MS Kanji
1770 MS Kanji (0xa0-0xdf)
1772 ESC-(-I in JIS (0x20-0x5f)
1773 SSO in EUC (0xa0-0xdf)
1774 0xa0-0xdf in MS Kanji (0xa0-0xdf)
1777 case 'X': /* Assume X0201 kana */
1778 /* Default value is NO_X0201 for EUC/MS-Kanji mix */
1781 case 'F': /* preserve new lines */
1782 fold_preserve_f = TRUE;
1783 case 'f': /* folding -f60 or -f */
1786 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1788 fold_len += *cp++ - '0';
1790 if (!(0<fold_len && fold_len<BUFSIZ))
1791 fold_len = DEFAULT_FOLD;
1795 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
1797 fold_margin += *cp++ - '0';
1801 case 'm': /* MIME support */
1802 /* mime_decode_f = TRUE; */ /* this has too large side effects... */
1803 if (*cp=='B'||*cp=='Q') {
1804 mime_decode_mode = *cp++;
1805 mimebuf_f = FIXED_MIME;
1806 } else if (*cp=='N') {
1807 mime_f = TRUE; cp++;
1808 } else if (*cp=='S') {
1809 mime_f = STRICT_MIME; cp++;
1810 } else if (*cp=='0') {
1811 mime_decode_f = FALSE;
1812 mime_f = FALSE; cp++;
1815 case 'M': /* MIME output */
1818 mimeout_f = FIXED_MIME; cp++;
1819 } else if (*cp=='Q') {
1821 mimeout_f = FIXED_MIME; cp++;
1826 case 'B': /* Broken JIS support */
1828 bit:1 allow any x on ESC-(-x or ESC-$-x
1829 bit:2 reset to ascii on NL
1831 if ('9'>= *cp && *cp>='0')
1832 broken_f |= 1<<(*cp++ -'0');
1837 case 'O':/* for Output file */
1841 case 'c':/* add cr code */
1844 case 'd':/* delete cr code */
1847 case 'I': /* ISO-2022-JP output */
1850 case 'L': /* line mode */
1851 if (*cp=='u') { /* unix */
1852 crmode_f = NL; cp++;
1853 } else if (*cp=='m') { /* mac */
1854 crmode_f = CR; cp++;
1855 } else if (*cp=='w') { /* windows */
1856 crmode_f = CRLF; cp++;
1857 } else if (*cp=='0') { /* no conversion */
1867 /* multiple options in a single string are allowed for the Perl module */
1868 while(*cp && *cp++!='-');
1871 /* bogus option but ignored */
1877 #ifdef ANSI_C_PROTOTYPE
1878 struct input_code * find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1880 struct input_code * find_inputcode_byfunc(iconv_func)
1881 nkf_char (*iconv_func)();
1885 struct input_code *p = input_code_list;
1887 if (iconv_func == p->iconv_func){
1896 void set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1898 #ifdef INPUT_CODE_FIX
1906 #ifdef INPUT_CODE_FIX
1907 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */
1913 if (estab_f && iconv_for_check != iconv){
1914 struct input_code *p = find_inputcode_byfunc(iconv);
1916 set_input_codename(p->name);
1917 debug(input_codename);
1919 iconv_for_check = iconv;
1924 #define SCORE_L2 (1) /* JIS level-2 kanji */
1925 #define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
1926 #define SCORE_DEPEND (SCORE_KANA << 1) /* platform-dependent characters */
1927 #ifdef SHIFTJIS_CP932
1928 #define SCORE_CP932 (SCORE_DEPEND << 1) /* character remapped via CP932 */
1929 #define SCORE_NO_EXIST (SCORE_CP932 << 1) /* nonexistent character */
1931 #define SCORE_NO_EXIST (SCORE_DEPEND << 1) /* nonexistent character */
1933 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified via MIME */
1934 #define SCORE_ERROR (SCORE_iMIME << 1) /* error */
1936 #define SCORE_INIT (SCORE_iMIME)
1938 const nkf_char score_table_A0[] = {
1941 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1942 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_NO_EXIST,
1945 const nkf_char score_table_F0[] = {
1946 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
1947 SCORE_L2, SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST,
1948 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
1949 SCORE_DEPEND, SCORE_NO_EXIST, SCORE_NO_EXIST, SCORE_ERROR,
1952 void set_code_score(struct input_code *ptr, nkf_char score)
1955 ptr->score |= score;
1959 void clr_code_score(struct input_code *ptr, nkf_char score)
1962 ptr->score &= ~score;
1966 void code_score(struct input_code *ptr)
1968 nkf_char c2 = ptr->buf[0];
1969 #ifdef UTF8_OUTPUT_ENABLE
1970 nkf_char c1 = ptr->buf[1];
1973 set_code_score(ptr, SCORE_ERROR);
1974 }else if (c2 == SSO){
1975 set_code_score(ptr, SCORE_KANA);
1976 #ifdef UTF8_OUTPUT_ENABLE
1977 }else if (!e2w_conv(c2, c1)){
1978 set_code_score(ptr, SCORE_NO_EXIST);
1980 }else if ((c2 & 0x70) == 0x20){
1981 set_code_score(ptr, score_table_A0[c2 & 0x0f]);
1982 }else if ((c2 & 0x70) == 0x70){
1983 set_code_score(ptr, score_table_F0[c2 & 0x0f]);
1984 }else if ((c2 & 0x70) >= 0x50){
1985 set_code_score(ptr, SCORE_L2);
1989 void status_disable(struct input_code *ptr)
1994 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
1997 void status_push_ch(struct input_code *ptr, nkf_char c)
1999 ptr->buf[ptr->index++] = c;
2002 void status_clear(struct input_code *ptr)
2008 void status_reset(struct input_code *ptr)
2011 ptr->score = SCORE_INIT;
2014 void status_reinit(struct input_code *ptr)
2017 ptr->_file_stat = 0;
2020 void status_check(struct input_code *ptr, nkf_char c)
2022 if (c <= DEL && estab_f){
2027 void s_status(struct input_code *ptr, nkf_char c)
2031 status_check(ptr, c);
2036 #ifdef NUMCHAR_OPTION
2037 }else if (is_unicode_capsule(c)){
2040 }else if (0xa1 <= c && c <= 0xdf){
2041 status_push_ch(ptr, SSO);
2042 status_push_ch(ptr, c);
2045 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
2047 status_push_ch(ptr, c);
2048 #ifdef SHIFTJIS_CP932
2050 && is_ibmext_in_sjis(c)){
2052 status_push_ch(ptr, c);
2053 #endif /* SHIFTJIS_CP932 */
2055 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){
2057 status_push_ch(ptr, c);
2058 #endif /* X0212_ENABLE */
2060 status_disable(ptr);
2064 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2065 status_push_ch(ptr, c);
2066 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2070 status_disable(ptr);
2074 #ifdef SHIFTJIS_CP932
2075 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2076 status_push_ch(ptr, c);
2077 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){
2078 set_code_score(ptr, SCORE_CP932);
2083 #endif /* SHIFTJIS_CP932 */
2084 #ifndef X0212_ENABLE
2085 status_disable(ptr);
2091 void e_status(struct input_code *ptr, nkf_char c)
2095 status_check(ptr, c);
2100 #ifdef NUMCHAR_OPTION
2101 }else if (is_unicode_capsule(c)){
2104 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){
2106 status_push_ch(ptr, c);
2108 }else if (0x8f == c){
2110 status_push_ch(ptr, c);
2111 #endif /* X0212_ENABLE */
2113 status_disable(ptr);
2117 if (0xa1 <= c && c <= 0xfe){
2118 status_push_ch(ptr, c);
2122 status_disable(ptr);
2127 if (0xa1 <= c && c <= 0xfe){
2129 status_push_ch(ptr, c);
2131 status_disable(ptr);
2133 #endif /* X0212_ENABLE */
2137 #ifdef UTF8_INPUT_ENABLE
2138 void w16_status(struct input_code *ptr, nkf_char c)
2144 if (ptr->_file_stat == 0){
2145 if (c == 0xfe || c == 0xff){
2147 status_push_ch(ptr, c);
2148 ptr->_file_stat = 1;
2150 status_disable(ptr);
2151 ptr->_file_stat = -1;
2153 }else if (ptr->_file_stat > 0){
2155 status_push_ch(ptr, c);
2156 }else if (ptr->_file_stat < 0){
2157 status_disable(ptr);
2163 status_disable(ptr);
2164 ptr->_file_stat = -1;
2166 status_push_ch(ptr, c);
2173 if (ptr->stat != c && (c == 0xfe || c == 0xff)){
2174 status_push_ch(ptr, c);
2177 status_disable(ptr);
2178 ptr->_file_stat = -1;
2184 void w_status(struct input_code *ptr, nkf_char c)
2188 status_check(ptr, c);
2193 #ifdef NUMCHAR_OPTION
2194 }else if (is_unicode_capsule(c)){
2197 }else if (0xc0 <= c && c <= 0xdf){
2199 status_push_ch(ptr, c);
2200 }else if (0xe0 <= c && c <= 0xef){
2202 status_push_ch(ptr, c);
2204 status_disable(ptr);
2209 if (0x80 <= c && c <= 0xbf){
2210 status_push_ch(ptr, c);
2211 if (ptr->index > ptr->stat){
2212 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2213 && ptr->buf[2] == 0xbf);
2214 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2215 &ptr->buf[0], &ptr->buf[1]);
2222 status_disable(ptr);
2229 void code_status(nkf_char c)
2231 int action_flag = 1;
2232 struct input_code *result = 0;
2233 struct input_code *p = input_code_list;
2235 (p->status_func)(p, c);
2238 }else if(p->stat == 0){
2249 if (result && !estab_f){
2250 set_iconv(TRUE, result->iconv_func);
2251 }else if (c <= DEL){
2252 struct input_code *ptr = input_code_list;
2262 nkf_char std_getc(FILE *f)
2265 return std_gc_buf[--std_gc_ndx];
2271 nkf_char std_ungetc(nkf_char c, FILE *f)
2273 if (std_gc_ndx == STD_GC_BUFSIZE){
2276 std_gc_buf[std_gc_ndx++] = c;
2281 void std_putc(nkf_char c)
2288 #if !defined(PERL_XS) && !defined(WIN32DLL)
2289 nkf_char noconvert(FILE *f)
2294 module_connection();
2295 while ((c = (*i_getc)(f)) != EOF)
2302 void module_connection(void)
2304 oconv = output_conv;
2307 /* replace continuation module, from output side */
2309 /* output redirection */
2311 if (noout_f || guess_f){
2318 if (mimeout_f == TRUE) {
2319 o_base64conv = oconv; oconv = base64_conv;
2321 /* base64_count = 0; */
2325 o_crconv = oconv; oconv = cr_conv;
2328 o_rot_conv = oconv; oconv = rot_conv;
2331 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
2334 o_hira_conv = oconv; oconv = hira_conv;
2337 o_fconv = oconv; oconv = fold_conv;
2340 if (alpha_f || x0201_f) {
2341 o_zconv = oconv; oconv = z_conv;
2345 i_ungetc = std_ungetc;
2346 /* input redirection */
2349 i_cgetc = i_getc; i_getc = cap_getc;
2350 i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
2353 i_ugetc = i_getc; i_getc = url_getc;
2354 i_uungetc = i_ungetc; i_ungetc= url_ungetc;
2357 #ifdef NUMCHAR_OPTION
2359 i_ngetc = i_getc; i_getc = numchar_getc;
2360 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
2363 #ifdef UNICODE_NORMALIZATION
2364 if (nfc_f && input_f == UTF8_INPUT){
2365 i_nfc_getc = i_getc; i_getc = nfc_getc;
2366 i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
2369 if (mime_f && mimebuf_f==FIXED_MIME) {
2370 i_mgetc = i_getc; i_getc = mime_getc;
2371 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
2374 i_bgetc = i_getc; i_getc = broken_getc;
2375 i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
2377 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) {
2378 set_iconv(-TRUE, e_iconv);
2379 } else if (input_f == SJIS_INPUT) {
2380 set_iconv(-TRUE, s_iconv);
2381 #ifdef UTF8_INPUT_ENABLE
2382 } else if (input_f == UTF8_INPUT) {
2383 set_iconv(-TRUE, w_iconv);
2384 } else if (input_f == UTF16BE_INPUT) {
2385 set_iconv(-TRUE, w_iconv16);
2386 } else if (input_f == UTF16LE_INPUT) {
2387 set_iconv(-TRUE, w_iconv16);
2390 set_iconv(FALSE, e_iconv);
2394 struct input_code *p = input_code_list;
2402 Conversion main loop. Code detection only.
2405 nkf_char kanji_convert(FILE *f)
2409 int is_8bit = FALSE;
2411 module_connection();
2414 if(input_f == SJIS_INPUT
2415 #ifdef UTF8_INPUT_ENABLE
2416 || input_f == UTF8_INPUT || input_f == UTF16BE_INPUT || input_f == UTF16LE_INPUT
2424 output_mode = ASCII;
2427 #define NEXT continue /* no output, get next */
2428 #define SEND ; /* output c1 and c2, get next */
2429 #define LAST break /* end of loop, go closing */
2431 while ((c1 = (*i_getc)(f)) != EOF) {
2432 #ifdef INPUT_CODE_FIX
2439 /* in case of 8th bit is on */
2440 if (!estab_f&&!mime_decode_mode) {
2441 /* in case of not established yet */
2442 /* It is still ambiguous */
2443 if (h_conv(f, c2, c1)==EOF)
2449 /* in case of already established */
2451 /* ignore bogus code */
2457 /* second byte, 7 bit code */
2458 /* it might be kanji shifted */
2459 if ((c1 == DEL) || (c1 <= SPACE)) {
2460 /* ignore bogus first code */
2468 #ifdef UTF8_INPUT_ENABLE
2477 #ifdef NUMCHAR_OPTION
2478 } else if (is_unicode_capsule(c1)){
2481 } else if (c1 > DEL) {
2483 if (!estab_f && !iso8859_f) {
2484 /* not established yet */
2485 if (!is_8bit) is_8bit = TRUE;
2488 } else { /* estab_f==TRUE */
2493 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) {
2494 /* SJIS X0201 Case... */
2495 if(iso2022jp_f && x0201_f==NO_X0201) {
2496 (*oconv)(GETA1, GETA2);
2503 } else if (c1==SSO && iconv != s_iconv) {
2504 /* EUC X0201 Case */
2505 c1 = (*i_getc)(f); /* skip SSO */
2507 if (SSP<=c1 && c1<0xe0) {
2508 if(iso2022jp_f && x0201_f==NO_X0201) {
2509 (*oconv)(GETA1, GETA2);
2516 } else { /* bogus code, skip SSO and one byte */
2520 /* already established */
2525 } else if ((c1 > SPACE) && (c1 != DEL)) {
2526 /* in case of Roman characters */
2528 /* output 1 shifted byte */
2532 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){
2533 /* output 1 shifted byte */
2534 if(iso2022jp_f && x0201_f==NO_X0201) {
2535 (*oconv)(GETA1, GETA2);
2542 /* look like bogus code */
2545 } else if (input_mode == X0208 || input_mode == X0212 ||
2546 input_mode == X0213_1 || input_mode == X0213_2) {
2547 /* in case of Kanji shifted */
2550 } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
2551 /* Check MIME code */
2552 if ((c1 = (*i_getc)(f)) == EOF) {
2555 } else if (c1 == '?') {
2556 /* =? is mime conversion start sequence */
2557 if(mime_f == STRICT_MIME) {
2558 /* check in real detail */
2559 if (mime_begin_strict(f) == EOF)
2563 } else if (mime_begin(f) == EOF)
2573 /* normal ASCII code */
2576 } else if (!is_8bit && c1 == SI) {
2579 } else if (!is_8bit && c1 == SO) {
2582 } else if (!is_8bit && c1 == ESC ) {
2583 if ((c1 = (*i_getc)(f)) == EOF) {
2584 /* (*oconv)(0, ESC); don't send bogus code */
2586 } else if (c1 == '$') {
2587 if ((c1 = (*i_getc)(f)) == EOF) {
2589 (*oconv)(0, ESC); don't send bogus code
2590 (*oconv)(0, '$'); */
2592 } else if (c1 == '@'|| c1 == 'B') {
2593 /* This is kanji introduction */
2596 set_input_codename("ISO-2022-JP");
2598 debug(input_codename);
2601 } else if (c1 == '(') {
2602 if ((c1 = (*i_getc)(f)) == EOF) {
2603 /* don't send bogus code
2609 } else if (c1 == '@'|| c1 == 'B') {
2610 /* This is kanji introduction */
2615 } else if (c1 == 'D'){
2619 #endif /* X0212_ENABLE */
2620 } else if (c1 == (X0213_1&0x7F)){
2621 input_mode = X0213_1;
2624 } else if (c1 == (X0213_2&0x7F)){
2625 input_mode = X0213_2;
2629 /* could be some special code */
2636 } else if (broken_f&0x2) {
2637 /* accept any ESC-(-x as broken code ... */
2647 } else if (c1 == '(') {
2648 if ((c1 = (*i_getc)(f)) == EOF) {
2649 /* don't send bogus code
2651 (*oconv)(0, '('); */
2655 /* This is X0201 kana introduction */
2656 input_mode = X0201; shift_mode = X0201;
2658 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
2659 /* This is ASCII/Roman introduction (ESC-(-B/J/H), not kanji */
2660 input_mode = ASCII; shift_mode = FALSE;
2662 } else if (broken_f&0x2) {
2663 input_mode = ASCII; shift_mode = FALSE;
2668 /* maintain various input_mode here */
2672 } else if ( c1 == 'N' || c1 == 'n' ){
2674 c3 = (*i_getc)(f); /* skip SS2 */
2675 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
2690 } else if ((c1 == NL || c1 == CR) && broken_f&4) {
2691 input_mode = ASCII; set_iconv(FALSE, 0);
2693 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
2694 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2702 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
2703 if ((c1=(*i_getc)(f))!=EOF) {
2707 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
2725 if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */
2726 nkf_char c0 = (*i_getc)(f);
2729 (*iconv)(c2, c1, c0);
2735 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
2739 (*oconv)(PREFIX_EUCG3 | c2, c1);
2741 #endif /* X0212_ENABLE */
2743 (*oconv)(PREFIX_EUCG3 | c2, c1);
2746 (*oconv)(input_mode, c1); /* other special case */
2751 /* goto next_word */
2755 (*iconv)(EOF, 0, 0);
2756 if (!is_inputcode_set)
2759 struct input_code *p = input_code_list;
2760 struct input_code *result = p;
2762 if (p->score < result->score) result = p;
2765 set_input_codename(result->name);
2772 h_conv(FILE *f, nkf_char c2, nkf_char c1)
2777 /** it must NOT be in the kanji shift sequence */
2778 /** it must NOT be written in JIS7 */
2779 /** and it must be after 2 byte 8bit code */
2785 while ((c1 = (*i_getc)(f)) != EOF) {
2791 if (push_hold_buf(c1) == EOF || estab_f){
2797 struct input_code *p = input_code_list;
2798 struct input_code *result = p;
2803 if (p->score < result->score){
2808 set_iconv(FALSE, result->iconv_func);
2813 ** 1) EOF is detected, or
2814 ** 2) Code is established, or
2815 ** 3) Buffer is FULL (but last word is pushed)
2817 ** in 1) and 3) cases, we continue to use
2818 ** Kanji codes by oconv and leave estab_f unchanged.
2823 while (wc < hold_count){
2824 c2 = hold_buf[wc++];
2826 #ifdef NUMCHAR_OPTION
2827 || is_unicode_capsule(c2)
2832 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){
2833 (*iconv)(X0201, c2, 0);
2836 if (wc < hold_count){
2837 c1 = hold_buf[wc++];
2846 if ((*iconv)(c2, c1, 0) < 0){
2848 if (wc < hold_count){
2849 c0 = hold_buf[wc++];
2858 (*iconv)(c2, c1, c0);
2867 push_hold_buf(nkf_char c2)
2869 if (hold_count >= HOLD_SIZE*2)
2871 hold_buf[hold_count++] = (unsigned char)c2;
2872 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
2875 nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
2877 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
2880 static const nkf_char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
2881 #ifdef SHIFTJIS_CP932
2882 if (cp51932_f && is_ibmext_in_sjis(c2)){
2884 extern const unsigned short shiftjis_cp932[3][189];
2886 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
2892 #endif /* SHIFTJIS_CP932 */
2894 if (!x0213_f && is_ibmext_in_sjis(c2)){
2896 extern const unsigned short shiftjis_x0212[3][189];
2898 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
2901 c2 = PREFIX_EUCG3 | (val >> 8);
2914 if(x0213_f && c2 >= 0xF0){
2915 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
2916 c2 = PREFIX_EUCG3 | 0x20 + shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
2917 }else{ /* 78<=k<=94 */
2918 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
2919 if (0x9E < c1) c2++;
2922 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
2923 if (0x9E < c1) c2++;
2926 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1F);
2933 c2 = x0212_unshift(c2);
2940 nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2944 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2947 nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2948 if (ret) return ret;
2954 nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
2959 }else if (c2 == 0x8f){
2963 c2 = (c2 << 8) | (c1 & 0x7f);
2965 #ifdef SHIFTJIS_CP932
2968 if (e2s_conv(c2, c1, &s2, &s1) == 0){
2969 s2e_conv(s2, s1, &c2, &c1);
2976 #endif /* SHIFTJIS_CP932 */
2977 #endif /* X0212_ENABLE */
2978 } else if (c2 == SSO){
2981 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) {
2991 #ifdef UTF8_INPUT_ENABLE
2992 nkf_char w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
2999 }else if (0xc0 <= c2 && c2 <= 0xef) {
3000 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
3001 #ifdef NUMCHAR_OPTION
3004 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0);
3012 nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
3016 /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
3017 if(ignore_zwnbsp_f){
3018 ignore_zwnbsp_f = FALSE;
3019 if(c2 == 0xef && c1 == 0xbb && c0 == 0xbf)
3023 if (c2 == 0) /* 0x00-0x7f */
3024 c1 &= 0x7F; /* 1byte */
3026 if ((c2 & 0xe0) == 0xc0){ /* 0xc0-0xdf */
3028 if((c2 & 0xFE) == 0xC0 || c1 < 0x80 || 0xBF < c1) return 0;
3029 }else if ((c2 & 0xf0) == 0xe0) /* 0xe0-0xef */
3030 return -1; /* 3bytes */
3032 else if (0xf0 <= c2)
3033 return 0; /* 4,5,6bytes */
3034 else if ((c2 & 0xc0) == 0x80) /* 0x80-0xbf */
3035 return 0; /* trail byte */
3039 /* must be 3bytes */
3041 if(c1 < 0xA0 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
3043 }else if(c2 == 0xED){
3044 if(c1 < 0x80 || 0x9F < c1 || c0 < 0x80 || 0xBF < c0)
3046 }else if((c2 & 0xf0) == 0xe0){
3047 if(c1 < 0x80 || 0xBF < c1 || c0 < 0x80 || 0xBF < c0)
3051 if (c2 == 0 || c2 == EOF){
3053 ret = w2e_conv(c2, c1, c0, &c2, &c1);
3062 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
3063 void w16w_conv(nkf_char val, nkf_char *p2, nkf_char *p1, nkf_char *p0)
3070 }else if (val < 0x800){
3071 *p2 = 0xc0 | (val >> 6);
3072 *p1 = 0x80 | (val & 0x3f);
3074 } else if (val <= NKF_INT32_C(0xFFFF)) {
3075 *p2 = 0xe0 | (val >> 12);
3076 *p1 = 0x80 | ((val >> 6) & 0x3f);
3077 *p0 = 0x80 | (val & 0x3f);
3086 #ifdef UTF8_INPUT_ENABLE
3087 nkf_char ww16_conv(nkf_char c2, nkf_char c1, nkf_char c0)
3092 }else if (c2 >= 0xe0){
3093 val = (c2 & 0x0f) << 12;
3094 val |= (c1 & 0x3f) << 6;
3096 }else if (c2 >= 0xc0){
3097 val = (c2 & 0x1f) << 6;
3105 nkf_char w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
3107 nkf_char c2, c1, c0;
3114 w16w_conv(val, &c2, &c1, &c0);
3115 ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
3116 #ifdef NUMCHAR_OPTION
3119 *p1 = CLASS_UTF16 | val;
3128 #ifdef UTF8_INPUT_ENABLE
3129 nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0)
3133 /* throw away ZERO WIDTH NO-BREAK SPACE (U+FEFF) */
3134 if(ignore_zwnbsp_f){
3135 ignore_zwnbsp_f = FALSE;
3136 if (c2==0376 && c1==0377){
3137 utf16_mode = UTF16BE_INPUT;
3139 }else if(c2==0377 && c1==0376){
3140 utf16_mode = UTF16LE_INPUT;
3144 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
3146 tmp=c1; c1=c2; c2=tmp;
3148 if ((c2==0 && c1 < 0x80) || c2==EOF) {
3151 }else if((c2>>3)==27){ /* surrogate pair */
3153 }else ret = w16e_conv(((c2 & 0xff)<<8) + c1, &c2, &c1);
3154 if (ret) return ret;
3159 nkf_char unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
3162 extern const unsigned short *const utf8_to_euc_2bytes[];
3163 extern const unsigned short *const utf8_to_euc_2bytes_ms[];
3164 extern const unsigned short *const utf8_to_euc_2bytes_932[];
3165 extern const unsigned short *const *const utf8_to_euc_3bytes[];
3166 extern const unsigned short *const *const utf8_to_euc_3bytes_ms[];
3167 extern const unsigned short *const *const utf8_to_euc_3bytes_932[];
3169 const unsigned short *const *pp;
3170 const unsigned short *const *const *ppp;
3171 static const int no_best_fit_chars_table_C2[] =
3172 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3173 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3174 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
3175 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
3176 static const int no_best_fit_chars_table_C2_ms[] =
3177 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3179 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
3180 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
3181 static const int no_best_fit_chars_table_932_C2[] =
3182 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3183 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3184 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
3185 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
3186 static const int no_best_fit_chars_table_932_C3[] =
3187 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3188 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
3189 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3190 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
3196 }else if(c2 < 0xe0){
3197 if(no_best_fit_chars_f){
3198 if(ms_ucs_map_f == UCS_MAP_CP932){
3201 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
3204 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3207 }else if(cp51932_f){
3210 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
3213 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
3216 }else if(ms_ucs_map_f == UCS_MAP_MS){
3217 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
3221 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
3222 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
3224 ret = w_iconv_common(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
3225 }else if(c0 < 0xF0){
3226 if(no_best_fit_chars_f){
3227 if(ms_ucs_map_f == UCS_MAP_CP932){
3228 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
3229 }else if(ms_ucs_map_f == UCS_MAP_MS){
3234 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
3237 if(c0 == 0x92) return 1;
3242 if(c1 == 0x80 || c0 == 0x9C) return 1;
3250 if(c0 == 0x95) return 1;
3253 if(c0 == 0xA5) return 1;
3260 if(c0 == 0x8D) return 1;
3263 if(c0 == 0x9E && cp51932_f) return 1;
3266 if(0xA0 <= c0 && c0 <= 0xA5) return 1;
3274 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
3275 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
3277 ret = w_iconv_common(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
3279 #ifdef SHIFTJIS_CP932
3280 if (!ret && cp51932_f && is_eucg3(*p2)) {
3282 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
3283 s2e_conv(s2, s1, p2, p1);
3292 nkf_char w_iconv_common(nkf_char c1, nkf_char c0, const unsigned short *const *pp, nkf_char psize, nkf_char *p2, nkf_char *p1)
3295 const unsigned short *p;
3298 if (pp == 0) return 1;
3301 if (c1 < 0 || psize <= c1) return 1;
3303 if (p == 0) return 1;
3306 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
3308 if (val == 0) return 1;
3309 if (no_cp932ext_f && (
3310 (val>>8) == 0x2D || /* NEC special characters */
3311 val > NKF_INT32_C(0xF300) /* IBM extended characters */
3319 if (c2 == SO) c2 = X0201;
3326 void nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
3328 const char *hex = "0123456789ABCDEF";
3334 (*f)(0, hex[(c>>shift)&0xF]);
3344 void encode_fallback_html(nkf_char c)
3349 if(c >= NKF_INT32_C(1000000))
3350 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
3351 if(c >= NKF_INT32_C(100000))
3352 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
3354 (*oconv)(0, 0x30+(c/10000 )%10);
3356 (*oconv)(0, 0x30+(c/1000 )%10);
3358 (*oconv)(0, 0x30+(c/100 )%10);
3360 (*oconv)(0, 0x30+(c/10 )%10);
3362 (*oconv)(0, 0x30+ c %10);
3367 void encode_fallback_xml(nkf_char c)
3372 nkf_each_char_to_hex(oconv, c);
3377 void encode_fallback_java(nkf_char c)
3379 const char *hex = "0123456789ABCDEF";
3382 if(!is_unicode_bmp(c)){
3386 (*oconv)(0, hex[(c>>20)&0xF]);
3387 (*oconv)(0, hex[(c>>16)&0xF]);
3391 (*oconv)(0, hex[(c>>12)&0xF]);
3392 (*oconv)(0, hex[(c>> 8)&0xF]);
3393 (*oconv)(0, hex[(c>> 4)&0xF]);
3394 (*oconv)(0, hex[ c &0xF]);
3398 void encode_fallback_perl(nkf_char c)
3403 nkf_each_char_to_hex(oconv, c);
3408 void encode_fallback_subchar(nkf_char c)
3410 c = unicode_subchar;
3411 (*oconv)((c>>8)&0xFF, c&0xFF);
3416 #ifdef UTF8_OUTPUT_ENABLE
3417 nkf_char e2w_conv(nkf_char c2, nkf_char c1)
3420 extern const unsigned short euc_to_utf8_1byte[];
3421 extern const unsigned short *const euc_to_utf8_2bytes[];
3422 extern const unsigned short *const euc_to_utf8_2bytes_ms[];
3423 extern const unsigned short *const x0212_to_utf8_2bytes[];
3425 const unsigned short *p;
3428 p = euc_to_utf8_1byte;
3430 } else if (is_eucg3(c2)){
3431 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
3434 c2 = (c2&0x7f) - 0x21;
3435 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3436 p = x0212_to_utf8_2bytes[c2];
3442 c2 = (c2&0x7f) - 0x21;
3443 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
3444 p = ms_ucs_map_f != UCS_MAP_ASCII ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2];
3449 c1 = (c1 & 0x7f) - 0x21;
3450 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
3455 void w_oconv(nkf_char c2, nkf_char c1)
3464 if (unicode_bom_f==2) {
3471 #ifdef NUMCHAR_OPTION
3472 if (c2 == 0 && is_unicode_capsule(c1)){
3476 }else if (val < 0x800){
3477 (*o_putc)(0xC0 | (val >> 6));
3478 (*o_putc)(0x80 | (val & 0x3f));
3479 } else if (val <= NKF_INT32_C(0xFFFF)) {
3480 (*o_putc)(0xE0 | (val >> 12));
3481 (*o_putc)(0x80 | ((val >> 6) & 0x3f));
3482 (*o_putc)(0x80 | (val & 0x3f));
3483 } else if (val <= NKF_INT32_C(0x10FFFF)) {
3484 (*o_putc)(0xF0 | ( val>>18)); /* 4-byte UTF-8 lead byte is 11110xxx (0xF0); 0xE0 is the 3-byte lead */
3485 (*o_putc)(0x80 | ((val>>12) & 0x3f));
3486 (*o_putc)(0x80 | ((val>> 6) & 0x3f));
3487 (*o_putc)(0x80 | ( val & 0x3f));
3494 output_mode = ASCII;
3496 } else if (c2 == ISO8859_1) {
3497 output_mode = ISO8859_1;
3498 (*o_putc)(c1 | 0x080);
3501 val = e2w_conv(c2, c1);
3503 w16w_conv(val, &c2, &c1, &c0);
3507 if (c0) (*o_putc)(c0);
3513 void w_oconv16(nkf_char c2, nkf_char c1)
3520 if (unicode_bom_f==2) {
3522 (*o_putc)((unsigned char)'\377');
3526 (*o_putc)((unsigned char)'\377');
3531 if (c2 == ISO8859_1) {
3534 #ifdef NUMCHAR_OPTION
3535 } else if (c2 == 0 && is_unicode_capsule(c1)) {
3536 if (is_unicode_bmp(c1)) {
3537 c2 = (c1 >> 8) & 0xff;
3541 if (c1 <= UNICODE_MAX) {
3542 c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
3543 c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
3545 (*o_putc)(c2 & 0xff);
3546 (*o_putc)((c2 >> 8) & 0xff);
3547 (*o_putc)(c1 & 0xff);
3548 (*o_putc)((c1 >> 8) & 0xff);
3550 (*o_putc)(c2 & 0xff);
3551 (*o_putc)((c2 >> 8) & 0xff);
3552 (*o_putc)(c1 & 0xff);
3553 (*o_putc)((c1 >> 8) & 0xff);
3560 nkf_char val = e2w_conv(c2, c1);
3561 c2 = (val >> 8) & 0xff;
3575 void e_oconv(nkf_char c2, nkf_char c1)
3577 #ifdef NUMCHAR_OPTION
3578 if (c2 == 0 && is_unicode_capsule(c1)){
3579 w16e_conv(c1, &c2, &c1);
3580 if (c2 == 0 && is_unicode_capsule(c1)){
3581 if(encode_fallback)(*encode_fallback)(c1);
3589 } else if (c2 == 0) {
3590 output_mode = ASCII;
3592 } else if (c2 == X0201) {
3593 output_mode = JAPANESE_EUC;
3594 (*o_putc)(SSO); (*o_putc)(c1|0x80);
3595 } else if (c2 == ISO8859_1) {
3596 output_mode = ISO8859_1;
3597 (*o_putc)(c1 | 0x080);
3599 } else if (is_eucg3(c2)){
3600 output_mode = JAPANESE_EUC;
3601 #ifdef SHIFTJIS_CP932
3604 if (e2s_conv(c2, c1, &s2, &s1) == 0){
3605 s2e_conv(s2, s1, &c2, &c1);
3610 output_mode = ASCII;
3612 }else if (is_eucg3(c2)){
3615 (*o_putc)((c2 & 0x7f) | 0x080);
3616 (*o_putc)(c1 | 0x080);
3619 (*o_putc)((c2 & 0x7f) | 0x080);
3620 (*o_putc)(c1 | 0x080);
3624 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
3625 set_iconv(FALSE, 0);
3626 return; /* too late to rescue this char */
3628 output_mode = JAPANESE_EUC;
3629 (*o_putc)(c2 | 0x080);
3630 (*o_putc)(c1 | 0x080);
3635 nkf_char x0212_shift(nkf_char c)
3640 if (0x75 <= c && c <= 0x7f){
3641 ret = c + (0x109 - 0x75);
3644 if (0x75 <= c && c <= 0x7f){
3645 ret = c + (0x113 - 0x75);
3652 nkf_char x0212_unshift(nkf_char c)
3655 if (0x7f <= c && c <= 0x88){
3656 ret = c + (0x75 - 0x7f);
3657 }else if (0x89 <= c && c <= 0x92){
3658 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
3662 #endif /* X0212_ENABLE */
3664 nkf_char e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
3670 if((0x21 <= ndx && ndx <= 0x2F)){
3671 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
3672 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3674 }else if(0x6E <= ndx && ndx <= 0x7E){
3675 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
3676 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3682 else if(nkf_isgraph(ndx)){
3684 const unsigned short *ptr;
3686 extern const unsigned short *const x0212_shiftjis[];
3688 ptr = x0212_shiftjis[ndx - 0x21];
3690 val = ptr[(c1 & 0x7f) - 0x21];
3699 c2 = x0212_shift(c2);
3701 #endif /* X0212_ENABLE */
3703 if(0x7F < c2) return 1;
3704 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
3705 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
3709 void s_oconv(nkf_char c2, nkf_char c1)
3711 #ifdef NUMCHAR_OPTION
3712 if (c2 == 0 && is_unicode_capsule(c1)){
3713 w16e_conv(c1, &c2, &c1);
3714 if (c2 == 0 && is_unicode_capsule(c1)){
3715 if(encode_fallback)(*encode_fallback)(c1);
3723 } else if (c2 == 0) {
3724 output_mode = ASCII;
3726 } else if (c2 == X0201) {
3727 output_mode = SHIFT_JIS;
3729 } else if (c2 == ISO8859_1) {
3730 output_mode = ISO8859_1;
3731 (*o_putc)(c1 | 0x080);
3733 } else if (is_eucg3(c2)){
3734 output_mode = SHIFT_JIS;
3735 if (e2s_conv(c2, c1, &c2, &c1) == 0){
3741 if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
3742 set_iconv(FALSE, 0);
3743 return; /* too late to rescue this char */
3745 output_mode = SHIFT_JIS;
3746 e2s_conv(c2, c1, &c2, &c1);
3748 #ifdef SHIFTJIS_CP932
3750 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
3752 extern const unsigned short cp932inv[2][189];
3754 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
3760 #endif /* SHIFTJIS_CP932 */
3763 if (prefix_table[(unsigned char)c1]){
3764 (*o_putc)(prefix_table[(unsigned char)c1]);
3770 void j_oconv(nkf_char c2, nkf_char c1)
3772 #ifdef NUMCHAR_OPTION
3773 if (c2 == 0 && is_unicode_capsule(c1)){
3774 w16e_conv(c1, &c2, &c1);
3775 if (c2 == 0 && is_unicode_capsule(c1)){
3776 if(encode_fallback)(*encode_fallback)(c1);
3782 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3785 (*o_putc)(ascii_intro);
3786 output_mode = ASCII;
3790 } else if (is_eucg3(c2)){
3792 if(output_mode!=X0213_2){
3793 output_mode = X0213_2;
3797 (*o_putc)(X0213_2&0x7F);
3800 if(output_mode!=X0212){
3801 output_mode = X0212;
3805 (*o_putc)(X0212&0x7F);
3808 (*o_putc)(c2 & 0x7f);
3811 } else if (c2==X0201) {
3812 if (output_mode!=X0201) {
3813 output_mode = X0201;
3819 } else if (c2==ISO8859_1) {
3820 /* iso8859 introduction, or 8th bit on */
3821 /* Can we convert in 7bit form using ESC-'-'-A ?
3823 output_mode = ISO8859_1;
3825 } else if (c2 == 0) {
3826 if (output_mode !=ASCII && output_mode!=ISO8859_1) {
3829 (*o_putc)(ascii_intro);
3830 output_mode = ASCII;
3834 if(c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
3836 if (output_mode!=X0213_1) {
3837 output_mode = X0213_1;
3841 (*o_putc)(X0213_1&0x7F);
3843 }else if (output_mode != X0208) {
3844 output_mode = X0208;
3847 (*o_putc)(kanji_intro);
3854 void base64_conv(nkf_char c2, nkf_char c1)
3856 mime_prechar(c2, c1);
3857 (*o_base64conv)(c2,c1);
3861 static nkf_char broken_buf[3];
3862 static int broken_counter = 0;
3863 static int broken_last = 0;
3864 nkf_char broken_getc(FILE *f)
3868 if (broken_counter>0) {
3869 return broken_buf[--broken_counter];
3872 if (c=='$' && broken_last != ESC
3873 && (input_mode==ASCII || input_mode==X0201)) {
3876 if (c1=='@'|| c1=='B') {
3877 broken_buf[0]=c1; broken_buf[1]=c;
3884 } else if (c=='(' && broken_last != ESC
3885 && (input_mode==X0208 || input_mode==X0201)) { /* ) */
3888 if (c1=='J'|| c1=='B') {
3889 broken_buf[0]=c1; broken_buf[1]=c;
3902 nkf_char broken_ungetc(nkf_char c, FILE *f)
3904 if (broken_counter<2)
3905 broken_buf[broken_counter++]=c;
3909 static nkf_char prev_cr = 0;
3911 void cr_conv(nkf_char c2, nkf_char c1)
3915 if (! (c2==0&&c1==NL) ) {
3921 } else if (c1=='\r') {
3923 } else if (c1=='\n') {
3924 if (crmode_f==CRLF) {
3925 (*o_crconv)(0,'\r');
3926 } else if (crmode_f==CR) {
3927 (*o_crconv)(0,'\r');
3931 } else if (c1!='\032' || crmode_f!=NL){
3937 Return value of fold_conv()
3939 \n add newline and output char
3940 \r add newline and output nothing
3943 1 (or else) normal output
3945 fold state in prev (previous character)
3947 >0x80 Japanese (X0208/X0201)
3952 This fold algorithm does not preserve leading spaces in a line.
3953 This is the main difference from fmt.
3956 #define char_size(c2,c1) (c2?2:1)
3958 void fold_conv(nkf_char c2, nkf_char c1)
3961 nkf_char fold_state;
3963 if (c1== '\r' && !fold_preserve_f) {
3964 fold_state=0; /* ignore cr */
3965 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) {
3967 fold_state=0; /* ignore cr */
3968 } else if (c1== BS) {
3969 if (f_line>0) f_line--;
3971 } else if (c2==EOF && f_line != 0) { /* close open last line */
3973 } else if ((c1=='\n' && !fold_preserve_f)
3974 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r'))
3975 && fold_preserve_f)) {
3977 if (fold_preserve_f) {
3981 } else if ((f_prev == c1 && !fold_preserve_f)
3982 || (f_prev == '\n' && fold_preserve_f)
3983 ) { /* duplicate newline */
3986 fold_state = '\n'; /* output two newline */
3992 if (f_prev&0x80) { /* Japanese? */
3994 fold_state = 0; /* ignore given single newline */
3995 } else if (f_prev==' ') {
3999 if (++f_line<=fold_len)
4003 fold_state = '\r'; /* fold and output nothing */
4007 } else if (c1=='\f') {
4010 fold_state = '\n'; /* output newline and clear */
4011 } else if ( (c2==0 && c1==' ')||
4012 (c2==0 && c1=='\t')||
4013 (c2=='!'&& c1=='!')) {
4014 /* X0208 kankaku or ascii space */
4015 if (f_prev == ' ') {
4016 fold_state = 0; /* remove duplicate spaces */
4019 if (++f_line<=fold_len)
4020 fold_state = ' '; /* output ASCII space only */
4022 f_prev = ' '; f_line = 0;
4023 fold_state = '\r'; /* fold and output nothing */
4027 prev0 = f_prev; /* we still need this one... , but almost done */
4029 if (c2 || c2==X0201)
4030 f_prev |= 0x80; /* this is Japanese */
4031 f_line += char_size(c2,c1);
4032 if (f_line<=fold_len) { /* normal case */
4035 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
4036 f_line = char_size(c2,c1);
4037 fold_state = '\n'; /* We can't wait, do fold now */
4038 } else if (c2==X0201) {
4039 /* simple kinsoku rules return 1 means no folding */
4040 if (c1==(0xde&0x7f)) fold_state = 1; /*
\e$B!+
\e(B*/
4041 else if (c1==(0xdf&0x7f)) fold_state = 1; /*
\e$B!,
\e(B*/
4042 else if (c1==(0xa4&0x7f)) fold_state = 1; /*
\e$B!#
\e(B*/
4043 else if (c1==(0xa3&0x7f)) fold_state = 1; /*
\e$B!$
\e(B*/
4044 else if (c1==(0xa1&0x7f)) fold_state = 1; /*
\e$B!W
\e(B*/
4045 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */
4046 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
4048 fold_state = '\n';/* add one new f_line before this character */
4051 fold_state = '\n';/* add one new f_line before this character */
4054 /* kinsoku point in ASCII */
4055 if ( c1==')'|| /* { [ ( */
4066 /* just after special */
4067 } else if (!is_alnum(prev0)) {
4068 f_line = char_size(c2,c1);
4070 } else if ((prev0==' ') || /* ignored new f_line */
4071 (prev0=='\n')|| /* ignored new f_line */
4072 (prev0&0x80)) { /* X0208 - ASCII */
4073 f_line = char_size(c2,c1);
4074 fold_state = '\n';/* add one new f_line before this character */
4076 fold_state = 1; /* default no fold in ASCII */
4080 if (c1=='"') fold_state = 1; /*
\e$B!"
\e(B */
4081 else if (c1=='#') fold_state = 1; /*
\e$B!#
\e(B */
4082 else if (c1=='W') fold_state = 1; /*
\e$B!W
\e(B */
4083 else if (c1=='K') fold_state = 1; /*
\e$B!K
\e(B */
4084 else if (c1=='$') fold_state = 1; /*
\e$B!$
\e(B */
4085 else if (c1=='%') fold_state = 1; /*
\e$B!%
\e(B */
4086 else if (c1=='\'') fold_state = 1; /*
\e$B!\
\e(B */
4087 else if (c1=='(') fold_state = 1; /*
\e$B!(
\e(B */
4088 else if (c1==')') fold_state = 1; /*
\e$B!)
\e(B */
4089 else if (c1=='*') fold_state = 1; /*
\e$B!*
\e(B */
4090 else if (c1=='+') fold_state = 1; /*
\e$B!+
\e(B */
4091 else if (c1==',') fold_state = 1; /*
\e$B!,
\e(B */
4092 /* default no fold in kinsoku */
4095 f_line = char_size(c2,c1);
4096 /* add one new f_line before this character */
4099 f_line = char_size(c2,c1);
4101 /* add one new f_line before this character */
4106 /* terminator process */
4107 switch(fold_state) {
4126 nkf_char z_prev2=0,z_prev1=0;
4128 void z_conv(nkf_char c2, nkf_char c1)
4131 /* if (c2) c1 &= 0x7f; assertion */
4133 if (x0201_f && z_prev2==X0201) { /* X0201 */
4134 if (c1==(0xde&0x7f)) { /*
\e$BByE@
\e(B */
4136 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]);
4138 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /*
\e$BH>ByE@
\e(B */
4140 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]);
4144 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]);
4153 if (x0201_f && c2==X0201) {
4154 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) {
4155 /* wait for
\e$BByE@
\e(B or
\e$BH>ByE@
\e(B */
4156 z_prev1 = c1; z_prev2 = c2;
4159 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]);
4164 /* JISX0208 Alphabet */
4165 if (alpha_f && c2 == 0x23 ) {
4167 } else if (alpha_f && c2 == 0x21 ) {
4168 /* JISX0208 Kigou */
4173 } else if (alpha_f&0x4) {
4178 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4184 case '>': entity = "&" "gt;"; break; /* literals split after '&' so entity-decoding tools cannot collapse them */
4185 case '<': entity = "&" "lt;"; break;
4186 case '\"': entity = "&" "quot;"; break;
4187 case '&': entity = "&" "amp;"; break;
4190 while (*entity) (*o_zconv)(0, *entity++);
4200 #define rot13(c) ( \
4202 (c <= 'M') ? (c + 13): \
4203 (c <= 'Z') ? (c - 13): \
4205 (c <= 'm') ? (c + 13): \
4206 (c <= 'z') ? (c - 13): \
4210 #define rot47(c) ( \
4212 ( c <= 'O' ) ? (c + 47) : \
4213 ( c <= '~' ) ? (c - 47) : \
4217 void rot_conv(nkf_char c2, nkf_char c1)
4219 if (c2==0 || c2==X0201 || c2==ISO8859_1) {
4225 (*o_rot_conv)(c2,c1);
4228 void hira_conv(nkf_char c2, nkf_char c1)
4232 if (0x20 < c1 && c1 < 0x74) {
4234 (*o_hira_conv)(c2,c1);
4236 } else if (c1 == 0x74 && (output_conv == w_oconv || output_conv == w_oconv16)) {
4238 c1 = CLASS_UTF16 | 0x3094;
4239 (*o_hira_conv)(c2,c1);
4242 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4244 (*o_hira_conv)(c2,c1);
4249 if (c2 == 0 && c1 == (CLASS_UTF16 | 0x3094)) {
4252 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4254 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4258 (*o_hira_conv)(c2,c1);
4262 void iso2022jp_check_conv(nkf_char c2, nkf_char c1)
4264 static const nkf_char range[RANGE_NUM_MAX][2] = {
4285 nkf_char start, end, c;
4287 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4291 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4296 for (i = 0; i < RANGE_NUM_MAX; i++) {
4297 start = range[i][0];
4300 if (c >= start && c <= end) {
4305 (*o_iso2022jp_check_conv)(c2,c1);
4309 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
4311 const unsigned char *mime_pattern[] = {
4312 (const unsigned char *)"\075?EUC-JP?B?",
4313 (const unsigned char *)"\075?SHIFT_JIS?B?",
4314 (const unsigned char *)"\075?ISO-8859-1?Q?",
4315 (const unsigned char *)"\075?ISO-8859-1?B?",
4316 (const unsigned char *)"\075?ISO-2022-JP?B?",
4317 (const unsigned char *)"\075?ISO-2022-JP?Q?",
4318 #if defined(UTF8_INPUT_ENABLE)
4319 (const unsigned char *)"\075?UTF-8?B?",
4320 (const unsigned char *)"\075?UTF-8?Q?",
4322 (const unsigned char *)"\075?US-ASCII?Q?",
4327 /*
\e$B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u
\e(B */
4328 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
4329 e_iconv, s_iconv, 0, 0, 0, 0,
4330 #if defined(UTF8_INPUT_ENABLE)
4336 const nkf_char mime_encode[] = {
4337 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201,
4338 #if defined(UTF8_INPUT_ENABLE)
4345 const nkf_char mime_encode_method[] = {
4346 'B', 'B','Q', 'B', 'B', 'Q',
4347 #if defined(UTF8_INPUT_ENABLE)
4355 #define MAXRECOVER 20
4357 void switch_mime_getc(void)
4359 if (i_getc!=mime_getc) {
4360 i_mgetc = i_getc; i_getc = mime_getc;
4361 i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4362 if(mime_f==STRICT_MIME) {
4363 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4364 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4369 void unswitch_mime_getc(void)
4371 if(mime_f==STRICT_MIME) {
4372 i_mgetc = i_mgetc_buf;
4373 i_mungetc = i_mungetc_buf;
4376 i_ungetc = i_mungetc;
4377 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
4378 mime_iconv_back = NULL;
4381 nkf_char mime_begin_strict(FILE *f)
4385 const unsigned char *p,*q;
4386 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4388 mime_decode_mode = FALSE;
4389 /* =? has been checked */
4391 p = mime_pattern[j];
4394 for(i=2;p[i]>' ';i++) { /* start at =? */
4395 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
4396 /* pattern fails, try next one */
4398 while (mime_pattern[++j]) {
4399 p = mime_pattern[j];
4400 for(k=2;k<i;k++) /* assume length(p) > i */
4401 if (p[k]!=q[k]) break;
4402 if (k==i && nkf_toupper(c1)==p[k]) break;
4404 p = mime_pattern[j];
4405 if (p) continue; /* found next one, continue */
4406 /* all fails, output from recovery buffer */
4414 mime_decode_mode = p[i-2];
4416 mime_iconv_back = iconv;
4417 set_iconv(FALSE, mime_priority_func[j]);
4418 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
4420 if (mime_decode_mode=='B') {
4421 mimebuf_f = unbuf_f;
4423 /* do MIME integrity check */
4424 return mime_integrity(f,mime_pattern[j]);
4432 nkf_char mime_getc_buf(FILE *f)
4434 /* We don't keep EOF in the Fifo, because it contains ?= as
4435 a terminator. It was checked in mime_integrity. */
4436 return ((mimebuf_f)?
4437 (*i_mgetc_buf)(f):Fifo(mime_input++));
4440 nkf_char mime_ungetc_buf(nkf_char c, FILE *f)
4443 (*i_mungetc_buf)(c,f);
4445 Fifo(--mime_input) = (unsigned char)c;
4449 nkf_char mime_begin(FILE *f)
4454 /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4455 /* re-read and convert again from mime_buffer. */
4457 /* =? has been checked */
4459 Fifo(mime_last++)='='; Fifo(mime_last++)='?';
4460 for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4461 /* We accept any character type even if it is broken up by newlines */
4462 c1 = (*i_getc)(f); Fifo(mime_last++) = (unsigned char)c1;
4463 if (c1=='\n'||c1==' '||c1=='\r'||
4464 c1=='-'||c1=='_'||is_alnum(c1) ) continue;
4466 /* Failed. But this could be another MIME preamble */
4474 c1 = (*i_getc)(f); Fifo(mime_last++) = (unsigned char)c1;
4475 if (!(++i<MAXRECOVER) || c1==EOF) break;
4476 if (c1=='b'||c1=='B') {
4477 mime_decode_mode = 'B';
4478 } else if (c1=='q'||c1=='Q') {
4479 mime_decode_mode = 'Q';
4483 c1 = (*i_getc)(f); Fifo(mime_last++) = (unsigned char)c1;
4484 if (!(++i<MAXRECOVER) || c1==EOF) break;
4486 mime_decode_mode = FALSE;
4492 if (!mime_decode_mode) {
4493 /* false MIME preamble, restart from mime_buffer */
4494 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4495 /* Since we are in MIME mode until buffer becomes empty, */
4496 /* we never go into mime_begin again for a while. */
4499 /* discard MIME preamble, and go to MIME mode */
4501 /* do no MIME integrity check */
4502 return c1; /* used only for checking EOF */
4506 void no_putc(nkf_char c)
4511 void debug(const char *str)
4514 fprintf(stderr, "%s\n", str);
4519 void set_input_codename(char *codename)
4523 strcmp(codename, "") != 0 &&
4524 strcmp(codename, input_codename) != 0)
4526 is_inputcode_mixed = TRUE;
4528 input_codename = codename;
4529 is_inputcode_set = TRUE;
4532 #if !defined(PERL_XS) && !defined(WIN32DLL)
4533 void print_guessed_code(char *filename)
4535 char *codename = "BINARY";
4536 if (!is_inputcode_mixed) {
4537 if (strcmp(input_codename, "") == 0) {
4540 codename = input_codename;
4543 if (filename != NULL) printf("%s:", filename);
4544 printf("%s\n", codename);
4550 nkf_char hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4552 nkf_char c1, c2, c3;
4558 if (!nkf_isxdigit(c2)){
4563 if (!nkf_isxdigit(c3)){
4568 return (hex2bin(c2) << 4) | hex2bin(c3);
4571 nkf_char cap_getc(FILE *f)
4573 return hex_getc(':', f, i_cgetc, i_cungetc);
4576 nkf_char cap_ungetc(nkf_char c, FILE *f)
4578 return (*i_cungetc)(c, f);
4581 nkf_char url_getc(FILE *f)
4583 return hex_getc('%', f, i_ugetc, i_uungetc);
4586 nkf_char url_ungetc(nkf_char c, FILE *f)
4588 return (*i_uungetc)(c, f);
4592 #ifdef NUMCHAR_OPTION
4593 nkf_char numchar_getc(FILE *f)
4595 nkf_char (*g)(FILE *) = i_ngetc;
4596 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4607 if (buf[i] == 'x' || buf[i] == 'X'){
4608 for (j = 0; j < 7; j++){
4610 if (!nkf_isxdigit(buf[i])){
4617 c |= hex2bin(buf[i]);
4620 for (j = 0; j < 8; j++){
4624 if (!nkf_isdigit(buf[i])){
4631 c += hex2bin(buf[i]);
4637 return CLASS_UTF16 | c;
4646 nkf_char numchar_ungetc(nkf_char c, FILE *f)
4648 return (*i_nungetc)(c, f);
4652 #ifdef UNICODE_NORMALIZATION
4654 /* Normalization Form C */
4655 nkf_char nfc_getc(FILE *f)
4657 nkf_char (*g)(FILE *f) = i_nfc_getc;
4658 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4659 int i=0, j, k=1, lower, upper;
4661 const nkf_nfchar *array;
4663 extern const struct normalization_pair normalization_table[];
4667 while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
4668 lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4669 while (upper >= lower) {
4670 j = (lower+upper) / 2;
4671 array = normalization_table[j].nfd;
4672 for (k=0; k < NORMALIZATION_TABLE_NFD_LENGTH && array[k]; k++){
4673 if (array[k] != buf[k]){
4674 array[k] < buf[k] ? (lower = j + 1) : (upper = j - 1);
4681 array = normalization_table[j].nfc;
4682 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4683 buf[i] = (nkf_char)(array[i]);
4694 nkf_char nfc_ungetc(nkf_char c, FILE *f)
4696 return (*i_nfc_ungetc)(c, f);
4698 #endif /* UNICODE_NORMALIZATION */
4704 nkf_char c1, c2, c3, c4, cc;
4705 nkf_char t1, t2, t3, t4, mode, exit_mode;
4706 nkf_char lwsp_count;
4709 nkf_char lwsp_size = 128;
4711 if (mime_top != mime_last) { /* Something is in FIFO */
4712 return Fifo(mime_top++);
4714 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
4715 mime_decode_mode=FALSE;
4716 unswitch_mime_getc();
4717 return (*i_getc)(f);
4720 if (mimebuf_f == FIXED_MIME)
4721 exit_mode = mime_decode_mode;
4724 if (mime_decode_mode == 'Q') {
4725 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4727 if (c1=='_' && mimebuf_f != FIXED_MIME) return ' ';
4728 if (c1<=' ' || DEL<=c1) {
4729 mime_decode_mode = exit_mode; /* prepare for quit */
4732 if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4736 mime_decode_mode = exit_mode; /* prepare for quit */
4737 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4738 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4739 /* end Q encoding */
4740 input_mode = exit_mode;
4742 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4743 if (lwsp_buf==NULL) {
4744 perror("can't malloc");
4747 while ((c1=(*i_getc)(f))!=EOF) {
4752 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4760 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) {
4761 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4776 lwsp_buf[lwsp_count] = (unsigned char)c1;
4777 if (lwsp_count++>lwsp_size){
4779 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4780 if (lwsp_buf_new==NULL) {
4782 perror("can't realloc");
4785 lwsp_buf = lwsp_buf_new;
4791 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SPACE && lwsp_buf[lwsp_count-1] != TAB))) {
4793 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4794 i_ungetc(lwsp_buf[lwsp_count],f);
4800 if (c1=='='&&c2<' ') { /* this is soft wrap */
4801 while((c1 = (*i_mgetc)(f)) <=' ') {
4802 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4804 mime_decode_mode = 'Q'; /* still in MIME */
4805 goto restart_mime_q;
4808 mime_decode_mode = 'Q'; /* still in MIME */
4812 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4813 if (c2<=' ') return c2;
4814 mime_decode_mode = 'Q'; /* still in MIME */
4815 return ((hex2bin(c2)<<4) + hex2bin(c3));
4818 if (mime_decode_mode != 'B') {
4819 mime_decode_mode = FALSE;
4820 return (*i_mgetc)(f);
4824 /* Base64 encoding */
4826 MIME allows line breaks in the middle of
4827 Base64, but we are very pessimistic in decoding
4828 in unbuf mode because MIME encoded text may be broken by
4829 less or an editor's control sequences (such as ESC-[-K) in unbuffered
4830 mode. Ignore incomplete MIME.
4832 mode = mime_decode_mode;
4833 mime_decode_mode = exit_mode; /* prepare for quit */
4835 while ((c1 = (*i_mgetc)(f))<=' ') {
4840 if ((c2 = (*i_mgetc)(f))<=' ') {
4843 if (mime_f != STRICT_MIME) goto mime_c2_retry;
4844 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4847 if ((c1 == '?') && (c2 == '=')) {
4850 lwsp_buf = malloc((lwsp_size+5)*sizeof(char));
4851 if (lwsp_buf==NULL) {
4852 perror("can't malloc");
4855 while ((c1=(*i_getc)(f))!=EOF) {
4860 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4868 if ((c1=(*i_getc)(f))!=EOF) {
4872 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) {
4887 lwsp_buf[lwsp_count] = (unsigned char)c1;
4888 if (lwsp_count++>lwsp_size){
4890 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4891 if (lwsp_buf_new==NULL) {
4893 perror("can't realloc");
4896 lwsp_buf = lwsp_buf_new;
4902 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SPACE && lwsp_buf[lwsp_count-1] != TAB))) {
4904 for(lwsp_count--;lwsp_count>0;lwsp_count--)
4905 i_ungetc(lwsp_buf[lwsp_count],f);
4912 if ((c3 = (*i_mgetc)(f))<=' ') {
4915 if (mime_f != STRICT_MIME) goto mime_c3_retry;
4916 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4920 if ((c4 = (*i_mgetc)(f))<=' ') {
4923 if (mime_f != STRICT_MIME) goto mime_c4_retry;
4924 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
4928 mime_decode_mode = mode; /* still in MIME sigh... */
4930 /* BASE 64 decoding */
4932 t1 = 0x3f & base64decode(c1);
4933 t2 = 0x3f & base64decode(c2);
4934 t3 = 0x3f & base64decode(c3);
4935 t4 = 0x3f & base64decode(c4);
4936 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4938 Fifo(mime_last++) = (unsigned char)cc;
4939 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4941 Fifo(mime_last++) = (unsigned char)cc;
4942 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4944 Fifo(mime_last++) = (unsigned char)cc;
4949 return Fifo(mime_top++);
4952 nkf_char mime_ungetc(nkf_char c, FILE *f)
4954 Fifo(--mime_top) = (unsigned char)c;
4958 nkf_char mime_integrity(FILE *f, const unsigned char *p)
4962 /* In buffered mode, read until =? or NL or buffer full
4964 mime_input = mime_top;
4965 mime_last = mime_top;
4967 while(*p) Fifo(mime_input++) = *p++;
4970 while((c=(*i_getc)(f))!=EOF) {
4971 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) {
4972 break; /* buffer full */
4974 if (c=='=' && d=='?') {
4975 /* checked. skip header, start decode */
4976 Fifo(mime_input++) = (unsigned char)c;
4977 /* mime_last_input = mime_input; */
4982 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4984 /* Should we check length mod 4? */
4985 Fifo(mime_input++) = (unsigned char)c;
4988 /* In case of Incomplete MIME, no MIME decode */
4989 Fifo(mime_input++) = (unsigned char)c;
4990 mime_last = mime_input; /* point undecoded buffer */
4991 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */
4992 switch_mime_getc(); /* anyway we need buffered getc */
4996 nkf_char base64decode(nkf_char c)
5001 i = c - 'A'; /* A..Z 0-25 */
5003 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
5005 } else if (c > '/') {
5006 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
5007 } else if (c == '+') {
5008 i = '>' /* 62 */ ; /* + 62 */
5010 i = '?' /* 63 */ ; /* / 63 */
5015 static const char basis_64[] =
5016 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
5018 static nkf_char b64c;
5019 #define MIMEOUT_BUF_LENGTH (60)
5020 char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
5021 int mimeout_buf_count = 0;
5022 int mimeout_preserve_space = 0;
5023 #define itoh4(c) (c>=10?c+'A'-10:c+'0')
5025 void open_mime(nkf_char mode)
5027 const unsigned char *p;
5030 p = mime_pattern[0];
5031 for(i=0;mime_encode[i];i++) {
5032 if (mode == mime_encode[i]) {
5033 p = mime_pattern[i];
5037 mimeout_mode = mime_encode_method[i];
5040 if (base64_count>45) {
5041 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
5042 (*o_mputc)(mimeout_buf[i]);
5048 if (!mimeout_preserve_space && mimeout_buf_count>0
5049 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
5050 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) {
5054 if (!mimeout_preserve_space) {
5055 for (;i<mimeout_buf_count;i++) {
5056 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
5057 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) {
5058 (*o_mputc)(mimeout_buf[i]);
5065 mimeout_preserve_space = FALSE;
5071 j = mimeout_buf_count;
5072 mimeout_buf_count = 0;
5074 mime_putc(mimeout_buf[i]);
5078 void close_mime(void)
5088 switch(mimeout_mode) {
5093 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]);
5099 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]);
5105 if (mimeout_f!=FIXED_MIME) {
5107 } else if (mimeout_mode != 'Q')
5112 void mimeout_addchar(nkf_char c)
5114 switch(mimeout_mode) {
5119 } else if(!nkf_isalnum(c)) {
5121 (*o_mputc)(itoh4(((c>>4)&0xf)));
5122 (*o_mputc)(itoh4((c&0xf)));
5131 (*o_mputc)(basis_64[c>>2]);
5136 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5142 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]);
5143 (*o_mputc)(basis_64[c & 0x3F]);
/* The (c2, c1) pair passed to the most recent mime_prechar() call. */
5154 nkf_char mime_lastchar2, mime_lastchar1;
/*
 * mime_prechar: pre-processing step for the character pair (c2, c1).
 *
 * In the visible branch, when base64_count plus the Base64 size of the
 * pending buffer (4 output characters per 3 buffered bytes, rounded down)
 * exceeds 66, it sends (EOF,0), (0,NL) and (0,SPACE) through o_base64conv.
 * The pair is finally stored in mime_lastchar2 / mime_lastchar1.
 *
 * NOTE(review): the line beginning "}" followed by a comment opener starts
 * a commented-out alternative; where that comment ends is not visible in
 * this excerpt, so the conditions that follow it may or may not be live
 * code.
 */
5156 void mime_prechar(nkf_char c2, nkf_char c1)
5160 if (base64_count + mimeout_buf_count/3*4> 66){
5161 (*o_base64conv)(EOF,0);
5162 (*o_base64conv)(0,NL);
5163 (*o_base64conv)(0,SPACE);
5165 }/*else if (mime_lastchar2){
5166 if (c1 <=DEL && !nkf_isspace(c1)){
5167 (*o_base64conv)(0,SPACE);
5171 if (c2 && mime_lastchar2 == 0
5172 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
5173 (*o_base64conv)(0,SPACE);
5176 mime_lastchar2 = c2;
5177 mime_lastchar1 = c1;
// mime_putc: feed one character c (or EOF) into the MIME output stage.
//
// Two regimes are visible.  When mimeout_f == FIXED_MIME the function
// checks base64_count against 71 and handles EOF separately from ordinary
// characters.  Otherwise characters are collected in mimeout_buf and later
// either written unchanged through o_mputc or encoded through
// mimeout_addchar(); open_mime(output_mode) is called where encoding has
// to start.
//
// NOTE(review): many lines of this function (braces, loop headers, whole
// statements) are missing from this excerpt, so the comments below cover
// only the visible statements.
5180 void mime_putc(nkf_char c)
5185 if (mimeout_f == FIXED_MIME){
5186 if (mimeout_mode == 'Q'){
5187 if (base64_count > 71){
5188 if (c!=CR && c!=NL) {
5195 if (base64_count > 71){
5200 if (c == EOF) { /* c==EOF */
5204 if (c != EOF) { /* c != EOF */
5210 /* mimeout_f != FIXED_MIME */
5212 if (c == EOF) { /* c==EOF */
/* Remember how many characters are pending and mark the buffer empty
 * before flushing them. */
5213 j = mimeout_buf_count;
5214 mimeout_buf_count = 0;
5218 if (nkf_isspace(mimeout_buf[i]) && base64_count < 71){
5221 mimeout_addchar(mimeout_buf[i]);
5225 mimeout_addchar(mimeout_buf[i]);
5229 mimeout_addchar(mimeout_buf[i]);
5235 if (mimeout_mode=='Q') {
5236 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
/* Remember the most recently buffered character, if there is one. */
5248 if (mimeout_buf_count > 0){
5249 lastchar = mimeout_buf[mimeout_buf_count - 1];
/* mimeout_mode is zero here. */
5254 if (!mimeout_mode) {
5255 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
5256 if (nkf_isspace(c)) {
5257 if (c==CR || c==NL) {
5260 for (i=0;i<mimeout_buf_count;i++) {
5261 (*o_mputc)(mimeout_buf[i]);
5262 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
/* Restart the buffer with c as its only character. */
5268 mimeout_buf[0] = (char)c;
5269 mimeout_buf_count = 1;
5271 if (base64_count > 1
5272 && base64_count + mimeout_buf_count > 76){
5275 if (!nkf_isspace(mimeout_buf[0])){
/* Buffer c; once the buffer holds more than MIMEOUT_BUF_LENGTH
 * characters, start encoded output. */
5280 mimeout_buf[mimeout_buf_count++] = (char)c;
5281 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5282 open_mime(output_mode);
5287 if (lastchar==CR || lastchar == NL){
5288 for (i=0;i<mimeout_buf_count;i++) {
5289 (*o_mputc)(mimeout_buf[i]);
5292 mimeout_buf_count = 0;
/* Write out everything except the trailing SPACE, then keep a single
 * SPACE pending. */
5294 if (lastchar==SPACE) {
5295 for (i=0;i<mimeout_buf_count-1;i++) {
5296 (*o_mputc)(mimeout_buf[i]);
5299 mimeout_buf[0] = SPACE;
5300 mimeout_buf_count = 1;
5302 open_mime(output_mode);
5305 /* mimeout_mode == 'B', 1, 2 */
5306 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
5307 if (lastchar == CR || lastchar == NL){
5308 if (nkf_isblank(c)) {
5309 for (i=0;i<mimeout_buf_count;i++) {
5310 mimeout_addchar(mimeout_buf[i]);
5312 mimeout_buf_count = 0;
5313 } else if (SPACE<c && c<DEL) {
5315 for (i=0;i<mimeout_buf_count;i++) {
5316 (*o_mputc)(mimeout_buf[i]);
5319 mimeout_buf_count = 0;
5322 if (c==SPACE || c==TAB || c==CR || c==NL) {
5323 for (i=0;i<mimeout_buf_count;i++) {
5324 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
5326 for (i=0;i<mimeout_buf_count;i++) {
5327 (*o_mputc)(mimeout_buf[i]);
5330 mimeout_buf_count = 0;
/* Buffer c; on overflow the visible statements write the buffered
 * characters out through o_mputc and empty the buffer. */
5333 mimeout_buf[mimeout_buf_count++] = (char)c;
5334 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5336 for (i=0;i<mimeout_buf_count;i++) {
5337 (*o_mputc)(mimeout_buf[i]);
5340 mimeout_buf_count = 0;
/* Buffer c; on overflow the buffered characters are passed to
 * mimeout_addchar(). */
5344 if (mimeout_buf_count>0 && SPACE<c && c!='=') {
5345 mimeout_buf[mimeout_buf_count++] = (char)c;
5346 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
5347 j = mimeout_buf_count;
5348 mimeout_buf_count = 0;
5350 mimeout_addchar(mimeout_buf[i]);
5357 if (mimeout_buf_count>0) {
5358 j = mimeout_buf_count;
5359 mimeout_buf_count = 0;
5361 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL)
5363 mimeout_addchar(mimeout_buf[i]);
5369 (*o_mputc)(mimeout_buf[i]);
5371 open_mime(output_mode);
/*
 * State reset, compiled only when PERL_XS or WIN32DLL is defined.
 *
 * The visible statements assign fixed default values to the global option
 * flags, conversion hooks and input hooks.
 *
 * NOTE(review): the function header and a number of statements are not
 * visible in this excerpt; the routine is presumably nkf's re-initialiser
 * for repeated in-process calls -- confirm against the full source.
 */
5378 #if defined(PERL_XS) || defined(WIN32DLL)
5382 struct input_code *p = input_code_list;
5395 mime_f = STRICT_MIME;
5396 mime_decode_f = FALSE;
5401 #if defined(MSDOS) || defined(__OS2__)
5406 iso2022jp_f = FALSE;
5407 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5408 ms_ucs_map_f = UCS_MAP_ASCII;
5410 #ifdef UTF8_INPUT_ENABLE
5411 no_cp932ext_f = FALSE;
5412 ignore_zwnbsp_f = TRUE;
5413 no_best_fit_chars_f = FALSE;
5414 encode_fallback = NULL;
5415 unicode_subchar = '?';
5417 #ifdef UTF8_OUTPUT_ENABLE
5421 #ifdef UNICODE_NORMALIZATION
5434 is_inputcode_mixed = FALSE;
5435 is_inputcode_set = FALSE;
5439 #ifdef SHIFTJIS_CP932
/* Clear all 256 entries of prefix_table. */
5449 for (i = 0; i < 256; i++){
5450 prefix_table[i] = 0;
5453 #ifdef UTF8_INPUT_ENABLE
5454 utf16_mode = UTF16BE_INPUT;
/* Discard any characters pending in the MIME output buffer. */
5456 mimeout_buf_count = 0;
5461 fold_preserve_f = FALSE;
5464 kanji_intro = DEFAULT_J;
5465 ascii_intro = DEFAULT_R;
5466 fold_margin = FOLD_MARGIN;
5467 output_conv = DEFAULT_CONV;
5468 oconv = DEFAULT_CONV;
/* Each of these conversion hooks is pointed at no_connection. */
5469 o_zconv = no_connection;
5470 o_fconv = no_connection;
5471 o_crconv = no_connection;
5472 o_rot_conv = no_connection;
5473 o_hira_conv = no_connection;
5474 o_base64conv = no_connection;
5475 o_iso2022jp_check_conv = no_connection;
/* Input hooks are pointed at std_getc / std_ungetc. */
5478 i_ungetc = std_ungetc;
5480 i_bungetc = std_ungetc;
5483 i_mungetc = std_ungetc;
5484 i_mgetc_buf = std_getc;
5485 i_mungetc_buf = std_ungetc;
5486 output_mode = ASCII;
5489 mime_decode_mode = FALSE;
5495 z_prev2=0,z_prev1=0;
5497 iconv_for_check = 0;
5499 input_codename = "";
/* no_connection: two-argument form of no_connection2(); forwards (c2, c1)
 * with 0 as the third argument and discards the result. */
5506 void no_connection(nkf_char c2, nkf_char c1)
5508 no_connection2(c2,c1,0);
/* no_connection2: reports an internal module connection failure on stderr;
 * none of its arguments is used by the visible statements.
 * NOTE(review): a line between the fprintf and the return is not visible in
 * this excerpt, so whether the return is ever reached cannot be told from
 * here. */
5511 nkf_char no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
5513 fprintf(stderr,"nkf internal module connection failure.\n");
5515 return 0; /* LINT */
/*
 * Usage text: writes the command-line option summary to stderr, one line
 * per fprintf call.  Several sections are emitted only when the
 * corresponding build-time macro (DEFAULT_CODE_*, UTF8_INPUT_ENABLE,
 * UTF8_OUTPUT_ENABLE, NUMCHAR_OPTION) is defined.
 *
 * NOTE(review): the function header, the matching #endif lines and the
 * condition guarding the fprintf redefinition are not visible in this
 * excerpt.
 * NOTE(review): the message strings contain the typos "Outout" (for
 * "Output") and "charactor" (for "character"); they are runtime text and
 * are left untouched here.
 */
/* Redirects the fprintf calls that follow to dllprintf. */
5520 #define fprintf dllprintf
5524 fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
5525 fprintf(stderr,"Flags:\n");
5526 fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
/* Exactly one of the following lines is expected to be compiled in,
 * marking the build's default output code -- presumably; confirm against
 * the build configuration. */
5527 #ifdef DEFAULT_CODE_SJIS
5528 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
5530 #ifdef DEFAULT_CODE_JIS
5531 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
5533 #ifdef DEFAULT_CODE_EUC
5534 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
5536 #ifdef DEFAULT_CODE_UTF8
5537 fprintf(stderr,"j,s,e,w Outout code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
5539 #ifdef UTF8_OUTPUT_ENABLE
5540 fprintf(stderr," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
5542 fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
5543 #ifdef UTF8_INPUT_ENABLE
5544 fprintf(stderr," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
5546 fprintf(stderr,"t no conversion\n");
5547 fprintf(stderr,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
5548 fprintf(stderr,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n");
5549 fprintf(stderr,"r {de/en}crypt ROT13/47\n");
5550 fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
5551 fprintf(stderr,"v Show this usage. V: show version\n");
5552 fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
5553 fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
5554 fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
5555 fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
5556 fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII\n");
5557 fprintf(stderr," 1: Kankaku to 1 space 2: to 2 spaces 3: Convert to HTML Entity\n");
5558 fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
5559 fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
5561 fprintf(stderr,"T Text mode output\n");
5563 fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
5564 fprintf(stderr,"I Convert non ISO-2022-JP charactor to GETA\n");
5565 fprintf(stderr,"d,c Convert line breaks -d: LF -c: CRLF\n");
5566 fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
5567 fprintf(stderr,"\n");
5568 fprintf(stderr,"Long name options\n");
5569 fprintf(stderr," --ic=<input codeset> --oc=<output codeset>\n");
5570 fprintf(stderr," Specify the input or output codeset\n");
5571 fprintf(stderr," --fj --unix --mac --windows\n");
5572 fprintf(stderr," --jis --euc --sjis --utf8 --utf16 --mime --base64\n");
5573 fprintf(stderr," Convert for the system or code\n");
5574 fprintf(stderr," --hiragana --katakana --katakana-hiragana\n");
5575 fprintf(stderr," To Hiragana/Katakana Conversion\n");
5576 fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
/* "%%" is the printf escape for a literal percent sign. */
5578 fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
5580 #ifdef NUMCHAR_OPTION
5581 fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
5583 #ifdef UTF8_INPUT_ENABLE
5584 fprintf(stderr," --fb-{skip, html, xml, perl, java, subchar}\n");
5585 fprintf(stderr," Specify how nkf handles unassigned characters\n");
5588 fprintf(stderr," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n");
5589 fprintf(stderr," Overwrite original listed files by filtered result\n");
5590 fprintf(stderr," --overwrite preserves timestamp of original files\n");
5592 fprintf(stderr," -g --guess Guess the input code\n");
5593 fprintf(stderr," --help --version Show this help/the version\n");
5594 fprintf(stderr," For more information, see also man nkf\n");
5595 fprintf(stderr,"\n");
/*
 * Version banner: prints "Network Kanji Filter Version <NKF_VERSION>
 * (<NKF_RELEASE_DATE>)" to stderr, followed by the CopyRight string on its
 * own line.  The format string is assembled from adjacent string literals
 * chosen by the platform #if blocks.
 *
 * NOTE(review): the function header, the platform-specific literals and
 * the #endif lines are not visible in this excerpt.
 */
5601 fprintf(stderr,"Network Kanji Filter Version %s (%s) "
5602 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__) && !defined(__OS2__)
5605 #if defined(MSDOS) && defined(__WIN16__)
5608 #if defined(MSDOS) && defined(__WIN32__)
5614 ,NKF_VERSION,NKF_RELEASE_DATE);
5615 fprintf(stderr,"\n%s\n",CopyRight);
5620 ** パッチ制作者 (patch authors)
5621 ** void@merope.pleiades.or.jp (Kusakabe Youichi)
5622 ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
5623 ** ohta@src.ricoh.co.jp (Junn Ohta)
5624 ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
5625 ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
5626 ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
5627 ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
5628 ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
5629 ** GHG00637@nifty-serve.or.jp (COW)