* 3. This notice may not be removed or altered from any source distribution.
*/
#define NKF_VERSION "2.0.9"
-#define NKF_RELEASE_DATE "2009-01-20"
+#define NKF_RELEASE_DATE "2009-04-26"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
"Copyright (C) 1996-2009, The nkf Project."
&& (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
-#define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c < (0xE0&0x7F))
+/* True when byte c lies in SP..0x5F inclusive; c is parenthesized so expression arguments expand safely. */
+#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
#define HOLD_SIZE 1024
#if defined(INT_IS_SHORT)
{"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
#ifdef UTF8_INPUT_ENABLE
{"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
+ {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
+ {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
#endif
{0}
};
{
fprintf(HELP_OUTPUT,
"Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
- " j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
#ifdef UTF8_OUTPUT_ENABLE
- " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
+ " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
+ " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
+#else
+ /* Without UTF-8 output support the j/s/e flags must still be listed. */
+ " j/s/e Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
#endif
- " J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
#ifdef UTF8_INPUT_ENABLE
- " After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
+ " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
+ " UTF option is -W[8,[16,32][B,L]]\n"
+#else
+ /* Upper-case flags select the INPUT encoding, mirroring the J/S/E/W line above. */
+ " J/S/E Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
#endif
);
fprintf(HELP_OUTPUT,
- " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
+ " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
" M[BQ] MIME encode [B:base64 Q:quoted]\n"
" f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
);
" L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
);
fprintf(HELP_OUTPUT,
- "Long name options\n"
- " --ic=<input codeset> --oc=<output codeset>\n"
- " Specify the input or output codeset\n"
- " --hiragana --katakana --katakana-hiragana\n"
- " To Hiragana/Katakana Conversion\n"
+ " --ic=<encoding> Specify the input encoding\n"
+ " --oc=<encoding> Specify the output encoding\n"
+ " --hiragana --katakana Hiragana/Katakana Conversion\n"
+ " --katakana-hiragana Converts each other\n"
);
fprintf(HELP_OUTPUT,
#ifdef INPUT_OPTION
- " --cap-input, --url-input Convert hex after ':' or '%%'\n"
+ " --{cap, url}-input Convert hex after ':' or '%%'\n"
#endif
#ifdef NUMCHAR_OPTION
- " --numchar-input Convert Unicode Character Reference\n"
+ " --numchar-input Convert Unicode Character Reference\n"
#endif
#ifdef UTF8_INPUT_ENABLE
" --fb-{skip, html, xml, perl, java, subchar}\n"
- " Specify how nkf handles unassigned characters\n"
+ " Specify unassigned character's replacement\n"
#endif
);
fprintf(HELP_OUTPUT,
#ifdef OVERWRITE
- " --in-place[=SUF] Overwrite original listed files by filtered result\n"
- " --overwrite[=SUF] in-place and preserve timestamp of original files\n"
+ " --in-place[=SUF] Overwrite original files\n"
+ " --overwrite[=SUF] Preserve timestamp of original files\n"
#endif
- " -g --guess Guess the input code\n"
- " -v --version print the version\n"
- " --help/-V print this help / configuration\n"
+ " -g --guess Guess the input code\n"
+ " -v --version Print the version\n"
+ " --help/-V Print this help / configuration\n"
);
version();
}
output_endian = ENDIAN_LITTLE;
output_bom_f = TRUE;
break;
+ case UTF_32:
case UTF_32BE_BOM:
output_bom_f = TRUE;
break;
*p3 = 0x80 | ( val & 0x3f);
*p4 = 0;
} else if (nkf_char_unicode_value_p(val)) {
- *p1 = 0xe0 | (val >> 16);
+ *p1 = 0xf0 | (val >> 18);
*p2 = 0x80 | ((val >> 12) & 0x3f);
*p3 = 0x80 | ((val >> 6) & 0x3f);
*p4 = 0x80 | ( val & 0x3f);
} else if ((c3 = (*i_getc)(f)) == EOF) {
ret = EOF;
break;
- } else {
- code_status(c3);
- if (hold_index < hold_count){
- c4 = hold_buf[hold_index++];
- } else if ((c4 = (*i_getc)(f)) == EOF) {
- c3 = ret = EOF;
- break;
- } else {
- code_status(c4);
- (*iconv)(c1, c2, (c3<<8)|c4);
- }
}
+ code_status(c3);
+ if (hold_index < hold_count){
+ c4 = hold_buf[hold_index++];
+ } else if ((c4 = (*i_getc)(f)) == EOF) {
+ c3 = ret = EOF;
+ break;
+ }
+ code_status(c4);
+ (*iconv)(c1, c2, (c3<<8)|c4);
break;
case -1:
/* 3 bytes EUC or UTF-8 */
if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
/* CP5022x */
MORE;
+ }else if (input_codename && input_codename[0] == 'I' &&
+ 0xA1 <= c1 && c1 <= 0xDF) {
+ /* JIS X 0201 Katakana in 8bit JIS */
+ c2 = JIS_X_0201_1976_K;
+ c1 &= 0x7f;
+ SEND;
} else if (c1 > DEL) {
/* 8 bit code */
if (!estab_f && !iso8859_f) {
output_endian = ENDIAN_LITTLE;
} else if (cp[0] == 'B') {
cp++;
- } else {
- output_encoding = nkf_enc_from_index(enc_idx);
- continue;
}
if (cp[0] == '0'){
cp++;
while ('0'<= *cp && *cp <='9') {
alpha_f |= 1 << (*cp++ - '0');
}
+ if (alpha_f & ((1 << 2) | (1 << 3))) alpha_f |= 1;
if (!alpha_f) alpha_f = 1;
continue;
case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */