* nkf is currently maintained on SourceForge.
* http://sourceforge.jp/projects/nkf/
***********************************************************************/
-/* $Id: nkf.c,v 1.142 2007/10/05 10:57:50 naruse Exp $ */
+/* $Id: nkf.c,v 1.147 2007/11/03 08:02:49 naruse Exp $ */
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2007-10-05"
+#define NKF_RELEASE_DATE "2007-11-03"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
('a'<=c&&c<='f') ? (c-'a'+10) : 0)
#define bin2hex(c) ("0123456789ABCDEF"[c&15]) /* uppercase hex digit for the low 4 bits of c */
#define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3) /* nonzero when the high byte of c2 (as unsigned short) equals SS3 */
+/* Nonzero when c is CR, LF, or a character strictly between SP and DEL
+ * other than '?', '=', '_', '.' and '"' (0x22).
+ * The argument is fully parenthesized so expression arguments expand
+ * correctly; note that c is still evaluated more than once. */
+#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
+ (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
+ && ((c) != '.') && ((c) != 0x22)))
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END 0xFC
static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
#endif
-static int guess_f = FALSE;
+static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
#if !defined PERL_XS
static void print_guessed_code(char *filename);
#endif
#ifdef X0212_ENABLE
int x0213_f_back = x0213_f;
#endif
+ int guess_f_back = guess_f;
reinit();
- guess_f = TRUE;
+ guess_f = guess_f_back;
mime_f = FALSE;
#ifdef CHECK_OPTION
debug_f = debug_f_back;
{"hiragana","h1"},
{"katakana","h2"},
{"katakana-hiragana","h3"},
+ {"guess=", ""},
{"guess", "g"},
{"cp932", ""},
{"no-cp932", ""},
}
continue;
}
+ if (strcmp(long_option[i].name, "guess=") == 0){
+ if (p[0] == '1') {
+ guess_f = 2;
+ } else {
+ guess_f = 1;
+ }
+ continue;
+ }
#ifdef OVERWRITE
if (strcmp(long_option[i].name, "overwrite") == 0){
file_out_f = TRUE;
continue;
#ifndef PERL_XS
case 'g':
- guess_f = TRUE;
+ if (*cp == '1') {
+ guess_f = 2;
+ cp++;
+ } else if (*cp == '0') {
+ guess_f = 1;
+ cp++;
+ } else {
+ guess_f = 1;
+ }
continue;
#endif
case SP:
#define SCORE_KANA (SCORE_L2 << 1) /* so-called half-width kana */
#define SCORE_DEPEND (SCORE_KANA << 1) /* machine-dependent characters */
#define SCORE_CP932 (SCORE_DEPEND << 1) /* characters reinterpreted via CP932 (IBM extended characters) */
-#define SCORE_NO_EXIST (SCORE_CP932 << 1) /* non-existent characters */
+#define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
+#define SCORE_NO_EXIST (SCORE_X0212 << 1) /* non-existent characters */
#define SCORE_iMIME (SCORE_NO_EXIST << 1) /* specified by MIME */
#define SCORE_ERROR (SCORE_iMIME << 1) /* error */
set_code_score(ptr, SCORE_ERROR);
}else if (c2 == SSO){
set_code_score(ptr, SCORE_KANA);
+ }else if (c2 == 0x8f){
+ set_code_score(ptr, SCORE_X0212);
#ifdef UTF8_OUTPUT_ENABLE
}else if (!e2w_conv(c2, c1)){
set_code_score(ptr, SCORE_NO_EXIST);
status_push_ch(ptr, c);
code_score(ptr);
status_clear(ptr);
- }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){
+ }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
ptr->stat = 1;
status_push_ch(ptr, c);
+ }else if (0xed <= c && c <= 0xee){
+ ptr->stat = 3;
+ status_push_ch(ptr, c);
#ifdef SHIFTJIS_CP932
}else if (is_ibmext_in_sjis(c)){
ptr->stat = 2;
if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
status_push_ch(ptr, c);
if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
- code_score(ptr);
+ set_code_score(ptr, SCORE_CP932);
status_clear(ptr);
break;
}
#endif /* SHIFTJIS_CP932 */
status_disable(ptr);
break;
+ case 3:
+ if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
+ status_push_ch(ptr, c);
+ s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
+ set_code_score(ptr, SCORE_CP932);
+ status_clear(ptr);
+ }else{
+ status_disable(ptr);
+ }
+ break;
}
}
printf("BINARY\n");
} else {
struct input_code *p = find_inputcode_byfunc(iconv);
- printf("%s%s%s\n",
- (input_codename ? input_codename : "ASCII"),
- ((p->score & (SCORE_DEPEND|SCORE_CP932|SCORE_NO_EXIST)) ? "+" : ""),
- input_nextline == CR ? " (CR)" :
- input_nextline == LF ? " (LF)" :
- input_nextline == CRLF ? " (CRLF)" :
- input_nextline == EOF ? " (MIXED NL)" :
- "");
+ if (guess_f == 1) {
+ printf("%s\n", input_codename ? input_codename : "ASCII");
+ } else {
+ if (!input_codename) {
+ input_codename = "ASCII";
+ } else if (strcmp(input_codename, "Shift_JIS") == 0) {
+ if (p->score & (SCORE_DEPEND|SCORE_CP932))
+ input_codename = "CP932";
+ } else if (strcmp(input_codename, "EUC-JP") == 0) {
+ if (p->score & (SCORE_X0212))
+ input_codename = "EUCJP-MS";
+ else if (p->score & (SCORE_DEPEND|SCORE_CP932))
+ input_codename = "CP51932";
+ } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
+ if (p->score & (SCORE_KANA))
+ input_codename = "CP50221";
+ else if (p->score & (SCORE_DEPEND|SCORE_CP932))
+ input_codename = "CP50220";
+ }
+ printf("%s%s\n",
+ input_codename,
+ input_nextline == CR ? " (CR)" :
+ input_nextline == LF ? " (LF)" :
+ input_nextline == CRLF ? " (CRLF)" :
+ input_nextline == EOF ? " (MIXED NL)" :
+ "");
+ }
}
}
#endif /*WIN32DLL*/
if (c > '@') {
if (c < '[') {
i = c - 'A'; /* A..Z 0-25 */
+ } else if (c == '_') {
+ i = '?' /* 63 */ ; /* _ 63 */
} else {
i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
}
} else if (c > '/') {
i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
- } else if (c == '+') {
- i = '>' /* 62 */ ; /* + 62 */
+ } else if (c == '+' || c == '-') {
+ i = '>' /* 62 */ ; /* + and - 62 */
} else {
i = '?' /* 63 */ ; /* / 63 */
}
char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
int mimeout_buf_count = 0;
int mimeout_preserve_space = 0;
-#define itoh4(c) (c>=10?c+'A'-10:c+'0')
void open_mime(nkf_char mode)
{
base64_count = 0;
} else if(!nkf_isalnum(c)) {
(*o_mputc)('=');
- (*o_mputc)(itoh4(((c>>4)&0xf)));
- (*o_mputc)(itoh4((c&0xf)));
+ (*o_mputc)(bin2hex(((c>>4)&0xf)));
+ (*o_mputc)(bin2hex((c&0xf)));
base64_count += 3;
} else {
(*o_mputc)(c);
(*o_base64conv)(EOF,0);
(*o_base64conv)(0,LF);
(*o_base64conv)(0,SP);
+ base64_count = 1;
}
- } else if (c2){
- if (base64_count + mimeout_buf_count/3*4> 66){
+ } else {
+ if (base64_count + mimeout_buf_count/3*4> 66) {
(*o_base64conv)(EOF,0);
(*o_base64conv)(0,LF);
(*o_base64conv)(0,SP);
+ base64_count = 1;
+ open_mime(output_mode);
}
- }/*else if (mime_lastchar2){
- if (c1 <=DEL && !nkf_isspace(c1)){
- (*o_base64conv)(0,SP);
- }
- }*/
- }/*else{
- if (c2 && mime_lastchar2 == 0
- && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
- (*o_base64conv)(0,SP);
}
- }*/
- /*mime_lastchar2 = c2;
- mime_lastchar1 = c1;*/
+ } else if (c2) {
+ if (base64_count + mimeout_buf_count/3*4> 60) {
+ mimeout_mode = (output_mode==ASCII ||output_mode == ISO8859_1) ? 'Q' : 'B';
+ open_mime(output_mode);
+ (*o_base64conv)(EOF,0);
+ (*o_base64conv)(0,LF);
+ (*o_base64conv)(0,SP);
+ base64_count = 1;
+ }
+ }
}
void mime_putc(nkf_char c)
(*o_mputc)(SP);
base64_count++;
}
- }
- (*o_mputc)(c);
- base64_count++;
+ } else {
+ if (base64_count > 70) {
+ close_mime();
+ (*o_mputc)(LF);
+ (*o_mputc)(SP);
+ base64_count = 1;
+ open_mime(output_mode);
+ }
+ if (!nkf_noescape_mime(c)) {
+ mimeout_addchar(c);
+ return;
+ }
+ }
+ (*o_mputc)(c);
+ base64_count++;
}
return;
}
base64_count = 0;
mimeout_buf_count = 0;
}
+ mimeout_buf[mimeout_buf_count++] = (char)c;
+ return;
}
if (c==SP || c==TAB || c==CR || c==LF) {
for (i=0;i<mimeout_buf_count;i++) {
mimeout_buf_count = 0;
}
return;
- }
+ }
if (mimeout_buf_count>0 && SP<c && c!='=') {
mimeout_buf[mimeout_buf_count++] = (char)c;
if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
noout_f = FALSE;
debug_f = FALSE;
#endif
- guess_f = FALSE;
+ guess_f = 0;
#ifdef EXEC_IO
exec_f = 0;
#endif