* 現在、nkf は SourceForge にてメンテナンスが続けられています。
* http://sourceforge.jp/projects/nkf/
***********************************************************************/
-/* $Id: nkf.c,v 1.145 2007/11/02 21:40:15 naruse Exp $ */
+/* $Id: nkf.c,v 1.149 2007/11/18 12:05:18 naruse Exp $ */
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2007-11-03"
+#define NKF_RELEASE_DATE "2007-11-18"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
#include "config.h"
#include "utf8tbl.h"
+
+#ifndef MIME_DECODE_DEFAULT
+#define MIME_DECODE_DEFAULT STRICT_MIME
+#endif
+
#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
#define MSDOS
#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
#define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3)
#define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \
((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \
- && (c != '.') && (c != 0x22)))
+ && (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
#define CP932_TABLE_BEGIN 0xFA
#define CP932_TABLE_END 0xFC
static int hira_f = FALSE; /* hira/kata henkan */
static int input_f = FALSE; /* non fixed input code */
static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
-static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
+static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
static int mime_decode_f = FALSE; /* mime decode is explicitly on */
static int mimebuf_f = FALSE; /* MIME buffered input */
static int broken_f = FALSE; /* convert ESC-less broken JIS */
{0}
};
-static int mimeout_mode = 0;
+static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
static int base64_count = 0;
/* X0208 -> ASCII converter */
#endif
static int nlmode_f = 0; /* CR, LF, CRLF */
-static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */
+static int input_newline = 0; /* 0: unestablished, EOF: MIXED */
static nkf_char prev_cr = 0; /* CR or 0 */
#ifdef EASYWIN /*Easy Win */
static int end_check;
#ifdef X0213_ENABLE
x0213_f = x0213_f_back;
#endif
- }
+ }
#ifdef EXEC_IO
if (exec_f){
int fds[2], pid;
int is_argument_error = FALSE;
while (argc--) {
input_codename = NULL;
- input_nextline = 0;
+ input_newline = 0;
#ifdef CHECK_OPTION
iconv_for_check = 0;
#endif
output_mode = ASCII;
(*o_putc)(c1);
} else if (c2 == ISO8859_1) {
- output_mode = ISO8859_1;
+ output_mode = UTF8;
(*o_putc)(c1 | 0x080);
} else {
output_mode = UTF8;
void nl_conv(nkf_char c2, nkf_char c1)
{
- if (guess_f && input_nextline != EOF) {
+ if (guess_f && input_newline != EOF) {
if (c2 == 0 && c1 == LF) {
- if (!input_nextline) input_nextline = prev_cr ? CRLF : LF;
- else if (input_nextline != (prev_cr ? CRLF : LF)) input_nextline = EOF;
- } else if (c2 == 0 && c1 == CR && input_nextline == LF) input_nextline = EOF;
+ if (!input_newline) input_newline = prev_cr ? CRLF : LF;
+ else if (input_newline != (prev_cr ? CRLF : LF)) input_newline = EOF;
+ } else if (c2 == 0 && c1 == CR && input_newline == LF) input_newline = EOF;
else if (!prev_cr);
- else if (!input_nextline) input_nextline = CR;
- else if (input_nextline != CR) input_nextline = EOF;
+ else if (!input_newline) input_newline = CR;
+ else if (input_newline != CR) input_newline = EOF;
}
if (prev_cr || c2 == 0 && c1 == LF) {
prev_cr = 0;
}
printf("%s%s\n",
input_codename,
- input_nextline == CR ? " (CR)" :
- input_nextline == LF ? " (LF)" :
- input_nextline == CRLF ? " (CRLF)" :
- input_nextline == EOF ? " (MIXED NL)" :
+ input_newline == CR ? " (CR)" :
+ input_newline == LF ? " (LF)" :
+ input_newline == CRLF ? " (CRLF)" :
+ input_newline == EOF ? " (MIXED NL)" :
"");
}
}
if (c > '@') {
if (c < '[') {
i = c - 'A'; /* A..Z 0-25 */
+ } else if (c == '_') {
+ i = '?' /* 63 */ ; /* _ 63 */
} else {
i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
}
} else if (c > '/') {
i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
- } else if (c == '+') {
- i = '>' /* 62 */ ; /* + 62 */
+ } else if (c == '+' || c == '-') {
+ i = '>' /* 62 */ ; /* + and - 62 */
} else {
i = '?' /* 63 */ ; /* / 63 */
}
#define MIMEOUT_BUF_LENGTH (60)
char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
int mimeout_buf_count = 0;
-int mimeout_preserve_space = 0;
void open_mime(nkf_char mode)
{
}
}
mimeout_mode = mime_encode_method[i];
-
i = 0;
if (base64_count>45) {
if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
(*o_mputc)(LF);
(*o_mputc)(SP);
base64_count = 1;
- if (!mimeout_preserve_space && mimeout_buf_count>0
+ if (mimeout_buf_count>0
&& (mimeout_buf[i]==SP || mimeout_buf[i]==TAB
|| mimeout_buf[i]==CR || mimeout_buf[i]==LF)) {
i++;
}
}
- if (!mimeout_preserve_space) {
- for (;i<mimeout_buf_count;i++) {
- if (mimeout_buf[i]==SP || mimeout_buf[i]==TAB
- || mimeout_buf[i]==CR || mimeout_buf[i]==LF) {
- (*o_mputc)(mimeout_buf[i]);
- base64_count ++;
- } else {
- break;
- }
+ for (;i<mimeout_buf_count;i++) {
+ if (mimeout_buf[i]==SP || mimeout_buf[i]==TAB
+ || mimeout_buf[i]==CR || mimeout_buf[i]==LF) {
+ (*o_mputc)(mimeout_buf[i]);
+ base64_count ++;
+ } else {
+ break;
}
}
- mimeout_preserve_space = FALSE;
-
while(*p) {
(*o_mputc)(*p++);
base64_count ++;
base64_count += 2;
break;
}
- if (mimeout_mode) {
+ if (mimeout_mode > 0) {
if (mimeout_f!=FIXED_MIME) {
close_mime();
} else if (mimeout_mode != 'Q')
void mime_prechar(nkf_char c2, nkf_char c1)
{
- if (mimeout_mode){
+ if (mimeout_mode > 0){
if (c2 == EOF){
if (base64_count + mimeout_buf_count/3*4> 73){
(*o_base64conv)(EOF,0);
(*o_base64conv)(0,LF);
(*o_base64conv)(0,SP);
- }
- } else if (c2){
- if (base64_count + mimeout_buf_count/3*4> 66){
- (*o_base64conv)(EOF,0);
- (*o_base64conv)(0,LF);
- (*o_base64conv)(0,SP);
+ base64_count = 1;
}
} else {
- if (base64_count + mimeout_buf_count/3*4> 66){
+ if (base64_count + mimeout_buf_count/3*4> 66) {
(*o_base64conv)(EOF,0);
(*o_base64conv)(0,LF);
(*o_base64conv)(0,SP);
+ base64_count = 1;
+ mimeout_mode = -1;
}
- }/*else if (mime_lastchar2){
- if (c1 <=DEL && !nkf_isspace(c1)){
- (*o_base64conv)(0,SP);
- }
- }*/
- }/*else{
- if (c2 && mime_lastchar2 == 0
- && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
- (*o_base64conv)(0,SP);
}
- }*/
- /*mime_lastchar2 = c2;
- mime_lastchar1 = c1;*/
+ } else if (c2) {
+ if (c2 != EOF && base64_count + mimeout_buf_count/3*4> 60) {
+ mimeout_mode = (output_mode==ASCII ||output_mode == ISO8859_1) ? 'Q' : 'B';
+ open_mime(output_mode);
+ (*o_base64conv)(EOF,0);
+ (*o_base64conv)(0,LF);
+ (*o_base64conv)(0,SP);
+ base64_count = 1;
+ mimeout_mode = -1;
+ }
+ }
}
void mime_putc(nkf_char c)
/* mimeout_f != FIXED_MIME */
if (c == EOF) { /* c==EOF */
+ if (mimeout_mode == -1 && mimeout_buf_count > 1) open_mime(output_mode);
j = mimeout_buf_count;
mimeout_buf_count = 0;
i = 0;
- if (mimeout_mode) {
+ if (mimeout_mode > 0) {
if (!nkf_isblank(mimeout_buf[j-1])) {
for (;i<j;i++) {
if (nkf_isspace(mimeout_buf[i]) && base64_count < 71){
return;
}
+ if (mimeout_buf_count > 0){
+ lastchar = mimeout_buf[mimeout_buf_count - 1];
+ }else{
+ lastchar = -1;
+ }
+
if (mimeout_mode=='Q') {
if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
if (c == CR || c == LF) {
if (base64_count > 70) {
close_mime();
(*o_mputc)(LF);
- base64_count = 0;
+ (*o_mputc)(SP);
+ base64_count = 1;
open_mime(output_mode);
}
if (!nkf_noescape_mime(c)) {
return;
}
- if (mimeout_buf_count > 0){
- lastchar = mimeout_buf[mimeout_buf_count - 1];
- }else{
- lastchar = -1;
- }
-
- if (!mimeout_mode) {
+ if (mimeout_mode <= 0) {
if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
if (nkf_isspace(c)) {
+ int flag = 0;
+ if (mimeout_mode == -1) {
+ flag = 1;
+ }
if (c==CR || c==LF) {
- base64_count=0;
+ if (flag) {
+ open_mime(output_mode);
+ output_mode = 0;
+ } else {
+ base64_count = 0;
+ }
}
for (i=0;i<mimeout_buf_count;i++) {
(*o_mputc)(mimeout_buf[i]);
base64_count++;
}
}
- mimeout_buf[0] = (char)c;
- mimeout_buf_count = 1;
+ if (flag) {
+ eof_mime();
+ base64_count = 0;
+ mimeout_mode = 0;
+ }
+ mimeout_buf[0] = (char)c;
+ mimeout_buf_count = 1;
}else{
if (base64_count > 1
&& base64_count + mimeout_buf_count > 76
hira_f = FALSE;
input_f = FALSE;
alpha_f = FALSE;
- mime_f = STRICT_MIME;
+ mime_f = MIME_DECODE_DEFAULT;
mime_decode_f = FALSE;
mimebuf_f = FALSE;
broken_f = FALSE;
mime_decode_mode = FALSE;
file_out_f = FALSE;
nlmode_f = 0;
- input_nextline = 0;
+ input_newline = 0;
prev_cr = 0;
option_mode = 0;
broken_counter = 0;