** E-Mail: furukawa@tcp-ip.or.jp
** \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
***********************************************************************/
-/* $Id: nkf.c,v 1.50 2004/12/29 21:04:23 naruse Exp $ */
+/* $Id: nkf.c,v 1.60 2005/02/19 05:54:23 naruse Exp $ */
#define NKF_VERSION "2.0.4"
-#define NKF_RELEASE_DATE "2004-12-01"
+#define NKF_RELEASE_DATE "2005-02-19"
#include "config.h"
static char *CopyRight =
STATIC int mime_integrity PROTO((FILE *f,unsigned char *p));
STATIC int base64decode PROTO((int c));
+STATIC void mime_prechar PROTO((int c2, int c1));
STATIC void mime_putc PROTO((int c));
STATIC void open_mime PROTO((int c));
STATIC void close_mime PROTO(());
static int input_f = FALSE; /* non fixed input code */
static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
+static int mime_decode_f = FALSE; /* mime decode is explicitly on */
static int mimebuf_f = FALSE; /* MIME buffered input */
static int broken_f = FALSE; /* convert ESC-less broken JIS */
static int iso8859_f = FALSE; /* ISO8859 through */
int j;
p = (unsigned char *)long_option[i].name;
for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
- if (!*p || *p == cp[j]){
+ if (*p == cp[j]){
p = &cp[j];
break;
}
}
continue;
case 'm': /* MIME support */
+ mime_decode_f = TRUE;
if (*cp=='B'||*cp=='Q') {
mime_decode_mode = *cp++;
mimebuf_f = FIXED_MIME;
return 0;
}
+#ifdef CHECK_OPTION
+static int (*iconv_for_check)() = 0;
+#endif
+
#ifdef ANSI_C_PROTOTYPE
void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0))
#else
int (*iconv_func)();
#endif
{
-#ifdef CHECK_OPTION
- static int (*iconv_for_check)() = 0;
-#endif
#ifdef INPUT_CODE_FIX
if (f || !input_f)
#endif
/* second byte */
if (c2 > DEL) {
/* in case of 8th bit is on */
- if (!estab_f) {
+ if (!estab_f&&!mime_decode_mode) {
/* in case of not established yet */
/* It is still ambiguious */
if (h_conv(f, c2, c1)==EOF)
} else if ((c1 == NL || c1 == CR) && broken_f&4) {
input_mode = ASCII; set_iconv(FALSE, 0);
SEND;
- /*
- } else if (c1 == NL && mime_f && !mime_decode_mode ) {
+ } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f);
continue;
}
c1 = NL;
SEND;
- } else if (c1 == CR && mime_f && !mime_decode_mode ) {
+ } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF) {
if (c1==SPACE) {
i_ungetc(SPACE,f);
}
c1 = CR;
SEND;
- */
} else
SEND;
}
int c2,
c1;
{
- if (base64_count>50 && !mimeout_mode && c2==0 && c1==SPACE) {
- (*o_putc)(EOF);
- (*o_putc)(NL);
- } else if (base64_count>66 && mimeout_mode) {
- (*o_base64conv)(EOF,0);
- (*o_base64conv)(NL,0);
- (*o_base64conv)(SPACE,0);
- }
+ mime_prechar(c2, c1);
(*o_base64conv)(c2,c1);
}
#define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c)
#define nkf_isdigit(c) ('0'<=c && c<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F'))
+/* Character-class tests on a single character value.
+ * The parameter is parenthesized at every use so that an expression
+ * argument (e.g. a|b or x ? y : z) keeps its meaning.  The argument is
+ * still evaluated more than once, so pass side-effect-free expressions. */
+#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
+#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
+#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
+#define nkf_isalnum(c) (nkf_isdigit((c)) || nkf_isalpha(c)) /* extra parentheses: nkf_isdigit uses its parameter bare */
void
switch_mime_getc()
}
mime_decode_mode = p[i-2];
+ set_iconv(FALSE, mime_priority_func[j]);
clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
if (mime_decode_mode=='B') {
{
switch(mimeout_mode) {
case 'Q':
- if(c>=DEL) {
+ if(c==SPACE){
+ (*o_mputc)('_');
+ base64_count++;
+ } else if (c==CR||c==NL) {
+ (*o_mputc)(c);
+ base64_count = 0;
+ } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) {
(*o_mputc)('=');
(*o_mputc)(itoh4(((c>>4)&0xf)));
(*o_mputc)(itoh4((c&0xf)));
mimeout_mode='B';
base64_count += 2;
break;
+ default:
+ (*o_mputc)(c);
+ base64_count++;
+ break;
}
}
+int mime_lastchar2, mime_lastchar1; /* the (c2, c1) pair given to the most recent mime_prechar() call */
+/*
+ * mime_prechar: inspect the next character pair (c2, c1) before it is
+ * passed on to the output converter.
+ *
+ * When mimeout_mode is non-zero and c2 is non-zero, and base64_count
+ * plus mimeout_buf_count/3*4 exceeds 66, EOF, NL and SPACE are sent
+ * through o_base64conv.  In every case the pair is then stored in
+ * mime_lastchar2/mime_lastchar1.  The remaining branches are
+ * commented out and have no effect.
+ */
+void mime_prechar(c2, c1)
+ int c2, c1;
+{
+ if (mimeout_mode){
+ if (c2){
+ if (base64_count + mimeout_buf_count/3*4> 66){ /* presumably count/3*4 projects the base64 length of the buffered bytes -- TODO confirm */
+ (*o_base64conv)(EOF,0);
+ (*o_base64conv)(0,NL);
+ (*o_base64conv)(0,SPACE);
+ }
+ }/*else if (mime_lastchar2){
+ if (c1 <=DEL && !nkf_isspace(c1)){
+ (*o_base64conv)(0,SPACE);
+ }
+ }*/
+ }/*else{
+ if (c2 && mime_lastchar2 == 0
+ && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
+ (*o_base64conv)(0,SPACE);
+ }
+ }*/
+ mime_lastchar2 = c2; /* remember the pair for the next call */
+ mime_lastchar1 = c1;
+}
+/*
+ * mime_putc: pass one output character c (or EOF) to the MIME encoder.
+ *
+ * With mimeout_f == FIXED_MIME every character other than EOF goes to
+ * mimeout_addchar(), and the output line is broken once base64_count
+ * exceeds 71.  Otherwise characters are collected in mimeout_buf and
+ * are either written as they are through o_mputc or encoded through
+ * mimeout_addchar(), with open_mime()/eof_mime() called around the
+ * encoded parts; EOF encodes whatever is still buffered and then
+ * calls eof_mime().
+ */
void
mime_putc(c)
int c;
{
int i = 0;
int j = 0;
-
- if (mimeout_f==FIXED_MIME && base64_count>50) {
- eof_mime();
- (*o_mputc)(NL);
- base64_count=0;
- } else if (c==CR||c==NL) {
- base64_count=0;
- }
- if (c!=EOF && mimeout_f!=FIXED_MIME) {
- if ( c<=DEL &&(output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
- if (mimeout_mode=='Q') {
- if (c<=SPACE) {
- close_mime();
- (*o_mputc)(SPACE);
- base64_count++;
- }
- (*o_mputc)(c);
- base64_count++;
- return;
- } else if (mimeout_mode) {
- if (base64_count>63) {
- eof_mime();
- (*o_mputc)(NL);
- (*o_mputc)(SPACE);
- base64_count=1;
- mimeout_preserve_space = TRUE;
- }
- if (c==SPACE || c==TAB || c==CR || c==NL) {
- for (i=0;i<mimeout_buf_count;i++) {
- if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
- eof_mime();
- for (i=0;i<mimeout_buf_count;i++) {
- (*o_mputc)(mimeout_buf[i]);
- base64_count++;
- }
- mimeout_buf_count = 0;
- }
- }
- mimeout_buf[mimeout_buf_count++] = c;
- if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
- eof_mime();
- base64_count = 0;
- for (i=0;i<mimeout_buf_count;i++) {
- (*o_mputc)(mimeout_buf[i]);
- base64_count++;
- }
- }
- return;
- }
- if (mimeout_buf_count>0 && SPACE<c) {
- mimeout_buf[mimeout_buf_count++] = c;
- if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
- } else {
- return;
- }
- }
- } else if (!mimeout_mode) {
- if (c==SPACE || c==TAB || c==CR || c==NL) {
- if ((c==CR || c==NL)
- &&(mimeout_buf[mimeout_buf_count-1]==SPACE
- || mimeout_buf[mimeout_buf_count-1]==TAB)) {
- mimeout_buf_count--;
- }
- for (i=0;i<mimeout_buf_count;i++) {
- (*o_mputc)(mimeout_buf[i]);
- base64_count++;
- }
- mimeout_buf_count = 0;
- }
- mimeout_buf[mimeout_buf_count++] = c;
- if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
- open_mime(output_mode);
- }
- return;
- }
- } else if (!mimeout_mode) {
- if (mimeout_buf_count>0 && mimeout_buf[mimeout_buf_count-1]==SPACE) {
- for (i=0;i<mimeout_buf_count-1;i++) {
- (*o_mputc)(mimeout_buf[i]);
- base64_count++;
- }
- mimeout_buf[0] = SPACE;
- mimeout_buf_count = 1;
- }
- open_mime(output_mode);
+ int lastchar; /* last character held in mimeout_buf, or -1 when the buffer is empty */
+/* Fixed mode: no buffering, only line breaking and the end-of-input call. */
+ if (mimeout_f == FIXED_MIME){
+ if (mimeout_mode == 'Q'){
+ if (base64_count > 71){
+ if (c!=CR && c!=NL) { /* '=' then NL, presumably a quoted-printable soft line break -- TODO confirm; skipped when c is itself CR or NL */
+ (*o_mputc)('=');
+ (*o_mputc)(NL);
+ }
+ base64_count = 0;
+ }
+ }else{
+ if (base64_count > 71){
+ eof_mime();
+ (*o_mputc)(NL);
+ base64_count = 0;
+ }
+ if (c == EOF) { /* c==EOF */
+ eof_mime();
+ }
}
- } else if (c == EOF) { /* c==EOF */
+ if (c != EOF) { /* c!=EOF */
+ mimeout_addchar(c);
+ }
+ return;
+ }
+
+ /* mimeout_f != FIXED_MIME */
+
+ if (c == EOF) { /* c==EOF */
j = mimeout_buf_count;
+ mimeout_buf_count = 0; /* buffer is emptied before its saved contents are passed to mimeout_addchar() */
i = 0;
for (;i<j;i++) {
- if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB
- || mimeout_buf[i]==CR || mimeout_buf[i]==NL)
+ /*if (nkf_isspace(mimeout_buf[i])){
break;
- (*mime_putc)(mimeout_buf[i]);
+ }*/
+ mimeout_addchar(mimeout_buf[i]);
}
eof_mime();
for (;i<j;i++) {
}
return;
}
-
+/* Q mode: only a character with c <= DEL while output_mode is ASCII or
+ * ISO8859_1 is written (c <= SPACE first calls close_mime() and emits
+ * an extra SPACE).  NOTE(review): any other character returns below
+ * without being written or encoded -- confirm this is intended. */
+ if (mimeout_mode=='Q') {
+ if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
+ if (c <= SPACE) {
+ close_mime();
+ (*o_mputc)(SPACE);
+ base64_count++;
+ }
+ (*o_mputc)(c);
+ base64_count++;
+ }
+ return;
+ }
+/* Look at the last buffered character; the branches below test it for CR, NL and SPACE. */
+ if (mimeout_buf_count > 0){
+ lastchar = mimeout_buf[mimeout_buf_count - 1];
+ }else{
+ lastchar = -1;
+ }
+/* mimeout_mode == 0: characters are only buffered or written unencoded until open_mime() is called. */
+ if (!mimeout_mode) {
+ if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
+ if (nkf_isspace(c)) { /* whitespace: write the buffer unencoded and restart it with c */
+ if (c==CR || c==NL) {
+ base64_count=0;
+ }
+ for (i=0;i<mimeout_buf_count;i++) {
+ (*o_mputc)(mimeout_buf[i]);
+ base64_count++;
+ }
+ mimeout_buf[0] = c;
+ mimeout_buf_count = 1;
+ }else{
+ if (base64_count > 1
+ && base64_count + mimeout_buf_count > 76){ /* base64_count appears to track the current output column -- TODO confirm */
+ (*o_mputc)(NL);
+ base64_count = 0;
+ if (!nkf_isspace(mimeout_buf[0])){ /* add a SPACE after the NL unless the buffer already starts with whitespace */
+ (*o_mputc)(SPACE);
+ base64_count++;
+ }
+ }
+ mimeout_buf[mimeout_buf_count++] = c;
+ if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
+ open_mime(output_mode);
+ }
+ }
+ return;
+ }else{
+ if (lastchar==CR || lastchar == NL){ /* buffer ends in a line break: write all of it unencoded */
+ for (i=0;i<mimeout_buf_count;i++) {
+ (*o_mputc)(mimeout_buf[i]);
+ }
+ base64_count = 0;
+ mimeout_buf_count = 0;
+ }
+ if (lastchar==SPACE) { /* buffer ends in SPACE: write everything before it and keep only that SPACE buffered */
+ for (i=0;i<mimeout_buf_count-1;i++) {
+ (*o_mputc)(mimeout_buf[i]);
+ base64_count++;
+ }
+ mimeout_buf[0] = SPACE;
+ mimeout_buf_count = 1;
+ }
+ open_mime(output_mode);
+ }
+ }else{
+ /* mimeout_mode == 'B', 1, 2 */
+ if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
+ if (lastchar == CR || lastchar == NL){
+ if (nkf_isblank(c)) { /* SPACE/TAB after a buffered line break: the buffer goes through mimeout_addchar() */
+ for (i=0;i<mimeout_buf_count;i++) {
+ mimeout_addchar(mimeout_buf[i]);
+ }
+ mimeout_buf_count = 0;
+ } else if (SPACE<c && c<DEL) { /* printable character after a buffered line break: eof_mime(), then the buffer is written unencoded */
+ eof_mime();
+ for (i=0;i<mimeout_buf_count;i++) {
+ (*o_mputc)(mimeout_buf[i]);
+ }
+ base64_count = 0;
+ mimeout_buf_count = 0;
+ }
+ }
+ if (c==SPACE || c==TAB || c==CR || c==NL) {
+ for (i=0;i<mimeout_buf_count;i++) {
+ if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) {
+ eof_mime();
+ for (i=0;i<mimeout_buf_count;i++) { /* NOTE(review): reuses the outer index i; the outer loop ends afterwards because mimeout_buf_count is reset to 0 */
+ (*o_mputc)(mimeout_buf[i]);
+ base64_count++;
+ }
+ mimeout_buf_count = 0;
+ }
+ }
+ mimeout_buf[mimeout_buf_count++] = c;
+ if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
+ eof_mime();
+ for (i=0;i<mimeout_buf_count;i++) {
+ (*o_mputc)(mimeout_buf[i]);
+ base64_count++;
+ }
+ mimeout_buf_count = 0;
+ }
+ return;
+ }
+ if (mimeout_buf_count>0 && SPACE<c && c!='=') {
+ mimeout_buf[mimeout_buf_count++] = c;
+ if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) {
+ j = mimeout_buf_count;
+ mimeout_buf_count = 0;
+ for (i=0;i<j;i++) {
+ mimeout_addchar(mimeout_buf[i]);
+ }
+ }
+ return;
+ }
+ }
+ }
if (mimeout_buf_count>0) {
j = mimeout_buf_count;
mimeout_buf_count = 0;
for (i=0;i<j;i++) {
+ if (mimeout_buf[i]==CR || mimeout_buf[i]==NL) /* stop passing characters to mimeout_addchar() at the first buffered line break */
+ break;
mimeout_addchar(mimeout_buf[i]);
}
+ if (i<j) { /* stopped at a CR/NL: eof_mime(), write the rest through o_mputc, then open_mime() again */
+ eof_mime();
+ base64_count=0;
+ for (;i<j;i++) {
+ (*o_mputc)(mimeout_buf[i]);
+ }
+ open_mime(output_mode);
+ }
}
mimeout_addchar(c);
}
input_f = FALSE;
alpha_f = FALSE;
mime_f = STRICT_MIME;
+ mime_decode_f = FALSE;
mimebuf_f = FALSE;
broken_f = FALSE;
iso8859_f = FALSE;
broken_counter = 0;
broken_last = 0;
z_prev2=0,z_prev1=0;
-
+#ifdef CHECK_OPTION
+ iconv_for_check = 0;
+#endif
}
#endif
fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
- fprintf(stderr," --cp932, --no-cp932 CP932 compatible\n");
+ fprintf(stderr," --x0212 Convert JISX0212\n");
+ fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
#ifdef INPUT_OPTION
fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
#endif