/** Network Kanji Filter. (PDS Version)
************************************************************************
** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
-** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
+** \e$BO"Mm@h!'\e(B \e$B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j\e(B
** \e$B!J\e(BE-Mail Address: ichikawa@flab.fujitsu.co.jp\e$B!K\e(B
** Copyright (C) 1996,1998
** Copyright (C) 2002
** \e$B%P%$%J%j$NG[I[$N:]$K$O\e(Bversion message\e$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#\e(B
** \e$B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#\e(B
**
-** Everyone is permitted to do anything on this program
+** Everyone is permitted to do anything on this program
** including copying, modifying, improving,
** as long as you don't try to pretend that you wrote it.
-** i.e., the above copyright notice has to appear in all copies.
+** i.e., the above copyright notice has to appear in all copies.
** Binary distribution requires original version messages.
** You don't have to ask before copying, redistribution or publishing.
** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
***********************************************************************/
/***********************************************************************
-** UTF-8 \e$B%5%]!<%H$K$D$$$F\e(B
-** \e$B=>Mh$N\e(B nkf \e$B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9\e(B
-** nkf -e \e$B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G\e(B UTF-8 \e$B$HH=Dj$5$l$l$P!"\e(B
-** \e$B$=$N$^$^\e(B euc-jp \e$B$KJQ49$5$l$^$9\e(B
-**
-** \e$B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#\e(B
-** (\e$BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O\e(B)
-**
-** \e$B2?$+LdBj$r8+$D$1$?$i!"\e(B
-** E-Mail: furukawa@tcp-ip.or.jp
-** \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
+ * \e$B8=:_!"\e(Bnkf \e$B$O\e(B SourceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
+ * http://sourceforge.jp/projects/nkf/
***********************************************************************/
-/* $Id: nkf.c,v 1.135 2007/10/01 12:37:58 naruse Exp $ */
+/* $Id: nkf.c,v 1.138 2007/10/01 19:55:25 naruse Exp $ */
#define NKF_VERSION "2.0.8"
#define NKF_RELEASE_DATE "2007-10-01"
-#include "config.h"
-#include "utf8tbl.h"
-
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2007 Kono, Furukawa, Naruse, mastodon"
-
-/*
-**
-**
-**
-** USAGE: nkf [flags] [file]
-**
-** Flags:
-** b Output is buffered (DEFAULT)
-** u Output is unbuffered
-**
-** t no operation
-**
-** j Output code is JIS 7 bit (DEFAULT SELECT)
-** s Output code is MS Kanji (DEFAULT SELECT)
-** e Output code is AT&T JIS (DEFAULT SELECT)
-** w Output code is AT&T JIS (DEFAULT SELECT)
-** l Output code is JIS 7bit and ISO8859-1 Latin-1
-**
-** m MIME conversion for ISO-2022-JP
-** I Convert non ISO-2022-JP charactor to GETA by Pekoe <pekoe@lair.net>
-** i_ Output sequence to designate JIS-kanji (DEFAULT_J)
-** o_ Output sequence to designate single-byte roman characters (DEFAULT_R)
-** M MIME output conversion
-**
-** r {de/en}crypt ROT13/47
-**
-** v display Version
-**
-** T Text mode output (for MS-DOS)
-**
-** x Do not convert X0201 kana into X0208
-** Z Convert X0208 alphabet to ASCII
-**
-** f60 fold option
-**
-** m MIME decode
-** B try to fix broken JIS, missing Escape
-** B[1-9] broken level
-**
-** O Output to 'nkf.out' file or last file name
-** d Delete \r in line feed
-** c Add \r in line feed
-** -- other long option
-** -- ignore following option (don't use with -O )
-**
-**/
-
+#include "config.h"
+#include "utf8tbl.h"
#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
#define MSDOS
#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
#define FALSE 0
#define TRUE 1
-/* state of output_mode and input_mode
+/* state of output_mode and input_mode
c2 0 means ASCII
X0201
#define nkf_isgraph(c) ('!'<=c && c<='~')
#define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \
('A'<=c&&c<='F') ? (c-'A'+10) : \
- ('a'<=c&&c<='f') ? (c-'a'+10) : 0 )
+ ('a'<=c&&c<='f') ? (c-'a'+10) : 0)
#define bin2hex(c) ("0123456789ABCDEF"[c&15])
#define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3)
int _file_stat;
};
-static char *input_codename = "";
+static char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
#ifndef PERL_XS
static const char *CopyRight = COPY_RIGHT;
/* MIME preprocessor fifo */
#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
-#define MIME_BUF_MASK (MIME_BUF_SIZE-1)
+#define MIME_BUF_MASK (MIME_BUF_SIZE-1)
#define Fifo(n) mime_buf[(n)&MIME_BUF_MASK]
static unsigned char mime_buf[MIME_BUF_SIZE];
static unsigned int mime_top = 0;
static int broken_f = FALSE; /* convert ESC-less broken JIS */
static int iso8859_f = FALSE; /* ISO8859 through */
static int mimeout_f = FALSE; /* base64 mode */
-#if defined(MSDOS) || defined(__OS2__)
+#if defined(MSDOS) || defined(__OS2__)
static int x0201_f = TRUE; /* Assume JISX0201 kana */
#else
static int x0201_f = NO_X0201; /* Assume NO JISX0201 */
#endif
static void set_input_codename(char *codename);
static int is_inputcode_mixed = FALSE;
-static int is_inputcode_set = FALSE;
#ifdef EXEC_IO
static int exec_f = 0;
static char *get_backup_filename(const char *suffix, const char *filename);
#endif
-static int nlmode_f = 0; /* CR, LF, CRLF */
-static nkf_char prev_cr = 0;
+static int nlmode_f = 0; /* CR, LF, CRLF */
+static int input_nextline = 0; /* 0: unestablished, EOF: MIXED */
+static nkf_char prev_cr = 0; /* CR or 0 */
#ifdef EASYWIN /*Easy Win */
static int end_check;
#endif /*Easy Win */
if (binmode_f == TRUE)
#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
- if (freopen("","wb",stdout) == NULL)
+ if (freopen("","wb",stdout) == NULL)
return (-1);
#else
setbinmode(stdout);
int is_argument_error = FALSE;
while (argc--) {
is_inputcode_mixed = FALSE;
- is_inputcode_set = FALSE;
- input_codename = "";
+ input_codename = NULL;
#ifdef CHECK_OPTION
iconv_for_check = 0;
#endif
}
}else
#endif
- if(argc == 1 ) {
+ if(argc == 1) {
outfname = *argv++;
argc--;
} else {
}
if (binmode_f == TRUE) {
#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
- if (freopen("","wb",stdout) == NULL)
+ if (freopen("","wb",stdout) == NULL)
return (-1);
#else
setbinmode(stdout);
}
if (binmode_f == TRUE)
#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
- if (freopen("","rb",fin) == NULL)
+ if (freopen("","rb",fin) == NULL)
return (-1);
#else
setbinmode(fin);
-#endif
+#endif
setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
if (nop_f)
noconvert(fin);
return(-1);
}
#ifdef EASYWIN /*Easy Win */
- if (file_out_f == FALSE)
+ if (file_out_f == FALSE)
scanf("%d",&end_check);
- else
+ else
fclose(stdout);
#else /* for Other OS */
- if (file_out_f == TRUE)
+ if (file_out_f == TRUE)
fclose(stdout);
#endif /*Easy Win */
return (0);
encode_fallback = encode_fallback_html;
continue;
}
- if (strcmp(long_option[i].name, "fb-xml" ) == 0){
+ if (strcmp(long_option[i].name, "fb-xml") == 0){
encode_fallback = encode_fallback_xml;
continue;
}
input_f = LATIN1_INPUT;
continue;
case 'i': /* Kanji IN ESC-$-@/B */
- if (*cp=='@'||*cp=='B')
+ if (*cp=='@'||*cp=='B')
kanji_intro = *cp++;
continue;
case 'o': /* ASCII IN ESC-(-J/B */
- if (*cp=='J'||*cp=='B'||*cp=='H')
+ if (*cp=='J'||*cp=='B'||*cp=='H')
ascii_intro = *cp++;
continue;
case 'h':
- /*
+ /*
bit:1 katakana->hiragana
bit:2 hiragana->katakana
*/
- if ('9'>= *cp && *cp>='0')
+ if ('9'>= *cp && *cp>='0')
hira_f |= (*cp++ -'0');
- else
+ else
hira_f |= 1;
continue;
case 'r':
rot_f = TRUE;
continue;
-#if defined(MSDOS) || defined(__OS2__)
+#if defined(MSDOS) || defined(__OS2__)
case 'T':
binmode_f = FALSE;
continue;
ESC-(-I in JIS, EUC, MS Kanji
SI/SO in JIS, EUC, MS Kanji
SSO in EUC, JIS, not in MS Kanji
- MS Kanji (0xa0-0xdf)
+ MS Kanji (0xa0-0xdf)
output X0201
ESC-(-I in JIS (0x20-0x5f)
SSO in EUC (0xa0-0xdf)
- 0xa0-0xd in MS Kanji (0xa0-0xdf)
+ 0xa0-0xdf in MS Kanji (0xa0-0xdf)
*/
continue;
case 'X': /* Assume X0201 kana */
fold_len *= 10;
fold_len += *cp++ - '0';
}
- if (!(0<fold_len && fold_len<BUFSIZ))
+ if (!(0<fold_len && fold_len<BUFSIZ))
fold_len = DEFAULT_FOLD;
if (*cp=='-') {
fold_margin = 0;
bit:1 allow any x on ESC-(-x or ESC-$-x
bit:2 reset to ascii on NL
*/
- if ('9'>= *cp && *cp>='0')
+ if ('9'>= *cp && *cp>='0')
broken_f |= 1<<(*cp++ -'0');
- else
+ else
broken_f |= TRUE;
continue;
#ifndef PERL_XS
guess_f = TRUE;
#endif
continue;
- case SP:
+ case SP:
/* module muliple options in a string are allowed for Perl moudle */
while(*cp && *cp++!='-');
continue;
struct input_code *p = find_inputcode_byfunc(iconv);
if (p){
set_input_codename(p->name);
- debug(input_codename);
+ debug(p->name);
}
iconv_for_check = iconv;
}
void module_connection(void)
{
- oconv = output_conv;
+ oconv = output_conv;
o_putc = std_putc;
/* replace continucation module, from output side */
/* base64_count = 0; */
}
- if (nlmode_f) {
+ if (nlmode_f || guess_f) {
o_nlconv = oconv; oconv = nl_conv;
}
if (rot_f) {
}
/*
- Conversion main loop. Code detection only.
+ Conversion main loop. Code detection only.
*/
nkf_char kanji_convert(FILE *f)
if (!estab_f&&!mime_decode_mode) {
/* in case of not established yet */
/* It is still ambiguious */
- if (h_conv(f, c2, c1)==EOF)
+ if (h_conv(f, c2, c1)==EOF)
LAST;
- else
+ else
c2 = 0;
NEXT;
} else {
}
} else if ((c1 > SP) && (c1 != DEL)) {
/* in case of Roman characters */
- if (shift_mode) {
+ if (shift_mode) {
/* output 1 shifted byte */
if (iso8859_f) {
c2 = ISO8859_1;
SEND;
- } else if (SP<=c1 && c1<(0xe0&0x7f) ){
+ } else if (SP <= c1 && c1 < (0xe0&0x7f)){
/* output 1 shifted byte */
if(iso2022jp_f && x0201_f==NO_X0201) {
(*oconv)(GETA1, GETA2);
/* in case of Kanji shifted */
c2 = c1;
NEXT;
- } else if (c1 == '=' && mime_f && !mime_decode_mode ) {
+ } else if (c1 == '=' && mime_f && !mime_decode_mode) {
/* Check MIME code */
if ((c1 = (*i_getc)(f)) == EOF) {
(*oconv)(0, '=');
/* =? is mime conversion start sequence */
if(mime_f == STRICT_MIME) {
/* check in real detail */
- if (mime_begin_strict(f) == EOF)
+ if (mime_begin_strict(f) == EOF)
LAST;
else
NEXT;
- } else if (mime_begin(f) == EOF)
+ } else if (mime_begin(f) == EOF)
LAST;
else
NEXT;
NEXT;
}
} else {
- /* normal ASCII code */
+ /* normal ASCII code */
SEND;
}
} else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
- shift_mode = FALSE;
+ shift_mode = FALSE;
NEXT;
} else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
- shift_mode = TRUE;
+ shift_mode = TRUE;
NEXT;
} else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
if ((c1 = (*i_getc)(f)) == EOF) {
} else if (c1 == '$') {
if ((c1 = (*i_getc)(f)) == EOF) {
/*
- (*oconv)(0, ESC); don't send bogus code
+ (*oconv)(0, ESC); don't send bogus code
(*oconv)(0, '$'); */
LAST;
} else if (c1 == '@'|| c1 == 'B') {
shift_mode = FALSE;
set_input_codename("ISO-2022-JP");
#ifdef CHECK_OPTION
- debug(input_codename);
+ debug("ISO-2022-JP");
#endif
NEXT;
} else if (c1 == '(') {
if ((c1 = (*i_getc)(f)) == EOF) {
- /* don't send bogus code
+ /* don't send bogus code
(*oconv)(0, ESC);
(*oconv)(0, '$');
(*oconv)(0, '(');
}
} else if (c1 == '(') {
if ((c1 = (*i_getc)(f)) == EOF) {
- /* don't send bogus code
+ /* don't send bogus code
(*oconv)(0, ESC);
(*oconv)(0, '('); */
LAST;
SEND;
}
}
- } else if ( c1 == 'N' || c1 == 'n' ){
+ } else if ( c1 == 'N' || c1 == 'n'){
/* SS2 */
c3 = (*i_getc)(f); /* skip SS2 */
if ( (SP<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){
/* J-PHONE emoji */
if ((c1 = (*i_getc)(f)) == EOF) {
/*
- (*oconv)(0, ESC); don't send bogus code
+ (*oconv)(0, ESC); don't send bogus code
(*oconv)(0, '$'); */
LAST;
} else {
SEND;
}
}
- if (!nlmode_f) {
- if (prev_cr && c1 == LF) nlmode_f = CRLF;
- else nlmode_f = c1;
- }
- } else if (c1 == DEL && input_mode == X0208 ) {
+ } else if (c1 == DEL && input_mode == X0208) {
/* CP5022x */
c2 = c1;
NEXT;
- } else
+ } else
SEND;
}
/* send: */
/* epilogue */
(*iconv)(EOF, 0, 0);
- if (!is_inputcode_set)
+ if (!input_codename)
{
if (is_8bit) {
struct input_code *p = input_code_list;
++p;
}
set_input_codename(result->name);
+#ifdef CHECK_OPTION
+ debug(result->name);
+#endif
}
}
return 1;
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
-
+
if (c2 < 0 || 0xff < c2) {
}else if (c2 == 0) { /* 0 : 1 byte*/
c0 = 0;
}
#endif
- if (c2 == 0) {
+ if (c2 == 0) {
output_mode = ASCII;
(*o_putc)(c1);
} else if (c2 == ISO8859_1) {
if (c2 == EOF) {
(*o_putc)(EOF);
return;
- } else if (c2 == 0) {
+ } else if (c2 == 0) {
output_mode = ASCII;
(*o_putc)(c1);
} else if (c2 == X0201) {
(*o_putc)(c1);
} else if (c2==ISO8859_1) {
/* iso8859 introduction, or 8th bit on */
- /* Can we convert in 7bit form using ESC-'-'-A ?
+ /* Can we convert in 7bit form using ESC-'-'-A ?
Is this popular? */
output_mode = ISO8859_1;
(*o_putc)(c1|0x80);
return broken_buf[--broken_counter];
}
c= (*i_bgetc)(f);
- if (c=='$' && broken_last != ESC
+ if (c=='$' && broken_last != ESC
&& (input_mode==ASCII || input_mode==X0201)) {
c1= (*i_bgetc)(f);
broken_last = 0;
if (c1=='@'|| c1=='B') {
- broken_buf[0]=c1; broken_buf[1]=c;
+ broken_buf[0]=c1; broken_buf[1]=c;
broken_counter=2;
return ESC;
} else {
(*i_bungetc)(c1,f);
return c;
}
- } else if (c=='(' && broken_last != ESC
+ } else if (c=='(' && broken_last != ESC
&& (input_mode==X0208 || input_mode==X0201)) { /* ) */
c1= (*i_bgetc)(f);
broken_last = 0;
void nl_conv(nkf_char c2, nkf_char c1)
{
- if (prev_cr) {
+ if (guess_f && input_nextline != EOF) {
+ if (c2 == 0 && c1 == LF) {
+ if (!input_nextline) input_nextline = prev_cr ? CRLF : LF;
+ else if (input_nextline != (prev_cr ? CRLF : LF)) input_nextline = EOF;
+ } else if (c2 == 0 && c1 == CR && input_nextline == LF) input_nextline = EOF;
+ else if (!prev_cr);
+ else if (!input_nextline) input_nextline = CR;
+ else if (input_nextline != CR) input_nextline = EOF;
+ }
+ if (prev_cr || c2 == 0 && c1 == LF) {
prev_cr = 0;
- if (! (c2==0&&c1==LF) ) {
- nl_conv(0,LF);
- }
- }
- if (c2) {
- (*o_nlconv)(c2,c1);
- } else if (c1==CR) {
- prev_cr = c1;
- } else if (c1==LF) {
- if (nlmode_f==CRLF) {
- (*o_nlconv)(0,CR);
- } else if (nlmode_f==CR) {
- (*o_nlconv)(0,CR);
- return;
- }
- (*o_nlconv)(0,LF);
- } else if (c1!='\032' || nlmode_f!=LF){
- (*o_nlconv)(c2,c1);
+ if (nlmode_f != LF) (*o_nlconv)(0, CR);
+ if (nlmode_f != CR) (*o_nlconv)(0, LF);
}
+ if (c2 == 0 && c1 == CR) prev_cr = CR;
+ else if (c2 != 0 || c1 != LF) (*o_nlconv)(c2, c1);
}
-/*
+/*
Return value of fold_conv()
LF add newline and output char
#define char_size(c2,c1) (c2?2:1)
void fold_conv(nkf_char c2, nkf_char c1)
-{
+{
nkf_char prev0;
nkf_char fold_state;
|| ((c1==CR||(c1==LF&&f_prev!=CR))
&& fold_preserve_f)) {
/* new line */
- if (fold_preserve_f) {
+ if (fold_preserve_f) {
f_prev = c1;
f_line = 0;
fold_state = CR;
fold_state = 0;
} else {
f_prev = c1;
- if (++f_line<=fold_len)
+ if (++f_line<=fold_len)
fold_state = SP;
else {
f_line = 0;
if (f_prev == SP) {
fold_state = 0; /* remove duplicate spaces */
} else {
- f_prev = SP;
- if (++f_line<=fold_len)
+ f_prev = SP;
+ if (++f_line<=fold_len)
fold_state = SP; /* output ASCII space only */
else {
f_prev = SP; f_line = 0;
} else {
prev0 = f_prev; /* we still need this one... , but almost done */
f_prev = c1;
- if (c2 || c2==X0201)
+ if (c2 || c2==X0201)
f_prev |= 0x80; /* this is Japanese */
f_line += char_size(c2,c1);
if (f_line<=fold_len) { /* normal case */
fold_state = LF;/* add one new f_line before this character */
}
} else if (c2==0) {
- /* kinsoku point in ASCII */
+ /* kinsoku point in ASCII */
if ( c1==')'|| /* { [ ( */
c1==']'||
c1=='}'||
c1=='?'||
c1=='/'||
c1==':'||
- c1==';' ) {
+ c1==';') {
fold_state = 1;
/* just after special */
} else if (!is_alnum(prev0)) {
else if (c1=='+') fold_state = 1; /* \e$B!+\e(B */
else if (c1==',') fold_state = 1; /* \e$B!,\e(B */
/* default no fold in kinsoku */
- else {
+ else {
fold_state = LF;
f_line = char_size(c2,c1);
/* add one new f_line before this character */
}
} else {
f_line = char_size(c2,c1);
- fold_state = LF;
+ fold_state = LF;
/* add one new f_line before this character */
}
}
}
/* terminator process */
switch(fold_state) {
- case LF:
+ case LF:
(*o_fconv)(0,LF);
(*o_fconv)(c2,c1);
break;
- case 0:
+ case 0:
return;
- case CR:
+ case CR:
(*o_fconv)(0,LF);
break;
- case TAB:
- case SP:
+ case TAB:
+ case SP:
(*o_fconv)(0,SP);
break;
default:
return;
}
- if (alpha_f&1 && c2 == 0x23 ) {
+ if (alpha_f&1 && c2 == 0x23) {
/* JISX0208 Alphabet */
c2 = 0;
- } else if (c2 == 0x21) {
+ } else if (c2 == 0x21) {
/* JISX0208 Kigou */
if (0x21==c1) {
if (alpha_f&2) {
(*o_zconv)(0, SP);
(*o_zconv)(0, SP);
return;
- }
+ }
} else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
c2 = 0;
c1 = fv[c1-0x20];
- }
+ }
}
if (alpha_f&8 && c2 == 0) {
#define rot13(c) ( \
- ( c < 'A' ) ? c: \
+ ( c < 'A') ? c: \
(c <= 'M') ? (c + 13): \
(c <= 'Z') ? (c - 13): \
(c < 'a') ? (c): \
)
#define rot47(c) ( \
- ( c < '!' ) ? c: \
- ( c <= 'O' ) ? (c + 47) : \
- ( c <= '~' ) ? (c - 47) : \
+ ( c < '!') ? c: \
+ ( c <= 'O') ? (c + 47) : \
+ ( c <= '~') ? (c - 47) : \
c \
)
r[0]='='; r[1]='?';
for(i=2;p[i]>SP;i++) { /* start at =? */
- if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) {
+ if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
/* pattern fails, try next one */
q = p;
while (mime_pattern[++j]) {
if (!unbuf_f) {
/* do MIME integrity check */
return mime_integrity(f,mime_pattern[j]);
- }
+ }
}
switch_mime_getc();
mimebuf_f = TRUE;
{
if (mimebuf_f)
(*i_mungetc_buf)(c,f);
- else
+ else
Fifo(--mime_input) = (unsigned char)c;
return c;
}
/* We accept any character type even if it is breaked by new lines */
c1 = (*i_getc)(f); Fifo(mime_last++) = (unsigned char)c1;
if (c1==LF||c1==SP||c1==CR||
- c1=='-'||c1=='_'||is_alnum(c1) ) continue;
+ c1=='-'||c1=='_'||is_alnum(c1)) continue;
if (c1=='=') {
/* Failed. But this could be another MIME preemble */
(*i_ungetc)(c1,f);
void debug(const char *str)
{
if (debug_f){
- fprintf(stderr, "%s\n", str);
+ fprintf(stderr, "%s\n", str ? str : "NULL");
}
}
#endif
void set_input_codename(char *codename)
{
- if (guess_f &&
- is_inputcode_set &&
- strcmp(codename, "") != 0 &&
- strcmp(codename, input_codename) != 0)
- {
+ if (!input_codename) {
+ input_codename = codename;
+ } else if (strcmp(codename, input_codename) != 0) {
is_inputcode_mixed = TRUE;
+ input_codename = "";
}
- input_codename = codename;
- is_inputcode_set = TRUE;
}
#if !defined(PERL_XS) && !defined(WIN32DLL)
{
char *codename = "BINARY";
char *str_nlmode = NULL;
- if (!is_inputcode_mixed) {
- if (strcmp(input_codename, "") == 0) {
- codename = "ASCII";
- } else {
- codename = input_codename;
- }
- if (nlmode_f == CR) str_nlmode = "CR";
- else if (nlmode_f == LF) str_nlmode = "LF";
- else if (nlmode_f == CRLF) str_nlmode = "CRLF";
+ if (filename != NULL) printf("%s: ", filename);
+ if (input_codename && !*input_codename) {
+ printf("BINARY\n");
+ } else {
+ printf("%s%s\n",
+ (input_codename ? input_codename : "ASCII"),
+ input_nextline == CR ? " (CR)" :
+ input_nextline == LF ? " (LF)" :
+ input_nextline == CRLF ? " (CRLF)" :
+ input_nextline == EOF ? " (MIXED NL)" :
+ "");
}
- if (filename != NULL) printf("%s:", filename);
- if (str_nlmode != NULL) printf("%s (%s)\n", codename, str_nlmode);
- else printf("%s\n", codename);
}
#endif /*WIN32DLL*/
-#ifdef INPUT_OPTION
+#ifdef INPUT_OPTION
nkf_char hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
{
int i=0, j, k=1, lower, upper;
nkf_char buf[9];
const nkf_nfchar *array;
-
+
buf[i] = (*g)(f);
while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
#endif /* UNICODE_NORMALIZATION */
-nkf_char
+nkf_char
mime_getc(FILE *f)
{
nkf_char c1, c2, c3, c4, cc;
if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
return c1;
}
-
+
mime_decode_mode = exit_mode; /* prepare for quit */
if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
/* Base64 encoding */
- /*
- MIME allows line break in the middle of
+ /*
+ MIME allows line break in the middle of
Base64, but we are very pessimistic in decoding
- in unbuf mode because MIME encoded code may broken by
+ in unbuf mode because MIME encoded code may be broken by
less or editor's control sequence (such as ESC-[-K in unbuffered
mode. ignore incomplete MIME.
*/
if (c2==EOF)
return (EOF);
if (mime_f != STRICT_MIME) goto mime_c2_retry;
- if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
+ if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
return c2;
}
if ((c1 == '?') && (c2 == '=')) {
if (c3==EOF)
return (EOF);
if (mime_f != STRICT_MIME) goto mime_c3_retry;
- if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
+ if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
return c3;
}
mime_c4_retry:
if (c4==EOF)
return (EOF);
if (mime_f != STRICT_MIME) goto mime_c4_retry;
- if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
+ if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
return c4;
}
if (c3 != '=') {
Fifo(mime_last++) = (unsigned char)cc;
cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
- if (c4 != '=')
+ if (c4 != '=')
Fifo(mime_last++) = (unsigned char)cc;
}
} else {
*/
mime_input = mime_top;
mime_last = mime_top;
-
+
while(*p) Fifo(mime_input++) = *p++;
d = 0;
q = mime_input;
/* checked. skip header, start decode */
Fifo(mime_input++) = (unsigned char)c;
/* mime_last_input = mime_input; */
- mime_input = q;
+ mime_input = q;
switch_mime_getc();
return 1;
}
}
}
mimeout_mode = mime_encode_method[i];
-
+
i = 0;
if (base64_count>45) {
if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
base64_count = 1;
if (!mimeout_preserve_space && mimeout_buf_count>0
&& (mimeout_buf[i]==SP || mimeout_buf[i]==TAB
- || mimeout_buf[i]==CR || mimeout_buf[i]==LF )) {
+ || mimeout_buf[i]==CR || mimeout_buf[i]==LF)) {
i++;
}
}
if (!mimeout_preserve_space) {
for (;i<mimeout_buf_count;i++) {
if (mimeout_buf[i]==SP || mimeout_buf[i]==TAB
- || mimeout_buf[i]==CR || mimeout_buf[i]==LF ) {
+ || mimeout_buf[i]==CR || mimeout_buf[i]==LF) {
(*o_mputc)(mimeout_buf[i]);
base64_count ++;
} else {
}
}
mimeout_preserve_space = FALSE;
-
+
while(*p) {
(*o_mputc)(*p++);
base64_count ++;
}
if (mimeout_mode) {
if (mimeout_f!=FIXED_MIME) {
- close_mime();
+ close_mime();
} else if (mimeout_mode != 'Q')
mimeout_mode = 'B';
}
}
return;
}
-
+
/* mimeout_f != FIXED_MIME */
if (c == EOF) { /* c==EOF */
}
if (mimeout_mode=='Q') {
- if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
+ if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
if (c == CR || c == LF) {
close_mime();
(*o_mputc)(c);
}
}else{
/* mimeout_mode == 'B', 1, 2 */
- if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) {
+ if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) {
if (lastchar == CR || lastchar == LF){
if (nkf_isblank(c)) {
for (i=0;i<mimeout_buf_count;i++) {
#endif
guess_f = FALSE;
is_inputcode_mixed = FALSE;
- is_inputcode_set = FALSE;
#ifdef EXEC_IO
exec_f = 0;
#endif
#ifdef CHECK_OPTION
iconv_for_check = 0;
#endif
- input_codename = "";
+ input_codename = NULL;
#ifdef WIN32DLL
reinitdll();
#endif /*WIN32DLL*/
fprintf(stderr,"\n%s\n",CopyRight);
}
#endif /*PERL_XS*/
-
-/**
- ** \e$B%Q%C%A@):n<T\e(B
- ** void@merope.pleiades.or.jp (Kusakabe Youichi)
- ** NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
- ** ohta@src.ricoh.co.jp (Junn Ohta)
- ** inouet@strl.nhk.or.jp (Tomoyuki Inoue)
- ** kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
- ** Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
- ** a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
- ** kono@ie.u-ryukyu.ac.jp (Shinji Kono)
- ** GHG00637@nifty-serve.or.jp (COW)
- **
- **/
-
-/* end */