** E-Mail: furukawa@tcp-ip.or.jp
** \e$B$^$G8fO"Mm$r$*4j$$$7$^$9!#\e(B
***********************************************************************/
-/* $Id: nkf.c,v 1.58 2005/02/17 16:48:48 rei_furukawa Exp $ */
-#define NKF_VERSION "2.0.4"
-#define NKF_RELEASE_DATE "2005-02-02"
+/* $Id: nkf.c,v 1.69 2005/06/27 16:05:59 naruse Exp $ */
+#define NKF_VERSION "2.0.5"
+#define NKF_RELEASE_DATE "2005-06-28"
#include "config.h"
static char *CopyRight =
- "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2004 Kono, Furukawa";
+ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse";
/*
#ifndef MSDOS /* UNIX, OS/2 */
#include <unistd.h>
#include <utime.h>
-#else
+#else /* defined(MSDOS) */
+#ifdef __WIN32__
+#ifdef __BORLANDC__ /* BCC32 */
+#include <utime.h>
+#else /* !defined(__BORLANDC__) */
+#include <sys/utime.h>
+#endif /* (__BORLANDC__) */
+#else /* !defined(__WIN32__) */
#if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */
#include <sys/utime.h>
#elif defined(__TURBOC__) /* BCC */
#include <utime.h>
#elif defined(LSI_C) /* LSI C */
+#endif /* (__WIN32__) */
#endif
#endif
#endif
#define UTF8 12
#define UTF8_INPUT 13
-#define UTF16LE_INPUT 14
-#define UTF16BE_INPUT 15
+#define UTF16BE_INPUT 14
+#define UTF16LE_INPUT 15
#define WISH_TRUE 15
STATIC int kanji_convert PROTO((FILE *f));
STATIC int h_conv PROTO((FILE *f,int c2,int c1));
STATIC int push_hold_buf PROTO((int c2));
-STATIC void set_iconv PROTO((int f, int (*iconv_func)()));
+STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0)));
STATIC int s_iconv PROTO((int c2,int c1,int c0));
STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1));
STATIC int e_iconv PROTO((int c2,int c1,int c0));
STATIC void usage PROTO(());
STATIC void version PROTO(());
STATIC void options PROTO((unsigned char *c));
-#ifdef PERL_XS
+#if defined(PERL_XS) || defined(WIN32DLL)
STATIC void reinit PROTO(());
#endif
static int input_f = FALSE; /* non fixed input code */
static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */
+static int mime_decode_f = FALSE; /* mime decode is explicitly on */
static int mimebuf_f = FALSE; /* MIME buffered input */
static int broken_f = FALSE; /* convert ESC-less broken JIS */
static int iso8859_f = FALSE; /* ISO8859 through */
#ifdef UTF8_INPUT_ENABLE
STATIC void w_status PROTO((struct input_code *, int));
STATIC void w16_status PROTO((struct input_code *, int));
-static int utf16_mode = UTF16LE_INPUT;
+static int utf16_mode = UTF16BE_INPUT;
#endif
struct input_code input_code_list[] = {
static int end_check;
#endif /*Easy Win */
-#ifndef PERL_XS
+#define STD_GC_BUFSIZE (256)
+int std_gc_buf[STD_GC_BUFSIZE];
+int std_gc_ndx;
+
+#ifdef WIN32DLL
+#include "nkf32dll.c"
+#elif defined(PERL_XS)
+#else /* WIN32DLL */
int
main(argc, argv)
int argc;
} else {
int nfiles = argc;
while (argc--) {
+ is_inputcode_mixed = FALSE;
+ is_inputcode_set = FALSE;
+ input_codename = "";
if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
perror(*--argv);
return(-1);
#ifdef OVERWRITE
if (overwrite) {
struct stat sb;
-#if defined(MSDOS) && !defined(__MINGW32__)
+#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
time_t tb[2];
#else
struct utimbuf tb;
}
/* \e$B%?%$%`%9%?%s%W$rI|85\e(B */
-#if defined(MSDOS) && !defined(__MINGW32__)
+#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__)
tb[0] = tb[1] = sb.st_mtime;
if (utime(outfname, tb)) {
fprintf(stderr, "Can't set timestamp %s\n", outfname);
#else /* for Other OS */
if (file_out == TRUE)
fclose(stdout);
-#endif
+#endif /*Easy Win */
return (0);
}
-#endif
+#endif /* WIN32DLL */
static
struct {
if (option_mode==1)
return;
- if (*cp++ != '-')
- return;
+ while(*cp && *cp++!='-');
while (*cp) {
- if (p && !*cp) {
- cp = p;
- p = 0;
- }
+ p = 0;
switch (*cp++) {
case '-': /* literal options */
if (!*cp) { /* ignore the rest of arguments */
for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) {
int j;
p = (unsigned char *)long_option[i].name;
- for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++);
- if (*p == cp[j]){
+ for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
+ if (*p == cp[j] || cp[j] == ' '){
p = &cp[j];
break;
}
if (p == 0) return;
cp = (unsigned char *)long_option[i].alias;
if (!*cp){
+ cp = p;
#ifdef OVERWRITE
if (strcmp(long_option[i].name, "overwrite") == 0){
file_out = TRUE;
continue;
case 'h':
/*
- bit:1 hira -> kata
- bit:2 kata -> hira
+ bit:1 katakana->hiragana
+ bit:2 hiragana->katakana
*/
if ('9'>= *cp && *cp>='0')
hira_f |= (*cp++ -'0');
#ifdef UTF8_INPUT_ENABLE
case 'W': /* UTF-8 input */
if ('1'== cp[0] && '6'==cp[1]) {
- input_f = UTF16LE_INPUT;
+ input_f = UTF16BE_INPUT;
+ utf16_mode = UTF16BE_INPUT;
+ cp += 2;
if (cp[0]=='L') {
cp++;
+ input_f = UTF16LE_INPUT;
+ utf16_mode = UTF16LE_INPUT;
} else if (cp[0] == 'B') {
cp++;
input_f = UTF16BE_INPUT;
+ utf16_mode = UTF16BE_INPUT;
}
} else if (cp[0] == '8') {
cp++;
}
continue;
case 'm': /* MIME support */
+ /* mime_decode_f = TRUE; */ /* left disabled: enabling this has too many side effects... */
if (*cp=='B'||*cp=='Q') {
mime_decode_mode = *cp++;
mimebuf_f = FIXED_MIME;
} else if (*cp=='S') {
mime_f = STRICT_MIME; cp++;
} else if (*cp=='0') {
+ mime_decode_f = FALSE;
mime_f = FALSE; cp++;
}
continue;
continue;
case ' ':
/* module muliple options in a string are allowed for Perl moudle */
- while(*cp && *cp!='-') cp++;
- if(*cp=='-') cp++;
+ while(*cp && *cp++!='-');
continue;
default:
/* bogus option but ignored */
}
}
-#define STD_GC_BUFSIZE (256)
-int std_gc_buf[STD_GC_BUFSIZE];
-int std_gc_ndx;
-
+#ifndef WIN32DLL
int
std_getc(f)
FILE *f;
}
return getc(f);
}
+#endif /*WIN32DLL*/
int
std_ungetc(c,f)
return c;
}
+#ifndef WIN32DLL
void
std_putc(c)
int c;
if(c!=EOF)
putchar(c);
}
+#endif /*WIN32DLL*/
int
noconvert(f)
#ifdef UTF8_INPUT_ENABLE
} else if (input_f == UTF8_INPUT) {
set_iconv(-TRUE, w_iconv);
+ } else if (input_f == UTF16BE_INPUT) {
+ set_iconv(-TRUE, w_iconv16);
} else if (input_f == UTF16LE_INPUT) {
set_iconv(-TRUE, w_iconv16);
#endif
{
int c1,
c2, c3;
+ int is_8bit = FALSE;
module_connection();
c2 = 0;
/* 8 bit code */
if (!estab_f && !iso8859_f) {
/* not established yet */
+ if (!is_8bit) is_8bit = TRUE;
c2 = c1;
NEXT;
} else { /* estab_f==TRUE */
} else if ((c1 == NL || c1 == CR) && broken_f&4) {
input_mode = ASCII; set_iconv(FALSE, 0);
SEND;
- /*
- } else if (c1 == NL && mime_f && !mime_decode_mode ) {
+ } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) {
i_ungetc(SPACE,f);
continue;
}
c1 = NL;
SEND;
- } else if (c1 == CR && mime_f && !mime_decode_mode ) {
+ } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) {
if ((c1=(*i_getc)(f))!=EOF) {
if (c1==SPACE) {
i_ungetc(SPACE,f);
}
c1 = CR;
SEND;
- */
} else
SEND;
}
/* epilogue */
(*iconv)(EOF, 0, 0);
+ if (!is_inputcode_set)
+ {
+ if (is_8bit) {
+ struct input_code *p = input_code_list;
+ struct input_code *result = p;
+ while (p->name){
+ if (p->score < result->score) result = p;
+ ++p;
+ }
+ set_input_codename(result->name);
+ }
+ }
return 1;
}
int ret;
if (c2==0376 && c1==0377){
- utf16_mode = UTF16LE_INPUT;
+ utf16_mode = UTF16BE_INPUT;
return 0;
} else if (c2==0377 && c1==0376){
- utf16_mode = UTF16BE_INPUT;
+ utf16_mode = UTF16LE_INPUT;
return 0;
}
- if (c2 != EOF && utf16_mode == UTF16BE_INPUT) {
+ if (c2 != EOF && utf16_mode == UTF16LE_INPUT) {
int tmp;
tmp=c1; c1=c2; c2=tmp;
}
is_inputcode_set = TRUE;
}
+#ifndef WIN32DLL
void
print_guessed_code (filename)
char *filename;
if (filename != NULL) printf("%s:", filename);
printf("%s\n", codename);
}
+#endif /*WIN32DLL*/
int
hex2bin(x)
i = 0;
if (base64_count>45) {
+ if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){
+ (*o_mputc)(mimeout_buf[i]);
+ i++;
+ }
(*o_mputc)(NL);
(*o_mputc)(SPACE);
base64_count = 1;
(*o_base64conv)(0,NL);
(*o_base64conv)(0,SPACE);
}
- }else if (mime_lastchar2){
+ }/*else if (mime_lastchar2){
if (c1 <=DEL && !nkf_isspace(c1)){
(*o_base64conv)(0,SPACE);
}
- }
- }else{
+ }*/
+ }/*else{
if (c2 && mime_lastchar2 == 0
&& mime_lastchar1 && !nkf_isspace(mime_lastchar1)){
(*o_base64conv)(0,SPACE);
}
- }
+ }*/
mime_lastchar2 = c2;
mime_lastchar1 = c1;
}
mimeout_buf_count = 0;
i = 0;
for (;i<j;i++) {
- if (nkf_isspace(mimeout_buf[i])){
+ /*if (nkf_isspace(mimeout_buf[i])){
break;
- }
+ }*/
mimeout_addchar(mimeout_buf[i]);
}
eof_mime();
}
for (i=0;i<mimeout_buf_count;i++) {
(*o_mputc)(mimeout_buf[i]);
- base64_count++;
+ if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){
+ base64_count = 0;
+ }else{
+ base64_count++;
+ }
}
mimeout_buf[0] = c;
mimeout_buf_count = 1;
}
-#ifdef PERL_XS
+#if defined(PERL_XS) || defined(WIN32DLL)
void
reinit()
{
input_f = FALSE;
alpha_f = FALSE;
mime_f = STRICT_MIME;
+ mime_decode_f = FALSE;
mimebuf_f = FALSE;
broken_f = FALSE;
iso8859_f = FALSE;
}
}
#ifdef UTF8_INPUT_ENABLE
- utf16_mode = UTF16LE_INPUT;
+ utf16_mode = UTF16BE_INPUT;
#endif
mimeout_buf_count = 0;
mimeout_mode = 0;
#ifdef CHECK_OPTION
iconv_for_check = 0;
#endif
+ input_codename = "";
+#ifdef WIN32DLL
+ reinitdll();
+#endif /*WIN32DLL*/
}
#endif
{
fprintf(stderr,"nkf internal module connection failure.\n");
exit(1);
+ return 0; /* LINT: not reached after exit(1) above */
}
#ifndef PERL_XS
+#ifdef WIN32DLL
+#define fprintf dllprintf
+#endif
void
usage()
{
fprintf(stderr,"t no conversion\n");
fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
fprintf(stderr,"r {de/en}crypt ROT13/47\n");
- fprintf(stderr,"h 1 hirakana->katakana, 2 katakana->hirakana,3 both\n");
+ fprintf(stderr,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
fprintf(stderr,"v Show this usage. V: show version\n");
fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
fprintf(stderr," --x0212 Convert JISX0212\n");
fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
+ fprintf(stderr," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
#ifdef INPUT_OPTION
fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
#endif
,NKF_VERSION,NKF_RELEASE_DATE);
fprintf(stderr,"\n%s\n",CopyRight);
}
-#endif
+#endif /*PERL_XS*/
/**
** \e$B%Q%C%A@):n<T\e(B