From: NARUSE, Yui Date: Wed, 23 Jan 2008 18:10:25 +0000 (+0000) Subject: * refine file structure. X-Git-Tag: v2_0_9~53 X-Git-Url: http://git.sourceforge.jp/view?p=nkf%2Fnkf.git;a=commitdiff_plain;h=98df4ad0574bd25eba31f618aa4c8861953f872b;ds=sidebyside * refine file structure. --- diff --git a/Makefile b/Makefile index 0f4c1ca..4f6b2c3 100644 --- a/Makefile +++ b/Makefile @@ -7,10 +7,13 @@ PERL = perl RM = rm -rf VERSION = 2.0.8 -nkf : nkf.c config.h utf8tbl.o - $(CC) $(CFLAGS) -o nkf nkf.c utf8tbl.o +nkf : nkf.o utf8tbl.o + $(CC) $(CFLAGS) -o nkf nkf.o utf8tbl.o -utf8tbl.o : utf8tbl.c config.h +nkf.o : nkf.c nkf.h utf8tbl.h config.h + $(CC) $(CFLAGS) -c nkf.c + +utf8tbl.o : utf8tbl.c utf8tbl.h config.h $(CC) $(CFLAGS) -c utf8tbl.c clean: diff --git a/config.h b/config.h index 56f3237..473be76 100644 --- a/config.h +++ b/config.h @@ -1,86 +1,51 @@ #ifndef _CONFIG_H_ #define _CONFIG_H_ -/* UTF8 入出力 */ +/* UTF8 input and output */ #define UTF8_INPUT_ENABLE #define UTF8_OUTPUT_ENABLE -/* Shift_JIS 範囲外の文字を、CP932 で同値な文字に読み換える */ +/* invert characters invalid in Shift_JIS to CP932 */ #define SHIFTJIS_CP932 -/* オプションで入力を指定した時に、文字コードを固定する */ +/* fix input encoding when given by option */ #define INPUT_CODE_FIX -/* --overwrite オプション */ +/* --overwrite option */ /* by Satoru Takabayashi */ #define OVERWRITE -/* --cap-input, --url-input オプション */ +/* --cap-input, --url-input option */ #define INPUT_OPTION -/* --numchar-input オプション */ +/* --numchar-input option */ #define NUMCHAR_OPTION -/* --debug, --no-output オプション */ +/* --debug, --no-output option */ #define CHECK_OPTION /* JIS X0212 */ #define X0212_ENABLE -/* --exec-in, --exec-out オプション - * pipe, fork, execvp あたりが無いと動きません。 - * MS-DOS, MinGW などでは undef にしてください - * child process 終了時の処理がいいかげんなので、 - * デフォルトで無効にしています。 +/* --exec-in, --exec-out option + * require pipe, fork, execvp and so on. 
+ * please undef this on MS-DOS, MinGW + * this is still buggy around child process */ /* #define EXEC_IO */ -/* SunOS の cc を使うときは undef にしてください */ -#define ANSI_C_PROTOTYPE - -/* int が 32bit 未満の環境で NUMCHAR_OPTION を使うには、 - * コメントを外してください。 - */ -/* #define INT_IS_SHORT */ - - -#if defined(INT_IS_SHORT) -typedef long nkf_char; -typedef unsigned char nkf_nfchar; -#else -typedef int nkf_char; -typedef int nkf_nfchar; -#endif - /* Unicode Normalization */ #define UNICODE_NORMALIZATION -#ifndef WIN32DLL -/******************************/ -/* デフォルトの出力コード選択 */ -/* Select DEFAULT_CODE */ -/* #define DEFAULT_CODE_JIS */ -/* #define DEFAULT_CODE_SJIS */ -/* #define DEFAULT_CODE_EUC */ -/* #define DEFAULT_CODE_UTF8 */ -/******************************/ -#endif - -#if defined(NUMCHAR_OPTION) && !defined(UTF8_INPUT_ENABLE) -#define UTF8_INPUT_ENABLE -#endif +/* + * Select Default Output Encoding + * + * If not defined, locale encoding is used. + */ -#ifdef UNICODE_NORMALIZATION -#ifndef UTF8_INPUT_ENABLE -#define UTF8_INPUT_ENABLE -#endif -#define NORMALIZATION_TABLE_LENGTH 942 -#define NORMALIZATION_TABLE_NFC_LENGTH 3 -#define NORMALIZATION_TABLE_NFD_LENGTH 9 -struct normalization_pair{ - const nkf_nfchar nfc[NORMALIZATION_TABLE_NFC_LENGTH]; - const nkf_nfchar nfd[NORMALIZATION_TABLE_NFD_LENGTH]; -}; -#endif +/* #define DEFAULT_CODE_JIS */ +/* #define DEFAULT_CODE_SJIS */ +/* #define DEFAULT_CODE_EUC */ +/* #define DEFAULT_CODE_UTF8 */ #endif /* _CONFIG_H_ */ diff --git a/nkf.c b/nkf.c index 0144645..e265e78 100644 --- a/nkf.c +++ b/nkf.c @@ -30,158 +30,17 @@ * 現在、nkf は SorceForge にてメンテナンスが続けられています。 * http://sourceforge.jp/projects/nkf/ ***********************************************************************/ -/* $Id: nkf.c,v 1.165 2008/01/22 00:30:05 naruse Exp $ */ +#define NKF_IDENT "$Id: nkf.c,v 1.166 2008/01/23 09:10:25 naruse Exp $" #define NKF_VERSION "2.0.8" -#define NKF_RELEASE_DATE "2008-01-21" +#define NKF_RELEASE_DATE "2008-01-23" #define COPY_RIGHT \ "Copyright (C) 
1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \ "Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon" #include "config.h" +#include "nkf.h" #include "utf8tbl.h" -#ifndef MIME_DECODE_DEFAULT -#define MIME_DECODE_DEFAULT STRICT_MIME -#endif -#ifndef X0201_DEFAULT -#define X0201_DEFAULT TRUE -#endif - -#if DEFAULT_NEWLINE == 0x0D0A -#define PUT_NEWLINE(func) do {\ - func(0x0D);\ - func(0x0A);\ -} while (0) -#define OCONV_NEWLINE(func) do {\ - func(0, 0x0D);\ - func(0, 0x0A);\ -} while (0) -#elif DEFAULT_NEWLINE == 0x0D -#define PUT_NEWLINE(func) func(0x0D) -#define OCONV_NEWLINE(func) func(0, 0x0D) -#else -#define DEFAULT_NEWLINE 0x0A -#define PUT_NEWLINE(func) func(0x0A) -#define OCONV_NEWLINE(func) func(0, 0x0A) -#endif -#ifdef HELP_OUTPUT_STDERR -#define HELP_OUTPUT stderr -#else -#define HELP_OUTPUT stdout -#endif - -#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS) -#define MSDOS -#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__) -#define __WIN32__ -#endif -#endif - -#ifdef PERL_XS -#undef OVERWRITE -#endif - -#ifndef PERL_XS -#include -#endif - -#include -#include - -#if defined(MSDOS) || defined(__OS2__) -#include -#include -#if defined(_MSC_VER) || defined(__WATCOMC__) -#define mktemp _mktemp -#endif -#endif - -#ifdef MSDOS -#ifdef LSI_C -#define setbinmode(fp) fsetbin(fp) -#elif defined(__DJGPP__) -#include -#define setbinmode(fp) djgpp_setbinmode(fp) -#else /* Microsoft C, Turbo C */ -#define setbinmode(fp) setmode(fileno(fp), O_BINARY) -#endif -#else /* UNIX */ -#define setbinmode(fp) -#endif - -#if defined(__DJGPP__) -void djgpp_setbinmode(FILE *fp) -{ - /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. 
*/ - int fd, m; - fd = fileno(fp); - m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY; - __file_handle_set(fd, m); -} -#endif - -#ifdef _IOFBF /* SysV and MSDOS, Windows */ -#define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size) -#else /* BSD */ -#define setvbuffer(fp, buf, size) setbuffer(fp, buf, size) -#endif - -/*Borland C++ 4.5 EasyWin*/ -#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */ -#define EASYWIN -#ifndef __WIN16__ -#define __WIN16__ -#endif -#include -#endif - -#ifdef OVERWRITE -/* added by satoru@isoternet.org */ -#if defined(__EMX__) -#include -#endif -#include -#if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */ -#include -#if defined(__WATCOMC__) -#include -#else -#include -#endif -#else /* defined(MSDOS) */ -#ifdef __WIN32__ -#ifdef __BORLANDC__ /* BCC32 */ -#include -#else /* !defined(__BORLANDC__) */ -#include -#endif /* (__BORLANDC__) */ -#else /* !defined(__WIN32__) */ -#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__) /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */ -#include -#elif defined(__TURBOC__) /* BCC */ -#include -#elif defined(LSI_C) /* LSI C */ -#endif /* (__WIN32__) */ -#endif -#endif -#endif - - -#ifndef __WIN32__ -#define HAVE_LANGINFO_H -#define HAVE_LOCALE_H -#endif - -#ifdef HAVE_LANGINFO_H -#include -#endif -#ifdef HAVE_LOCALE_H -#include -#endif - -#define FALSE 0 -#define TRUE 1 - /* state of output_mode and input_mode c2 0 means ASCII @@ -424,10 +283,6 @@ struct { ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \ && (c != '(') && (c != ')') && (c != '.') && (c != 0x22))) -#define CP932_TABLE_BEGIN 0xFA -#define CP932_TABLE_END 0xFC -#define CP932INV_TABLE_BEGIN 0xED -#define CP932INV_TABLE_END 0xEE #define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END) #define HOLD_SIZE 1024 @@ -440,23 +295,12 @@ struct { #define DEFAULT_J 'B' 
#define DEFAULT_R 'B' -#define SJ0162 0x00e1 /* 01 - 62 ku offset */ -#define SJ6394 0x0161 /* 63 - 94 ku offset */ #define RANGE_NUM_MAX 18 #define GETA1 0x22 #define GETA2 0x2e -#if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE) -#define sizeof_euc_to_utf8_1byte 94 -#define sizeof_euc_to_utf8_2bytes 94 -#define sizeof_utf8_to_euc_C2 64 -#define sizeof_utf8_to_euc_E5B8 64 -#define sizeof_utf8_to_euc_2bytes 112 -#define sizeof_utf8_to_euc_3bytes 16 -#endif - /* MIME preprocessor */ #ifdef EASYWIN /*Easy Win */ @@ -569,7 +413,6 @@ static void eof_mime(void); static void mimeout_addchar(nkf_char c); #ifndef PERL_XS static void usage(void); -static void version(void); static void show_configuration(void); #endif static void options(unsigned char *c); @@ -634,11 +477,6 @@ static nkf_char url_getc(FILE *f); static nkf_char url_ungetc(nkf_char c,FILE *f); #endif -#if defined(INT_IS_SHORT) -#define NKF_INT32_C(n) (n##L) -#else -#define NKF_INT32_C(n) (n) -#endif #define PREFIX_EUCG3 NKF_INT32_C(0x8F00) #define CLASS_MASK NKF_INT32_C(0xFF000000) #define CLASS_UNICODE NKF_INT32_C(0x01000000) @@ -3437,6 +3275,8 @@ nkf_char s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) if (0x9E < c1) c2++; } }else{ +#define SJ0162 0x00e1 /* 01 - 62 ku offset */ +#define SJ6394 0x0161 /* 63 - 94 ku offset */ c2 = c2 + c2 - ((c2 <= 0x9F) ? 
SJ0162 : SJ6394); if (0x9E < c1) c2++; } @@ -5450,7 +5290,7 @@ nkf_char nfc_getc(FILE *f) nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc; int i=0, j, k=1, lower, upper; nkf_char buf[9]; - const nkf_nfchar *array; + const unsigned char *array; buf[i] = (*g)(f); while (k > 0 && ((buf[i] & 0xc0) != 0x80)){ @@ -6350,86 +6190,100 @@ nkf_char no_connection2(nkf_char c2, nkf_char c1, nkf_char c0) #ifdef WIN32DLL #define fprintf dllprintf #endif + +void version(void) +{ + fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n"); +} + void usage(void) { - fprintf(HELP_OUTPUT,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"); - fprintf(HELP_OUTPUT,"Flags:\n"); - fprintf(HELP_OUTPUT,"b,u Output is buffered (DEFAULT),Output is unbuffered\n"); - fprintf(HELP_OUTPUT,"j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"); + fprintf(HELP_OUTPUT, + "USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n" + "Flags:\n" + "b,u Output is buffered (DEFAULT),Output is unbuffered\n" + "j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n" #ifdef UTF8_OUTPUT_ENABLE - fprintf(HELP_OUTPUT," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"); + " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n" #endif - fprintf(HELP_OUTPUT,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"); + "J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n" #ifdef UTF8_INPUT_ENABLE - fprintf(HELP_OUTPUT," After 'W' you can add more options. 
-W[ 8, 16 [BL] ] \n"); -#endif - fprintf(HELP_OUTPUT,"t no conversion\n"); - fprintf(HELP_OUTPUT,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"); - fprintf(HELP_OUTPUT,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"); - fprintf(HELP_OUTPUT,"r {de/en}crypt ROT13/47\n"); - fprintf(HELP_OUTPUT,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"); - fprintf(HELP_OUTPUT,"m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"); - fprintf(HELP_OUTPUT,"M[BQ] MIME encode [B:base64 Q:quoted]\n"); - fprintf(HELP_OUTPUT,"l ISO8859-1 (Latin-1) support\n"); - fprintf(HELP_OUTPUT,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"); - fprintf(HELP_OUTPUT,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"); - fprintf(HELP_OUTPUT," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"); - fprintf(HELP_OUTPUT," 4: JISX0208 Katakana to JISX0201 Katakana\n"); - fprintf(HELP_OUTPUT,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"); - fprintf(HELP_OUTPUT,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"); + " After 'W' you can add more options. 
-W[ 8, 16 [BL] ] \n" +#endif + "t no conversion\n" + "i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n" + "o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n" + "r {de/en}crypt ROT13/47\n" + "h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n" + "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n" + "M[BQ] MIME encode [B:base64 Q:quoted]\n" + "l ISO8859-1 (Latin-1) support\n" + "f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n" + "Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n" + " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n" + " 4: JISX0208 Katakana to JISX0201 Katakana\n" + "X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n" + "B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n" #ifdef MSDOS - fprintf(HELP_OUTPUT,"T Text mode output\n"); -#endif - fprintf(HELP_OUTPUT,"O Output to File (DEFAULT 'nkf.out')\n"); - fprintf(HELP_OUTPUT,"I Convert non ISO-2022-JP charactor to GETA\n"); - fprintf(HELP_OUTPUT,"d,c Convert line breaks -d: LF -c: CRLF\n"); - fprintf(HELP_OUTPUT,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"); - fprintf(HELP_OUTPUT,"v, V Show this usage. 
V: show configuration\n"); - fprintf(HELP_OUTPUT,"\n"); - fprintf(HELP_OUTPUT,"Long name options\n"); - fprintf(HELP_OUTPUT," --ic= --oc=\n"); - fprintf(HELP_OUTPUT," Specify the input or output codeset\n"); - fprintf(HELP_OUTPUT," --fj --unix --mac --windows\n"); - fprintf(HELP_OUTPUT," --jis --euc --sjis --utf8 --utf16 --mime --base64\n"); - fprintf(HELP_OUTPUT," Convert for the system or code\n"); - fprintf(HELP_OUTPUT," --hiragana --katakana --katakana-hiragana\n"); - fprintf(HELP_OUTPUT," To Hiragana/Katakana Conversion\n"); - fprintf(HELP_OUTPUT," --prefix= Insert escape before troublesome characters of Shift_JIS\n"); + "T Text mode output\n" +#endif + "O Output to File (DEFAULT 'nkf.out')\n" + "I Convert non ISO-2022-JP charactor to GETA\n" + "d,c Convert line breaks -d: LF -c: CRLF\n" + "-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n" + "v, V Show this usage. V: show configuration\n" + "\n" + "Long name options\n" + " --ic= --oc=\n" + " Specify the input or output codeset\n" + " --fj --unix --mac --windows\n" + " --jis --euc --sjis --utf8 --utf16 --mime --base64\n" + " Convert for the system or code\n" + " --hiragana --katakana --katakana-hiragana\n" + " To Hiragana/Katakana Conversion\n" + " --prefix= Insert escape before troublesome characters of Shift_JIS\n" #ifdef INPUT_OPTION - fprintf(HELP_OUTPUT," --cap-input, --url-input Convert hex after ':' or '%%'\n"); + " --cap-input, --url-input Convert hex after ':' or '%%'\n" #endif #ifdef NUMCHAR_OPTION - fprintf(HELP_OUTPUT," --numchar-input Convert Unicode Character Reference\n"); + " --numchar-input Convert Unicode Character Reference\n" #endif #ifdef UTF8_INPUT_ENABLE - fprintf(HELP_OUTPUT," --fb-{skip, html, xml, perl, java, subchar}\n"); - fprintf(HELP_OUTPUT," Specify how nkf handles unassigned characters\n"); + " --fb-{skip, html, xml, perl, java, subchar}\n" + " Specify how nkf handles unassigned characters\n" #endif #ifdef OVERWRITE - fprintf(HELP_OUTPUT," --in-place[=SUFFIX] 
--overwrite[=SUFFIX]\n"); - fprintf(HELP_OUTPUT," Overwrite original listed files by filtered result\n"); - fprintf(HELP_OUTPUT," --overwrite preserves timestamp of original files\n"); -#endif - fprintf(HELP_OUTPUT," -g --guess Guess the input code\n"); - fprintf(HELP_OUTPUT," --help --version Show this help/the version\n"); - fprintf(HELP_OUTPUT," For more information, see also man nkf\n"); - fprintf(HELP_OUTPUT,"\n"); + " --in-place[=SUFFIX] --overwrite[=SUFFIX]\n" + " Overwrite original listed files by filtered result\n" + " --overwrite preserves timestamp of original files\n" +#endif + " -g --guess Guess the input code\n" + " --help --version Show this help/the version\n" + " For more information, see also man nkf\n" + "\n"); version(); } void show_configuration(void) { - fprintf(HELP_OUTPUT, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"); - fprintf(HELP_OUTPUT, " Compile-time options:\n"); - fprintf(HELP_OUTPUT, " Default output encoding: " + fprintf(HELP_OUTPUT, + "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n" + " nkf identity:\n" + " " NKF_IDENT "\n" + " Compile-time options:\n" + " Compiled at: " __DATE__ " " __TIME__ "\n" + ); + fprintf(HELP_OUTPUT, + " Default output encoding: " #ifdef DEFAULT_ENCIDX "%s\n", nkf_enc_name(nkf_default_encoding()) #else "%s (%s)\n", nkf_locale_encoding() ? 
"LOCALE" : "DEFAULT", nkf_enc_name(nkf_default_encoding()) #endif - ); - fprintf(HELP_OUTPUT, " Default output newline: " + ); + fprintf(HELP_OUTPUT, + " Default output newline: " #if DEFAULT_NEWLINE == CR "CR" #elif DEFAULT_NEWLINE == CRLF @@ -6437,32 +6291,27 @@ void show_configuration(void) #else "LF" #endif - "\n"); - fprintf(HELP_OUTPUT, " Decode MIME encoded string: " + "\n" + " Decode MIME encoded string: " #if MIME_DECODE_DEFAULT "ON" #else "OFF" #endif - "\n"); - fprintf(HELP_OUTPUT, " Convert JIS X 0201 Katakana: " + "\n" + " Convert JIS X 0201 Katakana: " #if X0201_DEFAULT "ON" #else "OFF" #endif - "\n"); - fprintf(HELP_OUTPUT, " --help, --version output: " + "\n" + " --help, --version output: " #if HELP_OUTPUT_HELP_OUTPUT -"HELP_OUTPUT" + "HELP_OUTPUT" #else -"STDOUT" + "STDOUT" #endif -"\n"); -} - -void version(void) -{ - fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n"); + "\n"); } #endif /*PERL_XS*/ diff --git a/nkf.h b/nkf.h index d2e8b2f..30efd4b 100755 --- a/nkf.h +++ b/nkf.h @@ -1,77 +1,166 @@ -/* nkf32.dll nkf.h */ -#ifndef CLASS_DECLSPEC -/* dll __declspec(dllexport) */ -/* app __declspec(dllimport) */ -#define CLASS_DECLSPEC -#endif -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ -#ifdef __BORLANDC__ -#pragma argsused -#endif /*__BORLANDC__*/ -/* uminchu nkf32103a.lzh 1.00 */ -void CALLBACK CLASS_DECLSPEC GetNkfVersion(LPSTR verStr); -int CALLBACK CLASS_DECLSPEC SetNkfOption(LPCSTR optStr); -void CALLBACK CLASS_DECLSPEC NkfConvert(LPSTR outStr, LPCSTR inStr); -/* uminchu nkf32103a.lzh 1.02 */ -void CALLBACK CLASS_DECLSPEC ToHankaku(LPSTR inStr); -void CALLBACK CLASS_DECLSPEC ToZenkakuKana(LPSTR outStr ,LPCSTR inStr); -/* uminchu nkf32103a.lzh 1.03 */ -void CALLBACK CLASS_DECLSPEC EncodeSubject(LPSTR outStr ,LPCSTR inStr); -/* tkaneto nkf32204.zip 2.0.4.0 */ -#ifdef TOMIME -void CALLBACK CLASS_DECLSPEC ToMime(LPSTR outStr ,LPCSTR inStr); -#endif /*TOMIME*/ -#ifdef 
GETKANJICODE -int CALLBACK CLASS_DECLSPEC NkfGetKanjiCode(VOID); -#endif /*GETKANJICODE*/ -#ifdef FILECONVERT1 -void CALLBACK CLASS_DECLSPEC NkfFileConvert1(LPCSTR fName); -#endif /*FILECONVERT1*/ -#ifdef FILECONVERT2 -void CALLBACK CLASS_DECLSPEC NkfFileConvert2(LPCSTR fInName,LPCSTR fOutName); -#endif /*FILECONVERT2*/ -#ifndef NKF32103A -/* safe */ -BOOL WINAPI CLASS_DECLSPEC GetNkfVersionSafeA(LPSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/); -BOOL WINAPI CLASS_DECLSPEC NkfConvertSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/, LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/); -BOOL WINAPI CLASS_DECLSPEC ToZenkakuKanaSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/); -BOOL WINAPI CLASS_DECLSPEC ToHankakuSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/); -BOOL WINAPI CLASS_DECLSPEC EncodeSubjectSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/); -BOOL WINAPI CLASS_DECLSPEC NkfFileConvert1SafeA(LPCSTR fName,DWORD nBufferLength /*in TCHARs*/); -BOOL WINAPI CLASS_DECLSPEC NkfFileConvert2SafeA(LPCSTR fInName,DWORD fInBufferLength /*in TCHARs*/,LPCSTR fOutName,DWORD fOutBufferLength /*in TCHARs*/); -BOOL WINAPI CLASS_DECLSPEC GetNkfGuessA(LPSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/); - -BOOL WINAPI CLASS_DECLSPEC GetNkfVersionSafeW(LPWSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/); -BOOL WINAPI CLASS_DECLSPEC NkfFileConvert1SafeW(LPCWSTR fName,DWORD nBufferLength /*in TCHARs*/); -BOOL WINAPI CLASS_DECLSPEC NkfFileConvert2SafeW(LPCWSTR fInName,DWORD fInBufferLength /*in TCHARs*/,LPCWSTR fOutName,DWORD fOutBufferLength /*in TCHARs*/); -BOOL WINAPI 
CLASS_DECLSPEC GetNkfGuessW(LPWSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/); -BOOL WINAPI CLASS_DECLSPEC GetNkfSupportFunctions(void *outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/); -BOOL WINAPI CLASS_DECLSPEC NkfUsage(LPSTR outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/); - -#ifdef UNICODE -#define GetNkfVersionSafe GetNkfVersionSafeW -#define GetNkfGuess GetNkfGuessW -#define NkfFileConvert1Safe NkfFileConvert1SafeW -#define NkfFileConvert2Safe NkfFileConvert2SafeW -#else /*UNICODE*/ -#define GetNkfVersionSafe GetNkfVersionSafeA -#define GetNkfGuess GetNkfGuessA -#define NkfFileConvert1Safe NkfFileConvert1SafeA -#define NkfFileConvert2Safe NkfFileConvert2SafeA -#endif /*UNICODE*/ - -struct NKFSUPPORTFUNCTIONS { -DWORD size; -LPCSTR copyrightA; -LPCSTR versionA; -LPCSTR dateA; -DWORD functions; -}; -#endif /*!defined(NKF32103A)*/ - -#ifdef __cplusplus -} // Balance extern "C" above -#endif /*__cplusplus*/ -/* nkf32.dll nkf.h end */ +/* + * + * nkf.h - Header file for nkf + * + * $Id: nkf.h,v 1.2 2008/01/23 09:10:25 naruse Exp $ + */ + + +#ifndef NKF_H + +/* Wrapper of configurations */ + +#ifndef MIME_DECODE_DEFAULT +#define MIME_DECODE_DEFAULT STRICT_MIME +#endif +#ifndef X0201_DEFAULT +#define X0201_DEFAULT TRUE +#endif + +#if DEFAULT_NEWLINE == 0x0D0A +#define PUT_NEWLINE(func) do {\ + func(0x0D);\ + func(0x0A);\ +} while (0) +#define OCONV_NEWLINE(func) do {\ + func(0, 0x0D);\ + func(0, 0x0A);\ +} while (0) +#elif DEFAULT_NEWLINE == 0x0D +#define PUT_NEWLINE(func) func(0x0D) +#define OCONV_NEWLINE(func) func(0, 0x0D) +#else +#define DEFAULT_NEWLINE 0x0A +#define PUT_NEWLINE(func) func(0x0A) +#define OCONV_NEWLINE(func) func(0, 0x0A) +#endif +#ifdef HELP_OUTPUT_STDERR +#define HELP_OUTPUT stderr +#else +#define HELP_OUTPUT stdout +#endif + + +/* Compatibility definitions */ + +#ifdef nkf_char +#elif defined(INT_IS_SHORT) +typedef long nkf_char; +#define 
NKF_INT32_C(n) (n##L) +#else +typedef int nkf_char; +#define NKF_INT32_C(n) (n) +#endif + +#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS) +#define MSDOS +#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__) +#define __WIN32__ +#endif +#endif + +#ifdef PERL_XS +#undef OVERWRITE +#endif + +#ifndef PERL_XS +#include +#endif + +#include +#include + +#if defined(MSDOS) || defined(__OS2__) +#include +#include +#if defined(_MSC_VER) || defined(__WATCOMC__) +#define mktemp _mktemp +#endif +#endif + +#ifdef MSDOS +#ifdef LSI_C +#define setbinmode(fp) fsetbin(fp) +#elif defined(__DJGPP__) +#include +void setbinmode(FILE *fp) +{ + /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */ + int fd, m; + fd = fileno(fp); + m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY; + __file_handle_set(fd, m); +} +#else /* Microsoft C, Turbo C */ +#define setbinmode(fp) setmode(fileno(fp), O_BINARY) +#endif +#else /* UNIX */ +#define setbinmode(fp) +#endif + +#ifdef _IOFBF /* SysV and MSDOS, Windows */ +#define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size) +#else /* BSD */ +#define setvbuffer(fp, buf, size) setbuffer(fp, buf, size) +#endif + +/*Borland C++ 4.5 EasyWin*/ +#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */ +#define EASYWIN +#ifndef __WIN16__ +#define __WIN16__ +#endif +#include +#endif + +#ifdef OVERWRITE +/* added by satoru@isoternet.org */ +#if defined(__EMX__) +#include +#endif +#include +#if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */ +#include +#if defined(__WATCOMC__) +#include +#else +#include +#endif +#else /* defined(MSDOS) */ +#ifdef __WIN32__ +#ifdef __BORLANDC__ /* BCC32 */ +#include +#else /* !defined(__BORLANDC__) */ +#include +#endif /* (__BORLANDC__) */ +#else /* !defined(__WIN32__) */ +#if 
defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__) /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */ +#include +#elif defined(__TURBOC__) /* BCC */ +#include +#elif defined(LSI_C) /* LSI C */ +#endif /* (__WIN32__) */ +#endif +#endif +#endif + +#ifndef __WIN32__ /* not win32 is posix */ +#define HAVE_LANGINFO_H +#define HAVE_LOCALE_H +#endif + +#ifdef HAVE_LANGINFO_H +#include +#endif +#ifdef HAVE_LOCALE_H +#include +#endif + +#define FALSE 0 +#define TRUE 1 + +#ifdef WIN32DLL +#include "nkf32.h" +#endif + +#endif diff --git a/nkf32.h b/nkf32.h new file mode 100755 index 0000000..e10d86f --- /dev/null +++ b/nkf32.h @@ -0,0 +1,78 @@ +#ifndef NKF32_H +#ifndef CLASS_DECLSPEC +/* dll __declspec(dllexport) */ +/* app __declspec(dllimport) */ +#define CLASS_DECLSPEC +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +#ifdef __BORLANDC__ +#pragma argsused +#endif /*__BORLANDC__*/ +/* uminchu nkf32103a.lzh 1.00 */ +void CALLBACK CLASS_DECLSPEC GetNkfVersion(LPSTR verStr); +int CALLBACK CLASS_DECLSPEC SetNkfOption(LPCSTR optStr); +void CALLBACK CLASS_DECLSPEC NkfConvert(LPSTR outStr, LPCSTR inStr); +/* uminchu nkf32103a.lzh 1.02 */ +void CALLBACK CLASS_DECLSPEC ToHankaku(LPSTR inStr); +void CALLBACK CLASS_DECLSPEC ToZenkakuKana(LPSTR outStr ,LPCSTR inStr); +/* uminchu nkf32103a.lzh 1.03 */ +void CALLBACK CLASS_DECLSPEC EncodeSubject(LPSTR outStr ,LPCSTR inStr); +/* tkaneto nkf32204.zip 2.0.4.0 */ +#ifdef TOMIME +void CALLBACK CLASS_DECLSPEC ToMime(LPSTR outStr ,LPCSTR inStr); +#endif /*TOMIME*/ +#ifdef GETKANJICODE +int CALLBACK CLASS_DECLSPEC NkfGetKanjiCode(VOID); +#endif /*GETKANJICODE*/ +#ifdef FILECONVERT1 +void CALLBACK CLASS_DECLSPEC NkfFileConvert1(LPCSTR fName); +#endif /*FILECONVERT1*/ +#ifdef FILECONVERT2 +void CALLBACK CLASS_DECLSPEC NkfFileConvert2(LPCSTR fInName,LPCSTR fOutName); +#endif /*FILECONVERT2*/ +#ifndef NKF32103A +/* safe */ +BOOL WINAPI 
CLASS_DECLSPEC GetNkfVersionSafeA(LPSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/); +BOOL WINAPI CLASS_DECLSPEC NkfConvertSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/, LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/); +BOOL WINAPI CLASS_DECLSPEC ToZenkakuKanaSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/); +BOOL WINAPI CLASS_DECLSPEC ToHankakuSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/); +BOOL WINAPI CLASS_DECLSPEC EncodeSubjectSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/); +BOOL WINAPI CLASS_DECLSPEC NkfFileConvert1SafeA(LPCSTR fName,DWORD nBufferLength /*in TCHARs*/); +BOOL WINAPI CLASS_DECLSPEC NkfFileConvert2SafeA(LPCSTR fInName,DWORD fInBufferLength /*in TCHARs*/,LPCSTR fOutName,DWORD fOutBufferLength /*in TCHARs*/); +BOOL WINAPI CLASS_DECLSPEC GetNkfGuessA(LPSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/); + +BOOL WINAPI CLASS_DECLSPEC GetNkfVersionSafeW(LPWSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/); +BOOL WINAPI CLASS_DECLSPEC NkfFileConvert1SafeW(LPCWSTR fName,DWORD nBufferLength /*in TCHARs*/); +BOOL WINAPI CLASS_DECLSPEC NkfFileConvert2SafeW(LPCWSTR fInName,DWORD fInBufferLength /*in TCHARs*/,LPCWSTR fOutName,DWORD fOutBufferLength /*in TCHARs*/); +BOOL WINAPI CLASS_DECLSPEC GetNkfGuessW(LPWSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/); +BOOL WINAPI CLASS_DECLSPEC GetNkfSupportFunctions(void *outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/); +BOOL WINAPI CLASS_DECLSPEC NkfUsage(LPSTR outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in 
Bytes*/); + +#ifdef UNICODE +#define GetNkfVersionSafe GetNkfVersionSafeW +#define GetNkfGuess GetNkfGuessW +#define NkfFileConvert1Safe NkfFileConvert1SafeW +#define NkfFileConvert2Safe NkfFileConvert2SafeW +#else /*UNICODE*/ +#define GetNkfVersionSafe GetNkfVersionSafeA +#define GetNkfGuess GetNkfGuessA +#define NkfFileConvert1Safe NkfFileConvert1SafeA +#define NkfFileConvert2Safe NkfFileConvert2SafeA +#endif /*UNICODE*/ + +struct NKFSUPPORTFUNCTIONS { +DWORD size; +LPCSTR copyrightA; +LPCSTR versionA; +LPCSTR dateA; +DWORD functions; +}; +#endif /*!defined(NKF32103A)*/ + +#ifdef __cplusplus +} // Balance extern "C" above +#endif /*__cplusplus*/ +#endif diff --git a/utf8tbl.c b/utf8tbl.c index db67c5d..34c2500 100644 --- a/utf8tbl.c +++ b/utf8tbl.c @@ -1,4 +1,11 @@ +/* + * utf8tbl.c - Conversion Table for nkf + * + * $Id: utf8tbl.c,v 1.22 2008/01/23 09:10:25 naruse Exp $ + */ + #include "config.h" +#include "utf8tbl.h" #ifdef UTF8_OUTPUT_ENABLE static const unsigned short euc_to_utf8_A1[] = { diff --git a/utf8tbl.h b/utf8tbl.h index 29413d4..270f32e 100644 --- a/utf8tbl.h +++ b/utf8tbl.h @@ -1,7 +1,15 @@ +/* + * utf8tbl.h - Header file for Conversion Table + * + * $Id: utf8tbl.h,v 1.3 2008/01/23 09:10:25 naruse Exp $ + */ + #ifndef _UTF8TBL_H_ #define _UTF8TBL_H_ #ifdef UTF8_OUTPUT_ENABLE +#define sizeof_euc_to_utf8_1byte 94 +#define sizeof_euc_to_utf8_2bytes 94 extern const unsigned short euc_to_utf8_1byte[]; extern const unsigned short *const euc_to_utf8_2bytes[]; extern const unsigned short *const euc_to_utf8_2bytes_ms[]; @@ -10,6 +18,10 @@ extern const unsigned short *const x0212_to_utf8_2bytes[]; #endif /* UTF8_OUTPUT_ENABLE */ #ifdef UTF8_INPUT_ENABLE +#define sizeof_utf8_to_euc_C2 64 +#define sizeof_utf8_to_euc_E5B8 64 +#define sizeof_utf8_to_euc_2bytes 112 +#define sizeof_utf8_to_euc_3bytes 16 extern const unsigned short *const utf8_to_euc_2bytes[]; extern const unsigned short *const utf8_to_euc_2bytes_ms[]; extern const unsigned short *const 
utf8_to_euc_2bytes_932[]; @@ -21,11 +33,23 @@ extern const unsigned short *const *const utf8_to_euc_3bytes_mac[]; #endif /* UTF8_INPUT_ENABLE */ #ifdef UNICODE_NORMALIZATION + +#define NORMALIZATION_TABLE_LENGTH 942 +#define NORMALIZATION_TABLE_NFC_LENGTH 3 +#define NORMALIZATION_TABLE_NFD_LENGTH 9 +struct normalization_pair { + const unsigned char nfc[NORMALIZATION_TABLE_NFC_LENGTH]; + const unsigned char nfd[NORMALIZATION_TABLE_NFD_LENGTH]; +}; extern const struct normalization_pair normalization_table[]; #endif #ifdef SHIFTJIS_CP932 +#define CP932_TABLE_BEGIN 0xFA +#define CP932_TABLE_END 0xFC extern const unsigned short shiftjis_cp932[3][189]; +#define CP932INV_TABLE_BEGIN 0xED +#define CP932INV_TABLE_END 0xEE extern const unsigned short cp932inv[2][189]; #endif /* SHIFTJIS_CP932 */