RM = rm -rf
VERSION = 2.0.8
-nkf : nkf.c config.h utf8tbl.o
- $(CC) $(CFLAGS) -o nkf nkf.c utf8tbl.o
+nkf : nkf.o utf8tbl.o
+ $(CC) $(CFLAGS) -o nkf nkf.o utf8tbl.o
-utf8tbl.o : utf8tbl.c config.h
+nkf.o : nkf.c nkf.h utf8tbl.h config.h
+ $(CC) $(CFLAGS) -c nkf.c
+
+utf8tbl.o : utf8tbl.c utf8tbl.h config.h
$(CC) $(CFLAGS) -c utf8tbl.c
clean:
#ifndef _CONFIG_H_
#define _CONFIG_H_
-/* UTF8 \e$BF~=PNO\e(B */
+/* UTF8 input and output */
#define UTF8_INPUT_ENABLE
#define UTF8_OUTPUT_ENABLE
-/* Shift_JIS \e$BHO0O30$NJ8;z$r!"\e(BCP932 \e$B$GF1CM$JJ8;z$KFI$_49$($k\e(B */
+/* convert characters outside the Shift_JIS range to their CP932 equivalents */
#define SHIFTJIS_CP932
-/* \e$B%*%W%7%g%s$GF~NO$r;XDj$7$?;~$K!"J8;z%3!<%I$r8GDj$9$k\e(B */
+/* fix input encoding when given by option */
#define INPUT_CODE_FIX
-/* --overwrite \e$B%*%W%7%g%s\e(B */
+/* --overwrite option */
/* by Satoru Takabayashi <ccsatoru@vega.aichi-u.ac.jp> */
#define OVERWRITE
-/* --cap-input, --url-input \e$B%*%W%7%g%s\e(B */
+/* --cap-input, --url-input option */
#define INPUT_OPTION
-/* --numchar-input \e$B%*%W%7%g%s\e(B */
+/* --numchar-input option */
#define NUMCHAR_OPTION
-/* --debug, --no-output \e$B%*%W%7%g%s\e(B */
+/* --debug, --no-output option */
#define CHECK_OPTION
/* JIS X0212 */
#define X0212_ENABLE
-/* --exec-in, --exec-out \e$B%*%W%7%g%s\e(B
- * pipe, fork, execvp \e$B$"$?$j$,L5$$$HF0$-$^$;$s!#\e(B
- * MS-DOS, MinGW \e$B$J$I$G$O\e(B undef \e$B$K$7$F$/$@$5$$\e(B
- * child process \e$B=*N;;~$N=hM}$,$$$$$+$2$s$J$N$G!"\e(B
- * \e$B%G%U%)%k%H$GL58z$K$7$F$$$^$9!#\e(B
+/* --exec-in, --exec-out option
+ * require pipe, fork, execvp and so on.
+ * please undef this on MS-DOS, MinGW
+ * this is still buggy around child process termination, so it is disabled by default
*/
/* #define EXEC_IO */
-/* SunOS \e$B$N\e(B cc \e$B$r;H$&$H$-$O\e(B undef \e$B$K$7$F$/$@$5$$\e(B */
-#define ANSI_C_PROTOTYPE
-
-/* int \e$B$,\e(B 32bit \e$BL$K~$N4D6-$G\e(B NUMCHAR_OPTION \e$B$r;H$&$K$O!"\e(B
- * \e$B%3%a%s%H$r30$7$F$/$@$5$$!#\e(B
- */
-/* #define INT_IS_SHORT */
-
-
-#if defined(INT_IS_SHORT)
-typedef long nkf_char;
-typedef unsigned char nkf_nfchar;
-#else
-typedef int nkf_char;
-typedef int nkf_nfchar;
-#endif
-
/* Unicode Normalization */
#define UNICODE_NORMALIZATION
-#ifndef WIN32DLL
-/******************************/
-/* \e$B%G%U%)%k%H$N=PNO%3!<%IA*Br\e(B */
-/* Select DEFAULT_CODE */
-/* #define DEFAULT_CODE_JIS */
-/* #define DEFAULT_CODE_SJIS */
-/* #define DEFAULT_CODE_EUC */
-/* #define DEFAULT_CODE_UTF8 */
-/******************************/
-#endif
-
-#if defined(NUMCHAR_OPTION) && !defined(UTF8_INPUT_ENABLE)
-#define UTF8_INPUT_ENABLE
-#endif
+/*
+ * Select Default Output Encoding
+ *
+ * If not defined, locale encoding is used.
+ */
-#ifdef UNICODE_NORMALIZATION
-#ifndef UTF8_INPUT_ENABLE
-#define UTF8_INPUT_ENABLE
-#endif
-#define NORMALIZATION_TABLE_LENGTH 942
-#define NORMALIZATION_TABLE_NFC_LENGTH 3
-#define NORMALIZATION_TABLE_NFD_LENGTH 9
-struct normalization_pair{
- const nkf_nfchar nfc[NORMALIZATION_TABLE_NFC_LENGTH];
- const nkf_nfchar nfd[NORMALIZATION_TABLE_NFD_LENGTH];
-};
-#endif
+/* #define DEFAULT_CODE_JIS */
+/* #define DEFAULT_CODE_SJIS */
+/* #define DEFAULT_CODE_EUC */
+/* #define DEFAULT_CODE_UTF8 */
#endif /* _CONFIG_H_ */
* \e$B8=:_!"\e(Bnkf \e$B$O\e(B SorceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
* http://sourceforge.jp/projects/nkf/
***********************************************************************/
-/* $Id: nkf.c,v 1.165 2008/01/22 00:30:05 naruse Exp $ */
+#define NKF_IDENT "$Id: nkf.c,v 1.166 2008/01/23 09:10:25 naruse Exp $"
#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2008-01-21"
+#define NKF_RELEASE_DATE "2008-01-23"
#define COPY_RIGHT \
"Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
"Copyright (C) 2002-2008 Kono, Furukawa, Naruse, mastodon"
#include "config.h"
+#include "nkf.h"
#include "utf8tbl.h"
-#ifndef MIME_DECODE_DEFAULT
-#define MIME_DECODE_DEFAULT STRICT_MIME
-#endif
-#ifndef X0201_DEFAULT
-#define X0201_DEFAULT TRUE
-#endif
-
-#if DEFAULT_NEWLINE == 0x0D0A
-#define PUT_NEWLINE(func) do {\
- func(0x0D);\
- func(0x0A);\
-} while (0)
-#define OCONV_NEWLINE(func) do {\
- func(0, 0x0D);\
- func(0, 0x0A);\
-} while (0)
-#elif DEFAULT_NEWLINE == 0x0D
-#define PUT_NEWLINE(func) func(0x0D)
-#define OCONV_NEWLINE(func) func(0, 0x0D)
-#else
-#define DEFAULT_NEWLINE 0x0A
-#define PUT_NEWLINE(func) func(0x0A)
-#define OCONV_NEWLINE(func) func(0, 0x0A)
-#endif
-#ifdef HELP_OUTPUT_STDERR
-#define HELP_OUTPUT stderr
-#else
-#define HELP_OUTPUT stdout
-#endif
-
-#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)
-#define MSDOS
-#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)
-#define __WIN32__
-#endif
-#endif
-
-#ifdef PERL_XS
-#undef OVERWRITE
-#endif
-
-#ifndef PERL_XS
-#include <stdio.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-
-#if defined(MSDOS) || defined(__OS2__)
-#include <fcntl.h>
-#include <io.h>
-#if defined(_MSC_VER) || defined(__WATCOMC__)
-#define mktemp _mktemp
-#endif
-#endif
-
-#ifdef MSDOS
-#ifdef LSI_C
-#define setbinmode(fp) fsetbin(fp)
-#elif defined(__DJGPP__)
-#include <libc/dosio.h>
-#define setbinmode(fp) djgpp_setbinmode(fp)
-#else /* Microsoft C, Turbo C */
-#define setbinmode(fp) setmode(fileno(fp), O_BINARY)
-#endif
-#else /* UNIX */
-#define setbinmode(fp)
-#endif
-
-#if defined(__DJGPP__)
-void djgpp_setbinmode(FILE *fp)
-{
- /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */
- int fd, m;
- fd = fileno(fp);
- m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;
- __file_handle_set(fd, m);
-}
-#endif
-
-#ifdef _IOFBF /* SysV and MSDOS, Windows */
-#define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)
-#else /* BSD */
-#define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)
-#endif
-
-/*Borland C++ 4.5 EasyWin*/
-#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */
-#define EASYWIN
-#ifndef __WIN16__
-#define __WIN16__
-#endif
-#include <windows.h>
-#endif
-
-#ifdef OVERWRITE
-/* added by satoru@isoternet.org */
-#if defined(__EMX__)
-#include <sys/types.h>
-#endif
-#include <sys/stat.h>
-#if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */
-#include <unistd.h>
-#if defined(__WATCOMC__)
-#include <sys/utime.h>
-#else
-#include <utime.h>
-#endif
-#else /* defined(MSDOS) */
-#ifdef __WIN32__
-#ifdef __BORLANDC__ /* BCC32 */
-#include <utime.h>
-#else /* !defined(__BORLANDC__) */
-#include <sys/utime.h>
-#endif /* (__BORLANDC__) */
-#else /* !defined(__WIN32__) */
-#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__) /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */
-#include <sys/utime.h>
-#elif defined(__TURBOC__) /* BCC */
-#include <utime.h>
-#elif defined(LSI_C) /* LSI C */
-#endif /* (__WIN32__) */
-#endif
-#endif
-#endif
-
-
-#ifndef __WIN32__
-#define HAVE_LANGINFO_H
-#define HAVE_LOCALE_H
-#endif
-
-#ifdef HAVE_LANGINFO_H
-#include <langinfo.h>
-#endif
-#ifdef HAVE_LOCALE_H
-#include <locale.h>
-#endif
-
-#define FALSE 0
-#define TRUE 1
-
/* state of output_mode and input_mode
c2 0 means ASCII
((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \
&& (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
-#define CP932_TABLE_BEGIN 0xFA
-#define CP932_TABLE_END 0xFC
-#define CP932INV_TABLE_BEGIN 0xED
-#define CP932INV_TABLE_END 0xEE
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
#define HOLD_SIZE 1024
#define DEFAULT_J 'B'
#define DEFAULT_R 'B'
-#define SJ0162 0x00e1 /* 01 - 62 ku offset */
-#define SJ6394 0x0161 /* 63 - 94 ku offset */
#define RANGE_NUM_MAX 18
#define GETA1 0x22
#define GETA2 0x2e
-#if defined(UTF8_OUTPUT_ENABLE) || defined(UTF8_INPUT_ENABLE)
-#define sizeof_euc_to_utf8_1byte 94
-#define sizeof_euc_to_utf8_2bytes 94
-#define sizeof_utf8_to_euc_C2 64
-#define sizeof_utf8_to_euc_E5B8 64
-#define sizeof_utf8_to_euc_2bytes 112
-#define sizeof_utf8_to_euc_3bytes 16
-#endif
-
/* MIME preprocessor */
#ifdef EASYWIN /*Easy Win */
static void mimeout_addchar(nkf_char c);
#ifndef PERL_XS
static void usage(void);
-static void version(void);
static void show_configuration(void);
#endif
static void options(unsigned char *c);
static nkf_char url_ungetc(nkf_char c,FILE *f);
#endif
-#if defined(INT_IS_SHORT)
-#define NKF_INT32_C(n) (n##L)
-#else
-#define NKF_INT32_C(n) (n)
-#endif
#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
#define CLASS_MASK NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE NKF_INT32_C(0x01000000)
if (0x9E < c1) c2++;
}
}else{
+#define SJ0162 0x00e1 /* 01 - 62 ku offset */
+#define SJ6394 0x0161 /* 63 - 94 ku offset */
c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
if (0x9E < c1) c2++;
}
nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
int i=0, j, k=1, lower, upper;
nkf_char buf[9];
- const nkf_nfchar *array;
+ const unsigned char *array;
buf[i] = (*g)(f);
while (k > 0 && ((buf[i] & 0xc0) != 0x80)){
#ifdef WIN32DLL
#define fprintf dllprintf
#endif
+
+void version(void)
+{
+ fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
+}
+
+/*
+ * Print the command-line help text to HELP_OUTPUT, then the version banner.
+ * The whole text is one string literal built by adjacent-literal
+ * concatenation; the #ifdef sections add or drop lines for optional features.
+ */
void usage(void)
{
- fprintf(HELP_OUTPUT,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
- fprintf(HELP_OUTPUT,"Flags:\n");
- fprintf(HELP_OUTPUT,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
- fprintf(HELP_OUTPUT,"j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n");
+ fprintf(HELP_OUTPUT,
+ "USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
+ "Flags:\n"
+ "b,u Output is buffered (DEFAULT),Output is unbuffered\n"
+ "j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
#ifdef UTF8_OUTPUT_ENABLE
- fprintf(HELP_OUTPUT," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
+ " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
#endif
- fprintf(HELP_OUTPUT,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n");
+ "J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
#ifdef UTF8_INPUT_ENABLE
- fprintf(HELP_OUTPUT," After 'W' you can add more options. -W[ 8, 16 [BL] ] \n");
-#endif
- fprintf(HELP_OUTPUT,"t no conversion\n");
- fprintf(HELP_OUTPUT,"i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n");
- fprintf(HELP_OUTPUT,"o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n");
- fprintf(HELP_OUTPUT,"r {de/en}crypt ROT13/47\n");
- fprintf(HELP_OUTPUT,"h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n");
- fprintf(HELP_OUTPUT,"m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n");
- fprintf(HELP_OUTPUT,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
- fprintf(HELP_OUTPUT,"l ISO8859-1 (Latin-1) support\n");
- fprintf(HELP_OUTPUT,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
- fprintf(HELP_OUTPUT,"Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n");
- fprintf(HELP_OUTPUT," 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n");
- fprintf(HELP_OUTPUT," 4: JISX0208 Katakana to JISX0201 Katakana\n");
- fprintf(HELP_OUTPUT,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
- fprintf(HELP_OUTPUT,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
+ " After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
+#endif
+ "t no conversion\n"
+ "i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
+ "o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"
+ "r {de/en}crypt ROT13/47\n"
+ "h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
+ "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
+ "M[BQ] MIME encode [B:base64 Q:quoted]\n"
+ "l ISO8859-1 (Latin-1) support\n"
+ "f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
+ "Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
+ " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
+ " 4: JISX0208 Katakana to JISX0201 Katakana\n"
+ "X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
+ "B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
#ifdef MSDOS
- fprintf(HELP_OUTPUT,"T Text mode output\n");
-#endif
- fprintf(HELP_OUTPUT,"O Output to File (DEFAULT 'nkf.out')\n");
- fprintf(HELP_OUTPUT,"I Convert non ISO-2022-JP charactor to GETA\n");
- fprintf(HELP_OUTPUT,"d,c Convert line breaks -d: LF -c: CRLF\n");
- fprintf(HELP_OUTPUT,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
- fprintf(HELP_OUTPUT,"v, V Show this usage. V: show configuration\n");
- fprintf(HELP_OUTPUT,"\n");
- fprintf(HELP_OUTPUT,"Long name options\n");
- fprintf(HELP_OUTPUT," --ic=<input codeset> --oc=<output codeset>\n");
- fprintf(HELP_OUTPUT," Specify the input or output codeset\n");
- fprintf(HELP_OUTPUT," --fj --unix --mac --windows\n");
- fprintf(HELP_OUTPUT," --jis --euc --sjis --utf8 --utf16 --mime --base64\n");
- fprintf(HELP_OUTPUT," Convert for the system or code\n");
- fprintf(HELP_OUTPUT," --hiragana --katakana --katakana-hiragana\n");
- fprintf(HELP_OUTPUT," To Hiragana/Katakana Conversion\n");
- fprintf(HELP_OUTPUT," --prefix= Insert escape before troublesome characters of Shift_JIS\n");
+ "T Text mode output\n"
+#endif
+ "O Output to File (DEFAULT 'nkf.out')\n"
+ "I Convert non ISO-2022-JP character to GETA\n"
+ "d,c Convert line breaks -d: LF -c: CRLF\n"
+ "-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
+ "v, V Show this usage. V: show configuration\n"
+ "\n"
+ "Long name options\n"
+ " --ic=<input codeset> --oc=<output codeset>\n"
+ " Specify the input or output codeset\n"
+ " --fj --unix --mac --windows\n"
+ " --jis --euc --sjis --utf8 --utf16 --mime --base64\n"
+ " Convert for the system or code\n"
+ " --hiragana --katakana --katakana-hiragana\n"
+ " To Hiragana/Katakana Conversion\n"
+ " --prefix= Insert escape before troublesome characters of Shift_JIS\n"
#ifdef INPUT_OPTION
- fprintf(HELP_OUTPUT," --cap-input, --url-input Convert hex after ':' or '%%'\n");
+ " --cap-input, --url-input Convert hex after ':' or '%%'\n"
#endif
#ifdef NUMCHAR_OPTION
- fprintf(HELP_OUTPUT," --numchar-input Convert Unicode Character Reference\n");
+ " --numchar-input Convert Unicode Character Reference\n"
#endif
#ifdef UTF8_INPUT_ENABLE
- fprintf(HELP_OUTPUT," --fb-{skip, html, xml, perl, java, subchar}\n");
- fprintf(HELP_OUTPUT," Specify how nkf handles unassigned characters\n");
+ " --fb-{skip, html, xml, perl, java, subchar}\n"
+ " Specify how nkf handles unassigned characters\n"
#endif
#ifdef OVERWRITE
- fprintf(HELP_OUTPUT," --in-place[=SUFFIX] --overwrite[=SUFFIX]\n");
- fprintf(HELP_OUTPUT," Overwrite original listed files by filtered result\n");
- fprintf(HELP_OUTPUT," --overwrite preserves timestamp of original files\n");
-#endif
- fprintf(HELP_OUTPUT," -g --guess Guess the input code\n");
- fprintf(HELP_OUTPUT," --help --version Show this help/the version\n");
- fprintf(HELP_OUTPUT," For more information, see also man nkf\n");
- fprintf(HELP_OUTPUT,"\n");
+ " --in-place[=SUFFIX] --overwrite[=SUFFIX]\n"
+ " Overwrite original listed files by filtered result\n"
+ " --overwrite preserves timestamp of original files\n"
+#endif
+ " -g --guess Guess the input code\n"
+ " --help --version Show this help/the version\n"
+ " For more information, see also man nkf\n"
+ "\n");
version();
}
+/*
+ * Print a summary of this build's configuration to HELP_OUTPUT: version,
+ * NKF_IDENT, compile date/time, default output encoding and newline, and the
+ * compile-time defaults for MIME decoding, JIS X 0201 katakana conversion and
+ * the help output stream.
+ */
void show_configuration(void)
{
- fprintf(HELP_OUTPUT, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n");
- fprintf(HELP_OUTPUT, " Compile-time options:\n");
- fprintf(HELP_OUTPUT, " Default output encoding: "
+ fprintf(HELP_OUTPUT,
+ "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
+ " nkf identity:\n"
+ " " NKF_IDENT "\n"
+ " Compile-time options:\n"
+ " Compiled at: " __DATE__ " " __TIME__ "\n"
+ );
+ fprintf(HELP_OUTPUT,
+ " Default output encoding: "
#ifdef DEFAULT_ENCIDX
"%s\n", nkf_enc_name(nkf_default_encoding())
#else
"%s (%s)\n", nkf_locale_encoding() ? "LOCALE" : "DEFAULT",
nkf_enc_name(nkf_default_encoding())
#endif
- );
- fprintf(HELP_OUTPUT, " Default output newline: "
+ );
+ fprintf(HELP_OUTPUT,
+ " Default output newline: "
#if DEFAULT_NEWLINE == CR
"CR"
#elif DEFAULT_NEWLINE == CRLF
+ /* this branch previously emitted nothing, leaving the value blank on CRLF builds */
+ "CRLF"
#else
"LF"
#endif
- "\n");
- fprintf(HELP_OUTPUT, " Decode MIME encoded string: "
+ "\n"
+ " Decode MIME encoded string: "
#if MIME_DECODE_DEFAULT
"ON"
#else
"OFF"
#endif
- "\n");
- fprintf(HELP_OUTPUT, " Convert JIS X 0201 Katakana: "
+ "\n"
+ " Convert JIS X 0201 Katakana: "
#if X0201_DEFAULT
"ON"
#else
"OFF"
#endif
- "\n");
- fprintf(HELP_OUTPUT, " --help, --version output: "
+ "\n"
+ " --help, --version output: "
+ /* HELP_OUTPUT_STDERR is the macro that selects stderr for HELP_OUTPUT */
-#if HELP_OUTPUT_HELP_OUTPUT
+#ifdef HELP_OUTPUT_STDERR
-"HELP_OUTPUT"
+ "STDERR"
#else
-"STDOUT"
+ "STDOUT"
#endif
-"\n");
-}
-
-void version(void)
-{
- fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
+ "\n");
}
#endif /*PERL_XS*/
-/* nkf32.dll nkf.h */\r
-#ifndef CLASS_DECLSPEC\r
-/* dll __declspec(dllexport) */\r
-/* app __declspec(dllimport) */\r
-#define CLASS_DECLSPEC\r
-#endif\r
-#ifdef __cplusplus\r
-extern "C" {\r
-#endif /* __cplusplus */\r
-#ifdef __BORLANDC__\r
-#pragma argsused\r
-#endif /*__BORLANDC__*/\r
-/* uminchu nkf32103a.lzh 1.00 */\r
-void CALLBACK CLASS_DECLSPEC GetNkfVersion(LPSTR verStr);\r
-int CALLBACK CLASS_DECLSPEC SetNkfOption(LPCSTR optStr);\r
-void CALLBACK CLASS_DECLSPEC NkfConvert(LPSTR outStr, LPCSTR inStr);\r
-/* uminchu nkf32103a.lzh 1.02 */\r
-void CALLBACK CLASS_DECLSPEC ToHankaku(LPSTR inStr);\r
-void CALLBACK CLASS_DECLSPEC ToZenkakuKana(LPSTR outStr ,LPCSTR inStr);\r
-/* uminchu nkf32103a.lzh 1.03 */\r
-void CALLBACK CLASS_DECLSPEC EncodeSubject(LPSTR outStr ,LPCSTR inStr);\r
-/* tkaneto nkf32204.zip 2.0.4.0 */\r
-#ifdef TOMIME\r
-void CALLBACK CLASS_DECLSPEC ToMime(LPSTR outStr ,LPCSTR inStr);\r
-#endif /*TOMIME*/\r
-#ifdef GETKANJICODE\r
-int CALLBACK CLASS_DECLSPEC NkfGetKanjiCode(VOID);\r
-#endif /*GETKANJICODE*/\r
-#ifdef FILECONVERT1\r
-void CALLBACK CLASS_DECLSPEC NkfFileConvert1(LPCSTR fName);\r
-#endif /*FILECONVERT1*/\r
-#ifdef FILECONVERT2\r
-void CALLBACK CLASS_DECLSPEC NkfFileConvert2(LPCSTR fInName,LPCSTR fOutName);\r
-#endif /*FILECONVERT2*/\r
-#ifndef NKF32103A\r
-/* safe */\r
-BOOL WINAPI CLASS_DECLSPEC GetNkfVersionSafeA(LPSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);\r
-BOOL WINAPI CLASS_DECLSPEC NkfConvertSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/, LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);\r
-BOOL WINAPI CLASS_DECLSPEC ToZenkakuKanaSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);\r
-BOOL WINAPI CLASS_DECLSPEC ToHankakuSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);\r
-BOOL WINAPI CLASS_DECLSPEC EncodeSubjectSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);\r
-BOOL WINAPI CLASS_DECLSPEC NkfFileConvert1SafeA(LPCSTR fName,DWORD nBufferLength /*in TCHARs*/);\r
-BOOL WINAPI CLASS_DECLSPEC NkfFileConvert2SafeA(LPCSTR fInName,DWORD fInBufferLength /*in TCHARs*/,LPCSTR fOutName,DWORD fOutBufferLength /*in TCHARs*/);\r
-BOOL WINAPI CLASS_DECLSPEC GetNkfGuessA(LPSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);\r
-\r
-BOOL WINAPI CLASS_DECLSPEC GetNkfVersionSafeW(LPWSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);\r
-BOOL WINAPI CLASS_DECLSPEC NkfFileConvert1SafeW(LPCWSTR fName,DWORD nBufferLength /*in TCHARs*/);\r
-BOOL WINAPI CLASS_DECLSPEC NkfFileConvert2SafeW(LPCWSTR fInName,DWORD fInBufferLength /*in TCHARs*/,LPCWSTR fOutName,DWORD fOutBufferLength /*in TCHARs*/);\r
-BOOL WINAPI CLASS_DECLSPEC GetNkfGuessW(LPWSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);\r
-BOOL WINAPI CLASS_DECLSPEC GetNkfSupportFunctions(void *outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/);\r
-BOOL WINAPI CLASS_DECLSPEC NkfUsage(LPSTR outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/);\r
-\r
-#ifdef UNICODE\r
-#define GetNkfVersionSafe GetNkfVersionSafeW\r
-#define GetNkfGuess GetNkfGuessW\r
-#define NkfFileConvert1Safe NkfFileConvert1SafeW\r
-#define NkfFileConvert2Safe NkfFileConvert2SafeW\r
-#else /*UNICODE*/\r
-#define GetNkfVersionSafe GetNkfVersionSafeA\r
-#define GetNkfGuess GetNkfGuessA\r
-#define NkfFileConvert1Safe NkfFileConvert1SafeA\r
-#define NkfFileConvert2Safe NkfFileConvert2SafeA\r
-#endif /*UNICODE*/\r
-\r
-struct NKFSUPPORTFUNCTIONS {\r
-DWORD size;\r
-LPCSTR copyrightA;\r
-LPCSTR versionA;\r
-LPCSTR dateA;\r
-DWORD functions;\r
-};\r
-#endif /*!defined(NKF32103A)*/\r
-\r
-#ifdef __cplusplus\r
-} // Balance extern "C" above\r
-#endif /*__cplusplus*/\r
-/* nkf32.dll nkf.h end */\r
+/*\r
+ * \r
+ * nkf.h - Header file for nkf\r
+ * \r
+ * $Id: nkf.h,v 1.2 2008/01/23 09:10:25 naruse Exp $\r
+ */\r
+\r
+\r
+#ifndef NKF_H\r
+/* Include guard: without this define a second #include would repeat every declaration below. */\r
+#define NKF_H\r
+\r
+/* Wrapper of configurations: each default applies only when the macro is not already defined */\r
+\r
+/* default MIME decoding mode */\r
+#ifndef MIME_DECODE_DEFAULT\r
+#define MIME_DECODE_DEFAULT STRICT_MIME\r
+#endif\r
+/* default for JIS X 0201 katakana conversion */\r
+#ifndef X0201_DEFAULT\r
+#define X0201_DEFAULT TRUE\r
+#endif\r
+\r
+#if DEFAULT_NEWLINE == 0x0D0A\r
+#define PUT_NEWLINE(func) do {\\r
+ func(0x0D);\\r
+ func(0x0A);\\r
+} while (0)\r
+#define OCONV_NEWLINE(func) do {\\r
+ func(0, 0x0D);\\r
+ func(0, 0x0A);\\r
+} while (0)\r
+#elif DEFAULT_NEWLINE == 0x0D\r
+#define PUT_NEWLINE(func) func(0x0D)\r
+#define OCONV_NEWLINE(func) func(0, 0x0D)\r
+#else\r
+#define DEFAULT_NEWLINE 0x0A\r
+#define PUT_NEWLINE(func) func(0x0A)\r
+#define OCONV_NEWLINE(func) func(0, 0x0A)\r
+#endif\r
+#ifdef HELP_OUTPUT_STDERR\r
+#define HELP_OUTPUT stderr\r
+#else\r
+#define HELP_OUTPUT stdout\r
+#endif\r
+\r
+\r
+/* Compatibility definitions */\r
+\r
+#ifdef nkf_char\r
+#elif defined(INT_IS_SHORT)\r
+typedef long nkf_char;\r
+#define NKF_INT32_C(n) (n##L)\r
+#else\r
+typedef int nkf_char;\r
+#define NKF_INT32_C(n) (n)\r
+#endif\r
+\r
+#if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__) || defined(__EMX__) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__DOS__) || defined(__OS2__)) && !defined(MSDOS)\r
+#define MSDOS\r
+#if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__)\r
+#define __WIN32__\r
+#endif\r
+#endif\r
+\r
+#ifdef PERL_XS\r
+#undef OVERWRITE\r
+#endif\r
+\r
+#ifndef PERL_XS\r
+#include <stdio.h>\r
+#endif\r
+\r
+#include <stdlib.h>\r
+#include <string.h>\r
+\r
+#if defined(MSDOS) || defined(__OS2__)\r
+#include <fcntl.h>\r
+#include <io.h>\r
+#if defined(_MSC_VER) || defined(__WATCOMC__)\r
+#define mktemp _mktemp\r
+#endif\r
+#endif\r
+\r
+#ifdef MSDOS\r
+#ifdef LSI_C\r
+#define setbinmode(fp) fsetbin(fp)\r
+#elif defined(__DJGPP__)\r
+#include <libc/dosio.h>\r
+void setbinmode(FILE *fp)\r
+{\r
+ /* we do not use libc's setmode(), which changes COOKED/RAW mode in device. */\r
+ int fd, m;\r
+ fd = fileno(fp);\r
+ m = (__file_handle_modes[fd] & (~O_TEXT)) | O_BINARY;\r
+ __file_handle_set(fd, m);\r
+}\r
+#else /* Microsoft C, Turbo C */\r
+#define setbinmode(fp) setmode(fileno(fp), O_BINARY)\r
+#endif\r
+#else /* UNIX */\r
+#define setbinmode(fp)\r
+#endif\r
+\r
+#ifdef _IOFBF /* SysV and MSDOS, Windows */\r
+#define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size)\r
+#else /* BSD */\r
+#define setvbuffer(fp, buf, size) setbuffer(fp, buf, size)\r
+#endif\r
+\r
+/*Borland C++ 4.5 EasyWin*/\r
+#if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */\r
+#define EASYWIN\r
+#ifndef __WIN16__\r
+#define __WIN16__\r
+#endif\r
+#include <windows.h>\r
+#endif\r
+\r
+#ifdef OVERWRITE\r
+/* added by satoru@isoternet.org */\r
+#if defined(__EMX__)\r
+#include <sys/types.h>\r
+#endif\r
+#include <sys/stat.h>\r
+#if !defined(MSDOS) || defined(__DJGPP__) /* UNIX, djgpp */\r
+#include <unistd.h>\r
+#if defined(__WATCOMC__)\r
+#include <sys/utime.h>\r
+#else\r
+#include <utime.h>\r
+#endif\r
+#else /* defined(MSDOS) */\r
+#ifdef __WIN32__\r
+#ifdef __BORLANDC__ /* BCC32 */\r
+#include <utime.h>\r
+#else /* !defined(__BORLANDC__) */\r
+#include <sys/utime.h>\r
+#endif /* (__BORLANDC__) */\r
+#else /* !defined(__WIN32__) */\r
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__WATCOMC__) || defined(__OS2__) || defined(__EMX__) || defined(__IBMC__) || defined(__IBMCPP__) /* VC++, MinGW, Watcom, emx+gcc, IBM VAC++ */\r
+#include <sys/utime.h>\r
+#elif defined(__TURBOC__) /* BCC */\r
+#include <utime.h>\r
+#elif defined(LSI_C) /* LSI C */\r
+#endif /* (__WIN32__) */\r
+#endif\r
+#endif\r
+#endif\r
+\r
+#ifndef __WIN32__ /* not win32 is posix */\r
+#define HAVE_LANGINFO_H\r
+#define HAVE_LOCALE_H\r
+#endif\r
+\r
+#ifdef HAVE_LANGINFO_H\r
+#include <langinfo.h>\r
+#endif\r
+#ifdef HAVE_LOCALE_H\r
+#include <locale.h>\r
+#endif\r
+\r
+#define FALSE 0\r
+#define TRUE 1\r
+\r
+#ifdef WIN32DLL\r
+#include "nkf32.h"\r
+#endif\r
+\r
+#endif\r
--- /dev/null
+#ifndef NKF32_H\r
+/* Include guard: without this define a second #include would redeclare struct NKFSUPPORTFUNCTIONS. */\r
+#define NKF32_H\r
+/* CLASS_DECLSPEC defaults to empty when the includer has not defined it */\r
+#ifndef CLASS_DECLSPEC\r
+/* dll __declspec(dllexport) */\r
+/* app __declspec(dllimport) */\r
+#define CLASS_DECLSPEC\r
+#endif\r
+\r
+#ifdef __cplusplus\r
+extern "C" {\r
+#endif /* __cplusplus */\r
+#ifdef __BORLANDC__\r
+#pragma argsused\r
+#endif /*__BORLANDC__*/\r
+/* uminchu nkf32103a.lzh 1.00 */\r
+void CALLBACK CLASS_DECLSPEC GetNkfVersion(LPSTR verStr);\r
+int CALLBACK CLASS_DECLSPEC SetNkfOption(LPCSTR optStr);\r
+void CALLBACK CLASS_DECLSPEC NkfConvert(LPSTR outStr, LPCSTR inStr);\r
+/* uminchu nkf32103a.lzh 1.02 */\r
+void CALLBACK CLASS_DECLSPEC ToHankaku(LPSTR inStr);\r
+void CALLBACK CLASS_DECLSPEC ToZenkakuKana(LPSTR outStr ,LPCSTR inStr);\r
+/* uminchu nkf32103a.lzh 1.03 */\r
+void CALLBACK CLASS_DECLSPEC EncodeSubject(LPSTR outStr ,LPCSTR inStr);\r
+/* tkaneto nkf32204.zip 2.0.4.0 */\r
+#ifdef TOMIME\r
+void CALLBACK CLASS_DECLSPEC ToMime(LPSTR outStr ,LPCSTR inStr);\r
+#endif /*TOMIME*/\r
+#ifdef GETKANJICODE\r
+int CALLBACK CLASS_DECLSPEC NkfGetKanjiCode(VOID);\r
+#endif /*GETKANJICODE*/\r
+#ifdef FILECONVERT1\r
+void CALLBACK CLASS_DECLSPEC NkfFileConvert1(LPCSTR fName);\r
+#endif /*FILECONVERT1*/\r
+#ifdef FILECONVERT2\r
+void CALLBACK CLASS_DECLSPEC NkfFileConvert2(LPCSTR fInName,LPCSTR fOutName);\r
+#endif /*FILECONVERT2*/\r
+#ifndef NKF32103A\r
+/* safe */\r
+BOOL WINAPI CLASS_DECLSPEC GetNkfVersionSafeA(LPSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);\r
+BOOL WINAPI CLASS_DECLSPEC NkfConvertSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/, LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);\r
+BOOL WINAPI CLASS_DECLSPEC ToZenkakuKanaSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);\r
+BOOL WINAPI CLASS_DECLSPEC ToHankakuSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);\r
+BOOL WINAPI CLASS_DECLSPEC EncodeSubjectSafe(LPSTR outStr,DWORD nOutBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/,LPCSTR inStr,DWORD nInBufferLength /*in Bytes*/);\r
+BOOL WINAPI CLASS_DECLSPEC NkfFileConvert1SafeA(LPCSTR fName,DWORD nBufferLength /*in TCHARs*/);\r
+BOOL WINAPI CLASS_DECLSPEC NkfFileConvert2SafeA(LPCSTR fInName,DWORD fInBufferLength /*in TCHARs*/,LPCSTR fOutName,DWORD fOutBufferLength /*in TCHARs*/);\r
+BOOL WINAPI CLASS_DECLSPEC GetNkfGuessA(LPSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);\r
+\r
+BOOL WINAPI CLASS_DECLSPEC GetNkfVersionSafeW(LPWSTR verStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);\r
+BOOL WINAPI CLASS_DECLSPEC NkfFileConvert1SafeW(LPCWSTR fName,DWORD nBufferLength /*in TCHARs*/);\r
+BOOL WINAPI CLASS_DECLSPEC NkfFileConvert2SafeW(LPCWSTR fInName,DWORD fInBufferLength /*in TCHARs*/,LPCWSTR fOutName,DWORD fOutBufferLength /*in TCHARs*/);\r
+BOOL WINAPI CLASS_DECLSPEC GetNkfGuessW(LPWSTR outStr,DWORD nBufferLength /*in TCHARs*/,LPDWORD lpTCHARsReturned /*in TCHARs*/);\r
+BOOL WINAPI CLASS_DECLSPEC GetNkfSupportFunctions(void *outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/);\r
+BOOL WINAPI CLASS_DECLSPEC NkfUsage(LPSTR outStr,DWORD nBufferLength /*in Bytes*/,LPDWORD lpBytesReturned /*in Bytes*/);\r
+\r
+#ifdef UNICODE\r
+#define GetNkfVersionSafe GetNkfVersionSafeW\r
+#define GetNkfGuess GetNkfGuessW\r
+#define NkfFileConvert1Safe NkfFileConvert1SafeW\r
+#define NkfFileConvert2Safe NkfFileConvert2SafeW\r
+#else /*UNICODE*/\r
+#define GetNkfVersionSafe GetNkfVersionSafeA\r
+#define GetNkfGuess GetNkfGuessA\r
+#define NkfFileConvert1Safe NkfFileConvert1SafeA\r
+#define NkfFileConvert2Safe NkfFileConvert2SafeA\r
+#endif /*UNICODE*/\r
+\r
+struct NKFSUPPORTFUNCTIONS {\r
+DWORD size;\r
+LPCSTR copyrightA;\r
+LPCSTR versionA;\r
+LPCSTR dateA;\r
+DWORD functions;\r
+};\r
+#endif /*!defined(NKF32103A)*/\r
+\r
+#ifdef __cplusplus\r
+} // Balance extern "C" above\r
+#endif /*__cplusplus*/\r
+#endif\r
+/*
+ * utf8tbl.c - Conversion Table for nkf
+ *
+ * $Id: utf8tbl.c,v 1.22 2008/01/23 09:10:25 naruse Exp $
+ */
+
#include "config.h"
+#include "utf8tbl.h"
#ifdef UTF8_OUTPUT_ENABLE
static const unsigned short euc_to_utf8_A1[] = {
+/*
+ * utf8tbl.h - Header file for Conversion Table
+ *
+ * $Id: utf8tbl.h,v 1.3 2008/01/23 09:10:25 naruse Exp $
+ */
+
#ifndef _UTF8TBL_H_
#define _UTF8TBL_H_
#ifdef UTF8_OUTPUT_ENABLE
+#define sizeof_euc_to_utf8_1byte 94
+#define sizeof_euc_to_utf8_2bytes 94
extern const unsigned short euc_to_utf8_1byte[];
extern const unsigned short *const euc_to_utf8_2bytes[];
extern const unsigned short *const euc_to_utf8_2bytes_ms[];
#endif /* UTF8_OUTPUT_ENABLE */
#ifdef UTF8_INPUT_ENABLE
+#define sizeof_utf8_to_euc_C2 64
+#define sizeof_utf8_to_euc_E5B8 64
+#define sizeof_utf8_to_euc_2bytes 112
+#define sizeof_utf8_to_euc_3bytes 16
extern const unsigned short *const utf8_to_euc_2bytes[];
extern const unsigned short *const utf8_to_euc_2bytes_ms[];
extern const unsigned short *const utf8_to_euc_2bytes_932[];
#endif /* UTF8_INPUT_ENABLE */
#ifdef UNICODE_NORMALIZATION
+
+#define NORMALIZATION_TABLE_LENGTH 942
+#define NORMALIZATION_TABLE_NFC_LENGTH 3
+#define NORMALIZATION_TABLE_NFD_LENGTH 9
+struct normalization_pair {
+ const unsigned char nfc[NORMALIZATION_TABLE_NFC_LENGTH];
+ const unsigned char nfd[NORMALIZATION_TABLE_NFD_LENGTH];
+};
extern const struct normalization_pair normalization_table[];
#endif
#ifdef SHIFTJIS_CP932
+#define CP932_TABLE_BEGIN 0xFA
+#define CP932_TABLE_END 0xFC
extern const unsigned short shiftjis_cp932[3][189];
+#define CP932INV_TABLE_BEGIN 0xED
+#define CP932INV_TABLE_END 0xEE
extern const unsigned short cp932inv[2][189];
#endif /* SHIFTJIS_CP932 */