X-Git-Url: http://git.sourceforge.jp/view?p=nkf%2Fnkf.git;a=blobdiff_plain;f=nkf.c;h=b58c437d3cc0da1fd87a69c19feec8d0e6fe595f;hp=b9a38d9b3febd203bf8f216b725fe31f4d50b707;hb=9fd04d0dfbeff6ac8cb877b9853fb09af6811f33;hpb=7b58d086aa03d56b6a2ab3023dbd20ef433f79a3
diff --git a/nkf.c b/nkf.c
index b9a38d9..b58c437 100644
--- a/nkf.c
+++ b/nkf.c
@@ -1,41 +1,30 @@
-/** Network Kanji Filter. (PDS Version)
- ** -*- coding: ISO-2022-JP -*-
- ************************************************************************
- ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA)
- ** 連絡先: (株)富士通研究所 ソフト3研 市川 至
- ** (E-Mail Address: ichikawa@flab.fujitsu.co.jp)
- ** Copyright (C) 1996,1998
- ** Copyright (C) 2002
- ** 連絡先: 琉球大学情報工学科 河野 真治 mime/X0208 support
- ** (E-Mail Address: kono@ie.u-ryukyu.ac.jp)
- ** 連絡先: COW for DOS & Win16 & Win32 & OS/2
- ** (E-Mail Address: GHG00637@niftyserve.or.p)
- **
- ** このソースのいかなる複写,改変,修正も許諾します。ただし、
- ** その際には、誰が貢献したを示すこの部分を残すこと。
- ** 再配布や雑誌の付録などの問い合わせも必要ありません。
- ** 営利利用も上記に反しない範囲で許可します。
- ** バイナリの配布の際にはversion messageを保存することを条件とします。
- ** このプログラムについては特に何の保証もしない、悪しからず。
- **
- ** Everyone is permitted to do anything on this program
- ** including copying, modifying, improving,
- ** as long as you don't try to pretend that you wrote it.
- ** i.e., the above copyright notice has to appear in all copies.
- ** Binary distribution requires original version messages.
- ** You don't have to ask before copying, redistribution or publishing.
- ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
- ***********************************************************************/
-
-/***********************************************************************
- * 現在、nkf は SorceForge にてメンテナンスが続けられています。
- * http://sourceforge.jp/projects/nkf/
- ***********************************************************************/
-#define NKF_VERSION "2.0.8"
-#define NKF_RELEASE_DATE "2009-01-05"
+/*
+ * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
+ * Copyright (c) 1996-2013, The nkf Project.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+#define NKF_VERSION "2.1.4"
+#define NKF_RELEASE_DATE "2015-12-12"
#define COPY_RIGHT \
- "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW\n" \
- "Copyright (C) 2002-2009 Kono, Furukawa, Naruse, mastodon"
+ "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
+ "Copyright (C) 1996-2015, The nkf Project."
#include "config.h"
#include "nkf.h"
@@ -221,6 +210,8 @@ struct {
} encoding_name_to_id_table[] = {
{"US-ASCII", ASCII},
{"ASCII", ASCII},
+ {"646", ASCII},
+ {"ROMAN8", ASCII},
{"ISO-2022-JP", ISO_2022_JP},
{"ISO2022JP-CP932", CP50220},
{"CP50220", CP50220},
@@ -232,6 +223,8 @@ struct {
{"ISO-2022-JP-2004", ISO_2022_JP_2004},
{"SHIFT_JIS", SHIFT_JIS},
{"SJIS", SHIFT_JIS},
+ {"MS_Kanji", SHIFT_JIS},
+ {"PCK", SHIFT_JIS},
{"WINDOWS-31J", WINDOWS_31J},
{"CSWINDOWS31J", WINDOWS_31J},
{"CP932", WINDOWS_31J},
@@ -306,7 +299,7 @@ struct {
&& (c != '(') && (c != ')') && (c != '.') && (c != 0x22)))
#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END)
-#define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c < (0xE0&0x7F))
+#define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c <= 0x5F)
#define HOLD_SIZE 1024
#if defined(INT_IS_SHORT)
@@ -363,6 +356,7 @@ static int no_cp932ext_f = FALSE;
/* ignore ZERO WIDTH NO-BREAK SPACE */
static int no_best_fit_chars_f = FALSE;
static int input_endian = ENDIAN_BIG;
+static int input_bom_f = FALSE;
static nkf_char unicode_subchar = '?'; /* the regular substitution character */
static void (*encode_fallback)(nkf_char c) = NULL;
static void w_status(struct input_code *, nkf_char);
@@ -390,6 +384,8 @@ static unsigned char stdibuf[IOBUF_SIZE];
static unsigned char stdobuf[IOBUF_SIZE];
#endif
+#define NKF_UNSPECIFIED (-TRUE)
+
/* flags */
static int unbuf_f = FALSE;
static int estab_f = FALSE;
@@ -404,7 +400,7 @@ static int mimebuf_f = FALSE; /* MIME buffered input */
static int broken_f = FALSE; /* convert ESC-less broken JIS */
static int iso8859_f = FALSE; /* ISO8859 through */
static int mimeout_f = FALSE; /* base64 mode */
-static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
+static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
#ifdef UNICODE_NORMALIZATION
@@ -435,6 +431,8 @@ static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
#define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX)
+#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
+
#ifdef NUMCHAR_OPTION
static int numchar_f = FALSE;
static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
@@ -479,8 +477,10 @@ struct input_code input_code_list[] = {
{"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
#ifdef UTF8_INPUT_ENABLE
{"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
+ {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
+ {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
#endif
- {0}
+ {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
};
static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
@@ -509,10 +509,10 @@ static int fold_margin = FOLD_MARGIN;
/* process default */
static nkf_char
-no_connection2(nkf_char c2, nkf_char c1, nkf_char c0)
+no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
{
fprintf(stderr,"nkf internal module connection failure.\n");
- exit(1);
+ exit(EXIT_FAILURE);
return 0; /* LINT */
}
@@ -623,6 +623,27 @@ static const unsigned char ev[]= {
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00};
+/* X0201 kana to X0213 conversion table for han-dakuten */
+/* 90-9F A0-DF */
+static const unsigned char ev_x0213[]= {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
+ 0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
+ 0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00};
+
/* X0208 kigou conversion table */
/* 0x8140 - 0x819e */
@@ -660,10 +681,6 @@ static nkf_char prev_cr = 0; /* CR or 0 */
static int end_check;
#endif /*Easy Win */
-#define STD_GC_BUFSIZE (256)
-nkf_char std_gc_buf[STD_GC_BUFSIZE];
-nkf_char std_gc_ndx;
-
static void *
nkf_xmalloc(size_t size)
{
@@ -757,7 +774,7 @@ nkf_enc_find(const char *name)
#ifdef DEFAULT_CODE_LOCALE
static const char*
-nkf_locale_charmap()
+nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
return nl_langinfo(CODESET);
@@ -785,7 +802,7 @@ nkf_locale_charmap()
}
static nkf_encoding*
-nkf_locale_encoding()
+nkf_locale_encoding(void)
{
nkf_encoding *enc = 0;
const char *encname = nkf_locale_charmap();
@@ -796,13 +813,13 @@ nkf_locale_encoding()
#endif /* DEFAULT_CODE_LOCALE */
static nkf_encoding*
-nkf_utf8_encoding()
+nkf_utf8_encoding(void)
{
return &nkf_encoding_table[UTF_8];
}
static nkf_encoding*
-nkf_default_encoding()
+nkf_default_encoding(void)
{
nkf_encoding *enc = 0;
#ifdef DEFAULT_CODE_LOCALE
@@ -817,30 +834,32 @@ nkf_default_encoding()
typedef struct {
long capa;
long len;
- unsigned char *ptr;
+ nkf_char *ptr;
} nkf_buf_t;
static nkf_buf_t *
nkf_buf_new(int length)
{
nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
- buf->ptr = nkf_xmalloc(length);
+ buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
buf->capa = length;
buf->len = 0;
return buf;
-}
+}
+#if 0
static void
nkf_buf_dispose(nkf_buf_t *buf)
{
nkf_xfree(buf->ptr);
nkf_xfree(buf);
}
+#endif
#define nkf_buf_length(buf) ((buf)->len)
#define nkf_buf_empty_p(buf) ((buf)->len == 0)
-static unsigned char
+static nkf_char
nkf_buf_at(nkf_buf_t *buf, int index)
{
assert(index <= buf->len);
@@ -850,17 +869,19 @@ nkf_buf_at(nkf_buf_t *buf, int index)
static void
nkf_buf_clear(nkf_buf_t *buf)
{
- buf->ptr = 0;
+ buf->len = 0;
}
static void
-nkf_buf_push(nkf_buf_t *buf, unsigned char c)
+nkf_buf_push(nkf_buf_t *buf, nkf_char c)
{
- assert(buf->capa > buf->len);
+ if (buf->capa <= buf->len) {
+ exit(EXIT_FAILURE);
+ }
buf->ptr[buf->len++] = c;
}
-static unsigned char
+static nkf_char
nkf_buf_pop(nkf_buf_t *buf)
{
assert(!nkf_buf_empty_p(buf));
@@ -883,79 +904,61 @@ static void
usage(void)
{
fprintf(HELP_OUTPUT,
- "USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n"
- "Flags:\n"
- "b,u Output is buffered (DEFAULT),Output is unbuffered\n"
- "j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n"
+ "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
#ifdef UTF8_OUTPUT_ENABLE
- " After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n"
+ " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
+ " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
+#else
#endif
- "J,S,E,W Input assumption is JIS 7 bit , Shift JIS, EUC-JP, UTF-8\n"
#ifdef UTF8_INPUT_ENABLE
- " After 'W' you can add more options. -W[ 8, 16 [BL] ] \n"
+ " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
+ " UTF option is -W[8,[16,32][B,L]]\n"
+#else
+ " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
#endif
- "t no conversion\n"
);
fprintf(HELP_OUTPUT,
- "i[@B] Specify the Esc Seq for JIS X 0208-1978/83 (DEFAULT B)\n"
- "o[BJH] Specify the Esc Seq for ASCII/Roman (DEFAULT B)\n"
- "r {de/en}crypt ROT13/47\n"
- "h 1 katakana->hiragana, 2 hiragana->katakana, 3 both\n"
- "m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:non-strict,0:no decode]\n"
- "M[BQ] MIME encode [B:base64 Q:quoted]\n"
- "l ISO8859-1 (Latin-1) support\n"
- "f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
+ " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
+ " M[BQ] MIME encode [B:base64 Q:quoted]\n"
+ " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
);
fprintf(HELP_OUTPUT,
- "Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
- " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
- " 4: JISX0208 Katakana to JISX0201 Katakana\n"
- "X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
- "B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n"
+ " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
+ " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
+ " 4: JISX0208 Katakana to JISX0201 Katakana\n"
+ " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
);
fprintf(HELP_OUTPUT,
-#ifdef MSDOS
- "T Text mode output\n"
-#endif
- "O Output to File (DEFAULT 'nkf.out')\n"
- "I Convert non ISO-2022-JP charactor to GETA\n"
- "d,c Convert line breaks -d: LF -c: CRLF\n"
- "-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
- "v, V Show this usage. V: show configuration\n"
- "\n");
+ " O Output to File (DEFAULT 'nkf.out')\n"
+ " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
+ );
fprintf(HELP_OUTPUT,
- "Long name options\n"
- " --ic= --oc=