* \e$B8=:_!"\e(Bnkf \e$B$O\e(B SorceForge \e$B$K$F%a%s%F%J%s%9$,B3$1$i$l$F$$$^$9!#\e(B
* http://sourceforge.jp/projects/nkf/
***********************************************************************/
-/* $Id: nkf.c,v 1.164 2008/01/21 23:05:37 naruse Exp $ */
+/* $Id: nkf.c,v 1.165 2008/01/22 00:30:05 naruse Exp $ */
#define NKF_VERSION "2.0.8"
#define NKF_RELEASE_DATE "2008-01-21"
#define COPY_RIGHT \
#include "config.h"
#include "utf8tbl.h"
-#if defined(DEFAULT_CODE_JIS)
-#elif defined(DEFAULT_CODE_SJIS)
-#elif defined(DEFAULT_CODE_EUC)
-#elif defined(DEFAULT_CODE_UTF8)
-#else
-#define DEFAULT_CODE_JIS 1
-#endif
-
#ifndef MIME_DECODE_DEFAULT
#define MIME_DECODE_DEFAULT STRICT_MIME
#endif
#endif
#endif
+
+#ifndef __WIN32__
+#define HAVE_LANGINFO_H
+#define HAVE_LOCALE_H
+#endif
+
+#ifdef HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
+#ifdef HAVE_LOCALE_H
+#include <locale.h>
+#endif
+
#define FALSE 0
#define TRUE 1
};
#if defined(DEFAULT_CODE_JIS)
-#define DEFAULT_ENCODING ISO_2022_JP
+#define DEFAULT_ENCIDX ISO_2022_JP
#elif defined(DEFAULT_CODE_SJIS)
-#define DEFAULT_ENCODING SHIFT_JIS
+#define DEFAULT_ENCIDX SHIFT_JIS
#elif defined(DEFAULT_CODE_EUC)
-#define DEFAULT_ENCODING EUC_JP
+#define DEFAULT_ENCIDX EUC_JP
#elif defined(DEFAULT_CODE_UTF8)
-#define DEFAULT_ENCODING UTF_8
+#define DEFAULT_ENCIDX UTF_8
#endif
static int fold_margin = FOLD_MARGIN;
-/* converters */
-
-#ifdef DEFAULT_CODE_JIS
-# define DEFAULT_CONV j_oconv
-#endif
-#ifdef DEFAULT_CODE_SJIS
-# define DEFAULT_CONV s_oconv
-#endif
-#ifdef DEFAULT_CODE_EUC
-# define DEFAULT_CONV e_oconv
-#endif
-#ifdef DEFAULT_CODE_UTF8
-# define DEFAULT_CONV w_oconv
-#endif
-
/* process default */
static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
nkf_enc_to_index(enc) == CP50221 ||\
nkf_enc_to_index(enc) == CP50222)
+#ifndef DEFAULT_ENCIDX
+static char* nkf_locale_charmap()
+{
+#ifdef HAVE_LANGINFO_H
+ return nl_langinfo(CODESET);
+#elif defined(__WIN32__)
+ return sprintf("CP%d", GetACP());
+#else
+ return NULL;
+#endif
+}
+
+static nkf_encoding* nkf_locale_encoding()
+{
+ nkf_encoding *enc = 0;
+ char *encname = nkf_locale_charmap();
+ if (encname)
+ enc = nkf_enc_find(encname);
+ if (enc < 0) enc = 0;
+ return enc;
+}
+#endif
+
+static nkf_encoding* nkf_default_encoding()
+{
+#ifdef DEFAULT_ENCIDX
+ return nkf_enc_from_index(DEFAULT_ENCIDX);
+#else
+ nkf_encoding *enc = nkf_locale_encoding();
+ if (enc <= 0) enc = nkf_enc_from_index(ISO_2022_JP);
+ return enc;
+#endif
+}
+
#ifdef WIN32DLL
#include "nkf32dll.c"
#elif defined(PERL_XS)
#ifdef EASYWIN /*Easy Win */
_BufferSize.y = 400;/*Set Scroll Buffer Size*/
#endif
+ setlocale(LC_CTYPE, "");
for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
cp = (unsigned char *)*argv;
#endif
x0213_f = TRUE;
break;
+ case SHIFT_JIS:
+ break;
case WINDOWS_31J:
#ifdef SHIFTJIS_CP932
cp51932_f = TRUE;
ms_ucs_map_f = UCS_MAP_CP932;
#endif
break;
+ case EUC_JP:
+ break;
case CP10001:
#ifdef SHIFTJIS_CP932
cp51932_f = TRUE;
static void set_output_encoding(nkf_encoding *enc)
{
- x0201_f = FALSE;
switch (nkf_enc_to_index(enc)) {
case CP50220:
x0201_f = TRUE;
#ifdef SHIFTJIS_CP932
- cp932inv_f = FALSE;
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
break;
case CP50221:
#ifdef SHIFTJIS_CP932
- cp932inv_f = FALSE;
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
x0212_f = TRUE;
#endif
#ifdef SHIFTJIS_CP932
- cp932inv_f = FALSE;
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
break;
case ISO_2022_JP_3:
#endif
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
- cp932inv_f = FALSE;
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
break;
+ case SHIFT_JIS:
+ break;
case WINDOWS_31J:
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
ms_ucs_map_f = UCS_MAP_CP10001;
#endif
break;
+ case EUC_JP:
+ x0212_f = TRUE;
+#ifdef SHIFTJIS_CP932
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
+#endif
+#ifdef UTF8_OUTPUT_ENABLE
+ ms_ucs_map_f = UCS_MAP_CP932;
+#endif
+ break;
case CP51932:
#ifdef SHIFTJIS_CP932
- cp932inv_f = FALSE;
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
case SHIFT_JIS_2004:
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
- cp932inv_f = FALSE;
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
break;
case EUC_JISX0213:
#endif
x0213_f = TRUE;
#ifdef SHIFTJIS_CP932
- cp932inv_f = FALSE;
+ if (cp932inv_f == TRUE) cp932inv_f = FALSE;
#endif
break;
#ifdef UTF8_OUTPUT_ENABLE
enc = nkf_enc_find(codeset);
if (!enc) continue;
input_encoding = enc;
- set_input_encoding(enc);
continue;
}
if (strcmp(long_option[i].name, "oc=") == 0){
enc = nkf_enc_find(codeset);
if (enc <= 0) continue;
output_encoding = enc;
- set_output_encoding(output_encoding);
continue;
}
if (strcmp(long_option[i].name, "guess=") == 0){
if (strcmp(long_option[i].name, "cp932") == 0){
#ifdef SHIFTJIS_CP932
cp51932_f = TRUE;
- cp932inv_f = TRUE;
+ cp932inv_f = -TRUE;
#endif
#ifdef UTF8_OUTPUT_ENABLE
ms_ucs_map_f = UCS_MAP_CP932;
}
#ifdef SHIFTJIS_CP932
if (strcmp(long_option[i].name, "cp932inv") == 0){
- cp932inv_f = TRUE;
+ cp932inv_f = -TRUE;
continue;
}
#endif
output_encoding = nkf_enc_from_index(ISO_2022_JP);
continue;
case 'e': /* AT&T EUC output */
- cp932inv_f = FALSE;
output_encoding = nkf_enc_from_index(EUC_JP);
continue;
case 's': /* SJIS output */
void module_connection(void)
{
+ if (input_encoding) set_input_encoding(input_encoding);
if (!output_encoding) {
- output_encoding = nkf_enc_from_index(DEFAULT_ENCODING);
- set_output_encoding(output_encoding);
+ output_encoding = nkf_default_encoding();
}
+ set_output_encoding(output_encoding);
oconv = nkf_enc_to_oconv(output_encoding);
o_putc = std_putc;
if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
/* CP932 UDC */
c1 &= 0xFFF;
- c2 = c1 / 188 + 0xF0;
+ c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
c1 = c1 % 188;
c1 += 0x40 + (c1 > 0x3e);
(*o_putc)(c2);
kanji_intro = DEFAULT_J;
ascii_intro = DEFAULT_R;
fold_margin = FOLD_MARGIN;
- oconv = DEFAULT_CONV;
o_zconv = no_connection;
o_fconv = no_connection;
o_nlconv = no_connection;
fprintf(HELP_OUTPUT,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
fprintf(HELP_OUTPUT,"Flags:\n");
fprintf(HELP_OUTPUT,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
-#ifdef DEFAULT_CODE_SJIS
- fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift_JIS (DEFAULT), EUC-JP, UTF-8N\n");
-#endif
-#ifdef DEFAULT_CODE_JIS
- fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, EUC-JP, UTF-8N\n");
-#endif
-#ifdef DEFAULT_CODE_EUC
- fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP (DEFAULT), UTF-8N\n");
-#endif
-#ifdef DEFAULT_CODE_UTF8
- fprintf(HELP_OUTPUT,"j,s,e,w Output code is JIS 7 bit, Shift JIS, EUC-JP, UTF-8N (DEFAULT)\n");
-#endif
+ fprintf(HELP_OUTPUT,"j,s,e,w Output code is ISO-2022-JP, Shift JIS, EUC-JP, UTF-8N\n");
#ifdef UTF8_OUTPUT_ENABLE
fprintf(HELP_OUTPUT," After 'w' you can add more options. -w[ 8 [0], 16 [[BL] [0]] ]\n");
#endif
fprintf(HELP_OUTPUT, "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n");
fprintf(HELP_OUTPUT, " Compile-time options:\n");
fprintf(HELP_OUTPUT, " Default output encoding: "
-#if defined(DEFAULT_CODE_JIS)
- "ISO-2022-JP"
-#elif defined(DEFAULT_CODE_SJIS)
- "Shift_JIS"
-#elif defined(DEFAULT_CODE_EUC)
- "EUC-JP"
-#elif defined(DEFAULT_CODE_UTF8)
- "UTF-8"
+#ifdef DEFAULT_ENCIDX
+ "%s\n", nkf_enc_name(nkf_default_encoding())
+#else
+ "%s (%s)\n", nkf_locale_encoding() ? "LOCALE" : "DEFAULT",
+ nkf_enc_name(nkf_default_encoding())
#endif
- "\n");
+ );
fprintf(HELP_OUTPUT, " Default output newline: "
#if DEFAULT_NEWLINE == CR
"CR"