NKF.xs must follow nkf.c, which no longer has WISH_TRUE and NO_X0201.

[nkf/nkf.git] / NKF.mod / NKF.pm
diff --git a/NKF.mod/NKF.pm b/NKF.mod/NKF.pm

index 3a52d5b..e7f39f3 100644 (file)
--- a/NKF.mod/NKF.pm
+++ b/NKF.mod/NKF.pm
@@ -1,25 +1,24 @@
-## Copyright (C) 1996,1998
-## Copyright (C) 2002
-## 連絡先： 琉球大学情報工学科 河野 真治  mime/X0208 support
-## （E-Mail Address: kono@ie.u-ryukyu.ac.jp）
-## 連絡先： COW for DOS & Win16 & Win32 & OS/2
-## （E-Mail Address: GHG00637@niftyserve.or.p）
-##    
-##    ¤³¤Î¥½¡¼¥¹¤Î¤¤¤«¤Ê¤ëÊ£¼Ì¡¤²þÊÑ¡¤½¤Àµ¤âµöÂú¤·¤Þ¤¹¡£¤¿¤À¤·¡¢
-##    ¤½¤ÎºÝ¤Ë¤Ï¡¢Ã¯¤¬¹×¸¥¤·¤¿¤ò¼¨¤¹¤³¤ÎÉôÊ¬¤ò»Ä¤¹¤³¤È¡£
-##    ºÆÇÛÉÛ¤ä»¨»ï¤ÎÉÕÏ¿¤Ê¤É¤ÎÌä¤¤¹ç¤ï¤»¤âÉ¬Í×¤¢¤ê¤Þ¤»¤ó¡£
-##    ±ÄÍøÍøÍÑ¤â¾åµ¤ËÈ¿¤·¤Ê¤¤ÈÏ°Ï¤Çµö²Ä¤·¤Þ¤¹¡£
-##    ¥Ð¥¤¥Ê¥ê¤ÎÇÛÉÛ¤ÎºÝ¤Ë¤Ïversion message¤òÊÝÂ¸¤¹¤ë¤³¤È¤ò¾ò·ï¤È¤·¤Þ¤¹¡£
-##    ¤³¤Î¥×¥í¥°¥é¥à¤Ë¤Ä¤¤¤Æ¤ÏÆÃ¤Ë²¿¤ÎÊÝ¾Ú¤â¤·¤Ê¤¤¡¢°¤·¤«¤é¤º¡£
-##    
-##    Everyone is permitted to do anything on this program
-##    including copying, modifying, improving, 
-##    as long as you don't try to pretend that you wrote it.
-##    i.e., the above copyright notice has to appear in all copies.  
-##    Binar y distribution requires original version messages.
-##    You don't have to ask before copying, redistribution or publishing.
-##    THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE.
-
+# Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
+# Copyright (c) 1996-2009, The nkf Project.
+# All rights reserved.
+#
+# This software is provided 'as-is', without any express or implied
+# warranty. In no event will the authors be held liable for any damages
+# arising from the use of this software.
+#
+# Permission is granted to anyone to use this software for any purpose,
+# including commercial applications, and to alter it and redistribute it
+# freely, subject to the following restrictions:
+#
+# 1. The origin of this software must not be misrepresented; you must not
+# claim that you wrote the original software. If you use this software
+# in a product, an acknowledgment in the product documentation would be
+# appreciated but is not required.
+#
+# 2. Altered source versions must be plainly marked as such, and must not be
+# misrepresented as being the original software.
+#
+# 3. This notice may not be removed or altered from any source distribution.
  
  package NKF;
  
@@ -36,7 +35,7 @@ require DynaLoader;
  @EXPORT = qw(
         nkf     nkf_continue    inputcode
  );
-$VERSION = '2.06';
+$VERSION = '2.09';
  
  bootstrap NKF $VERSION;
  
@@ -46,29 +45,53 @@ bootstrap NKF $VERSION;
  
  1;
  __END__
-# Below is the stub of documentation for your module. You better edit it!
+
+#
+# =begin から =begin COMMAND までは Perl/NKF のドキュメント
+# =begin COMMAND から =end までは nkf コマンドのドキュメント
+# 
  
  =head1 NAME
  
+=begin
+
  NKF - Perl extension for Network Kanji Filter
  
+=begin COMMAND
+
+nkf - Network Kanji Filter
+
+=end
+
  =head1 SYNOPSIS
  
+=begin
+
    use NKF;
    $output = nkf("-s",$input);
  
+=begin COMMAND
+
+nkf B<[-butjnesliohrTVvwWJESZxXFfmMBOcdILg]> B<[>I<file ...>B<]>
+
+=end
+
  =head1 DESCRIPTION
  
+=begin
+
  This is a Perl Extension version of nkf (Network Kanji Filter).
  It converts the last argument and returns the converted result. Conversion
  details are specified by flags before the last argument.
  
+=end
+
  B<Nkf> is yet another kanji code converter among networks, hosts and terminals.
  It converts input kanji code to designated kanji code
-such as ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8 or UTF-16.
+such as ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8, UTF-16 or UTF-32.
  
  One of the most distinctive features of B<nkf> is that it guesses the input kanji encoding.
-It currently recognizes ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8 and UTF-16.
+It currently recognizes ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8, UTF-16 and UTF-32.
  So users needn't set the input kanji code explicitly.
  
  By default, X0201 kana is converted into X0208 kana.
@@ -76,56 +99,77 @@ For X0201 kana, SO/SI, SSO and ESC-(-I methods are supported.
  For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
  To accept X0201 in Shift_JIS, use B<-X>, B<-x> or B<-S>.
  
-=head1 Flags
+=head1 OPTIONS
  
  =over
  
-=item B<-b -u>
-
-Output is buffered (DEFAULT), Output is unbuffered.
-
-=item B<-j -s -e -w -w16>
+=item B<-J -S -E -W -W16 -W32 -j -s -e -w -w16 -w32>
  
-Output code is ISO-2022-JP (7bit JIS), Shift_JIS, EUC-JP,
-UTF-8N, UTF-16BE.
-Without this option and compile option, ISO-2022-JP is assumed.
-
-=item B<-J -S -E -W -W16>
-
-Input assumption is JIS 7 bit, Shift_JIS, EUC-JP,
-UTF-8, UTF-16LE.
+Specify input and output encodings. Upper case is input.
+cf. --ic and --oc.
  
  =over
  
  =item B<-J>
  
-Assume  JIS input. It also accepts EUC-JP.
-This is the default. This flag does not exclude Shift_JIS.
+ISO-2022-JP (JIS code).
  
  =item B<-S>
  
-Assume Shift_JIS and X0201 kana input. It also accepts JIS.
+Shift_JIS and JIS X 0201 kana.
  EUC-JP is recognized as X0201 kana. Without B<-x> flag,
-X0201 kana (halfwidth kana) is converted into X0208.
+JIS X 0201 Katakana (a.k.a. halfwidth kana) is converted into JIS X 0208.
+If you use Windows, see Windows-31J (CP932).
  
  =item B<-E>
  
-Assume EUC-JP input. It also accepts JIS.
-Same as -J.
+EUC-JP.
+
+=item B<-W>
+
+UTF-8N.
+
+=item B<-W16[BL][0]>
+
+UTF-16.
+B or L gives whether Big Endian or Little Endian.
+0 specifies whether to put a BOM or not.
+
+=item B<-W32[BL][0]>
+
+UTF-32.
+B or L gives whether Big Endian or Little Endian.
+0 specifies whether to put a BOM or not.
  
  =back
  
+=item B<-b -u>
+
+Output is buffered (DEFAULT), Output is unbuffered.
+
  =item B<-t>
  
  No conversion.
  
  =item B<-i[@B]>
  
-Specify the Esc Seq for JIS X 0208-1978/83. (DEFAULT B)
+Specify the escape sequence for JIS X 0208.
+
+=over
+
+=item B<-i@>
  
-=item B<-o[BJH]>
+Use ESC $ @. (JIS X 0208-1978)
  
-Specify the Esc Seq for ASCII/Roman. (DEFAULT B)
+=item B<-iB>
+
+Use ESC $ B. (JIS X 0208-1983/1990 DEFAULT)
+
+=back
+
+=item B<-o[BJ]>
+
+Specify the escape sequence for US-ASCII/JIS X 0201 Roman. (DEFAULT B)
  
  =item B<-r>
  
@@ -178,15 +222,15 @@ Convert X0208 alphabet to ASCII.
  
  =item B<-Z1>
  
-Converts X0208 kankaku to single ASCII space.
+Convert X0208 kankaku to single ASCII space.
  
  =item B<-Z2>
  
-Converts X0208 kankaku to double ASCII spaces.
+Convert X0208 kankaku to double ASCII spaces.
  
  =item B<-Z3>
  
-Replacing Fullwidth >, <, ", & into '&gt;', '&lt;', '&quot;', '&amp;' as in HTML.
+Replacing fullwidth >, <, ", & into '&gt;', '&lt;', '&quot;', '&amp;' as in HTML.
  
  =back
  
@@ -206,11 +250,11 @@ Useful when your site is using old B-News Nihongo patch.
  
  =item B<-B1>
  
-allows any char after ESC-( or ESC-$.
+allows any chars after ESC-( or ESC-$.
  
  =item B<-B2>
  
-forces ASCII after NL.
+force ASCII after NL.
  
  =back
  
@@ -259,7 +303,7 @@ Kanji conversion is performed before encoding, so this cannot be used as a pictu
  
  =item B<-MQ>
  
-Perfome quoted encoding.
+Perform quoted encoding.
  
  =back
  
@@ -292,20 +336,20 @@ Without this option, nkf doesn't convert line breaks.
  
  =item B<--fj --unix --mac --msdos --windows>
  
-convert for these system
+Convert for these systems.
  
  =item B<--jis --euc --sjis --mime --base64>
  
-convert for named code
+Convert to named code.
  
  =item B<--jis-input --euc-input --sjis-input --mime-input --base64-input>
  
-assume input system
+Assume input system
  
  =item B<--ic=I<input codeset> --oc=I<output codeset>>
  
  Set the input or output codeset.
-NKF supports following codesets and those codeset name are case insensitive.
+NKF supports following codesets and those codeset names are case insensitive.
  
  =over
  
@@ -329,9 +373,9 @@ Microsoft Version of EUC-JP.
  
  a.k.a. SJIS, MS-Kanji
  
-=item CP932
+=item Windows-31J
  
-a.k.a. Windows-31J
+a.k.a. CP932
  
  =item UTF-8
  
@@ -345,6 +389,10 @@ UTF-8 without BOM
  
  UTF-8 with BOM
  
+=item UTF8-MAC (input only)
+
+decomposed UTF-8
+
  =item UTF-16
  
  same as UTF-16BE
@@ -365,7 +413,25 @@ UTF-16 Little Endian without BOM
  
  UTF-16 Little Endian with BOM
  
-=item UTF8-MAC (input only)
+=item UTF-32
+
+same as UTF-32BE
+
+=item UTF-32BE
+
+UTF-32 Big Endian without BOM
+
+=item UTF-32BE-BOM
+
+UTF-32 Big Endian with BOM
+
+=item UTF-32LE
+
+UTF-32 Little Endian without BOM
+
+=item UTF-32LE-BOM
+
+UTF-32 Little Endian with BOM
  
  =back
  
@@ -392,7 +458,7 @@ When Unicode to Unicode conversion,
  with this and -x option, nkf can be used as UTF converter.
  (In other words, without this and -x option, nkf doesn't save some characters)
  
-When nkf convert string which related to path, you should use this opion.
+When nkf converts strings related to paths, you should use this option.
  
  =item B<--cap-input>
  
@@ -406,6 +472,28 @@ Unescape percent escaped characters.
  
  Decode character reference, such as "&#....;".
  
+=begin COMMAND
+
+=item B<--in-place[=>I<SUFFIX>B<]>  B<--overwrite[=>I<SUFFIX>B<]>
+
+Overwrite B<original> listed files by filtered result.
+
+B<Note> --overwrite preserves timestamps of original files.
+
+=item B<--guess=[12]>
+
+Print guessed encoding and newline. (2 is default, 1 is only encoding)
+
+=item B<--help>
+
+Print nkf's help.
+
+=item B<--version>
+
+Print nkf's version.
+
+=end
+
  =item B<-->
  
  Ignore rest of -option.
@@ -414,13 +502,16 @@ Ignore rest of -option.
  
  =head1 AUTHOR
  
-Network Kanji Filter Version 2.0.6
+Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
+
+Copyright (c) 1996-2009, The nkf Project.
  
-Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW
-                     2002-2006 Kono, Furukawa, Naruse, mastodon
+=begin
  
  =head1 SEE ALSO
  
  perl(1).   nkf(1)
  
+=end
+
  =cut