X-Git-Url: http://git.sourceforge.jp/view?p=nkf%2Fnkf.git;a=blobdiff_plain;f=NKF.mod%2FNKF.pm;h=46a2973dd63af9de940e2e6e07e99645dac2c129;hp=6b27f039bc1b21a148df7b36e35c6ee4c93f600b;hb=fd8f86d043f10f017f3b237bb4bd5aad0a07f97a;hpb=cb07971ef0cb2d798c4ff0bfea60df0d90b0ae1a diff --git a/NKF.mod/NKF.pm b/NKF.mod/NKF.pm index 6b27f03..46a2973 100644 --- a/NKF.mod/NKF.pm +++ b/NKF.mod/NKF.pm @@ -36,7 +36,7 @@ require DynaLoader; @EXPORT = qw( nkf nkf_continue inputcode ); -$VERSION = '2.05'; +$VERSION = '2.08'; bootstrap NKF $VERSION; @@ -46,23 +46,46 @@ bootstrap NKF $VERSION; 1; __END__ -# Below is the stub of documentation for your module. You better edit it! + +# +# =begin から =begin COMMAND までは Perl/NKF のドキュメント +# =begin COMMAND から =end までは nkf コマンドのドキュメント +# =head1 NAME -NKF - Perl extension for Network Kanji Filter +=begin + +NKF - Perl extension for Network Kanji Filter + +=begin COMMAND + +nkf - Network Kanji Filter + +=end =head1 SYNOPSIS +=begin + use NKF; $output = nkf("-s",$input); +=begin COMMAND + +nkf B<[-butjnesliohrTVvwWJESZxXFfmMBOcdILg]> B<[>I<file ...>B<]> + +=end + =head1 DESCRIPTION +=begin + This is a Perl Extension version of nkf (Netowrk Kanji Filter). It converts the last argument and return converted result. Conversion details are specified by flags before the last argument. +=end B<Nkf> is a yet another kanji code converter among networks, hosts and terminals. It converts input kanji code to designated kanji code @@ -77,23 +100,23 @@ For X0201 kana, SO/SI, SSO and ESC-(-I methods are supported. For automatic code detection, nkf assumes no X0201 kana in Shift_JIS. To accept X0201 in Shift_JIS, use B<-X>, B<-x> or B<-S>. -=head1 Flags +=head1 OPTIONS =over =item B<-b -u> -Output is bufferred (DEFAULT),Output is unbufferred +Output is buffered (DEFAULT), Output is unbuffered. =item B<-j -s -e -w -w16> -Outout code is ISO-2022-JP (7bit JIS), Shift_JIS, EUC-JP, +Output code is ISO-2022-JP (7bit JIS), Shift_JIS, EUC-JP, UTF-8N, UTF-16BE. 
Without this option and compile option, ISO-2022-JP is assumed. =item B<-J -S -E -W -W16> -Input assumption is JIS 7 bit , Shift_JIS, EUC-JP, +Input assumption is JIS 7 bit, Shift_JIS, EUC-JP, UTF-8, UTF-16LE. =over @@ -118,21 +141,40 @@ Same as -J. =item B<-t> -no conversion +No conversion. -=item B<-i_> +=item B<-i[@B]> -Output sequence to designate JIS-kanji (DEFAULT B) +Specify the Esc Seq for JIS X 0208-1978/83. (DEFAULT B) -=item B<-o_> +=item B<-o[BJH]> -Output sequence to designate ASCII (DEFAULT B) +Specify the Esc Seq for ASCII/Roman. (DEFAULT B) =item B<-r> {de/en}crypt ROT13/47 +=item B<-h[123] --hiragana --katakana --katakana-hiragana> + +=over + +=item B<-h1 --hiragana> + +Katakana to Hiragana conversion. + +=item B<-h2 --katakana> + +Hiragana to Katakana conversion. + +=item B<-h3 --katakana-hiragana> + +Katakana to Hiragana and Hiragana to Katakana conversion. + +=back + =item B<-T> + Text mode output (MS-DOS) =item B<-l> @@ -148,7 +190,7 @@ Without this option, fold length is 60 and fold margin is 10. New line preserving line folding. -=item B<-Z[0-2]> +=item B<-Z[0-3]> Convert X0208 alphabet (Fullwidth Alphabets) to ASCII. @@ -160,11 +202,11 @@ Convert X0208 alphabet to ASCII. =item B<-Z1> -converts X0208 kankaku to single ASCII space. +Converts X0208 kankaku to single ASCII space. =item B<-Z2> -converts X0208 kankaku to double ASCII spaces. +Converts X0208 kankaku to double ASCII spaces. =item B<-Z3> @@ -176,7 +218,7 @@ Replacing Fullwidth >, <, ", & into '>', '<', '"', '&' as in HTML Assume X0201 kana in MS-Kanji. With B<-X> or without this option, X0201 is converted into X0208 Kana. -With B<-x>, try to preseve X0208 kana and do not convert X0201 kana to X0208. +With B<-x>, try to preserve X0208 kana and do not convert X0201 kana to X0208. In JIS output, ESC-(-I is used. In EUC output, SSO is used. =item B<-B[0-2]> @@ -201,13 +243,9 @@ forces ASCII after NL. Replacing non iso-2022-jp char into a geta character (substitute character in Japanese). 
-=item B<-d -c> - -Delete \r in line feed, Add \r in line feed - =item B<-m[BQN0]> -MIME ISO-2022-JP/ISO8859-1 decode. (default) +MIME ISO-2022-JP/ISO8859-1 decode. (DEFAULT) To see ISO8859-1 (Latin-1) -l is necessary. =over @@ -245,7 +283,7 @@ Kanji conversion is performed before encoding, so this cannot be used as a pictu =item B<-MQ> -perfome quoted encoding. +Perform quoted encoding. =back @@ -254,17 +292,17 @@ perfome quoted encoding. Input and output code is ISO8859-1 (Latin-1) and ISO-2022-JP. B<-s>, B<-e> and B<-x> are not compatible with this option. -=item B<-L[wmu]> +=item B<-L[uwm] -d -c> -new line mode +Convert line breaks. =over -=item B<-Lu> +=item B<-Lu -d> unix (LF) -=item B<-Lw> +=item B<-Lw -c> windows (CRLF) @@ -321,10 +359,40 @@ a.k.a. Windows-31J =item UTF-8 -=item UTF-16 +same as UTF-8N + +=item UTF-8N + +UTF-8 without BOM + +=item UTF-8-BOM + +UTF-8 with BOM =item UTF8-MAC (input only) +decomposed UTF-8 + +=item UTF-16 + +same as UTF-16BE + +=item UTF-16BE + +UTF-16 Big Endian without BOM + +=item UTF-16BE-BOM + +UTF-16 Big Endian with BOM + +=item UTF-16LE + +UTF-16 Little Endian without BOM + +=item UTF-16LE-BOM + +UTF-16 Little Endian with BOM + =back =item B<--fb-{skip, html, xml, perl, java, subchar}> @@ -332,9 +400,25 @@ a.k.a. Windows-31J Specify the way that nkf handles unassigned characters. Without this option, --fb-skip is assumed. -=item B<--disable-cp932ext> +=item B<--prefix=>I<escape character>I<target character>.. + +When nkf converts to Shift_JIS, +nkf adds the specified escape character before the specified 2nd bytes of Shift_JIS characters. +The 1st byte of the argument is the escape character and the following bytes are the target characters. + +=item B<--no-cp932ext> + +Handle the characters extended in CP932 as unassigned characters. -Handle the characters extended in CP932 as unassinged characters. +=item B<--no-best-fit-chars> + +When converting from Unicode to an encoded byte sequence, +don't convert characters which are not round-trip safe. 
+When converting from Unicode to Unicode, +with this and the -x option, nkf can be used as a UTF converter. +(In other words, without this and the -x option, nkf doesn't preserve some characters.) + +When nkf converts strings related to paths, you should use this option. =item B<--cap-input> @@ -344,28 +428,49 @@ Decode hex encoded characters. Unescape percent escaped characters. -=item B<--> +=item B<--numchar-input> + +Decode character reference, such as "&#....;". + +=begin COMMAND + +=item B<--in-place[=>I<SUFFIX>B<]> B<--overwrite[=>I<SUFFIX>B<]> + +Overwrite B<original> listed files by filtered result. -ignore rest of -option +B<Note> --overwrite preserves the timestamp of the original files. -=item B<-v --help> +=item B<--guess> -output this help. +Print guessed encoding. -=item B<-V --version> +=item B<--help> -output version info. +Print nkf's help. + +=item B<--version> + +Print nkf's version. + +=end + +=item B<--> + +Ignore rest of -option. =back =head1 AUTHOR -Network Kanji Filter Version 2.0.5 +Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW +Copyright (C) 2002-2006 Kono, Furukawa, Naruse, mastodon -Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse +=begin =head1 SEE ALSO perl(1). nkf(1) +=end + =cut