-.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
+.\" Automatically generated by Pod::Man 4.09 (Pod::Simple 3.35)
.\"
.\" Standard preamble:
.\" ========================================================================
-.de Sh \" Subsection heading
-.br
-.if t .Sp
-.ne 5
-.PP
-\fB\\$1\fR
-.PP
-..
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.\" Set up some character translations and predefined strings. \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
-.\" double quote, and \*(R" will give a right double quote. | will give a
-.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
-.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
-.\" expand to `' in nroff, nothing in troff, for use with C<>.
-.tr \(*W-|\(bv\*(Tr
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
. ds -- \(*W-
. ds PI \(*p
. ds L" ``
. ds R" ''
+. ds C`
+. ds C'
'br\}
.\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
-.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD. Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
-.if \nF \{\
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.if !\nF .nr F 0
+.if \nF>0 \{\
. de IX
. tm Index:\\$1\t\\n%\t"\\$2"
..
-. nr % 0
-. rr F
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
.\}
.\"
-.\" For nroff, turn off justification. Always turn off hyphenation; it makes
-.\" way too many mistakes in technical documents.
-.hy 0
-.if n .na
-.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear. Run. Save yourself. No user-serviceable parts.
. \" fudge factors for nroff and troff
.\" ========================================================================
.\"
.IX Title "nkf 1"
-.TH nkf 1 "2007-08-08" "nkf 2.0.8" " "
+.TH nkf 1 "2018-12-15" "nkf 2.1.5" " "
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
.SH "NAME"
nkf \- Network Kanji Filter
.SH "SYNOPSIS"
.IX Header "DESCRIPTION"
\&\fBNkf\fR is a yet another kanji code converter among networks, hosts and terminals.
It converts input kanji code to designated kanji code
-such as \s-1ISO\-2022\-JP\s0, Shift_JIS, \s-1EUC\-JP\s0, \s-1UTF\-8\s0 or \s-1UTF\-16\s0.
+such as \s-1ISO\-2022\-JP,\s0 Shift_JIS, EUC-JP, \s-1UTF\-8, UTF\-16\s0 or \s-1UTF\-32.\s0
.PP
One of the most unique faculty of \fBnkf\fR is the guess of the input kanji encodings.
-It currently recognizes \s-1ISO\-2022\-JP\s0, Shift_JIS, \s-1EUC\-JP\s0, \s-1UTF\-8\s0 and \s-1UTF\-16\s0.
+It currently recognizes \s-1ISO\-2022\-JP,\s0 Shift_JIS, EUC-JP, \s-1UTF\-8, UTF\-16\s0 and \s-1UTF\-32.\s0
So users needn't set the input kanji code explicitly.
.PP
By default, X0201 kana is converted into X0208 kana.
-For X0201 kana, \s-1SO/SI\s0, \s-1SSO\s0 and \s-1ESC\-\s0(\-I methods are supported.
+For X0201 kana, \s-1SO/SI, SSO\s0 and \s-1ESC\-\s0(\-I methods are supported.
For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
To accept X0201 in Shift_JIS, use \fB\-X\fR, \fB\-x\fR or \fB\-S\fR.
+.PP
+Multiple options are specified as separate strings, such as
+.PP
+.Vb 1
+\& print nkf(\*(Aq\-\-ic=UTF8\-MAC\*(Aq, \*(Aq\-w\*(Aq, $string), "\en";
+.Ve
+.PP
+except for the last argument.
.SH "OPTIONS"
.IX Header "OPTIONS"
-.IP "\fB\-b \-u\fR" 4
-.IX Item "-b -u"
-Output is buffered (\s-1DEFAULT\s0), Output is unbuffered.
-.IP "\fB\-j \-s \-e \-w \-w16\fR" 4
-.IX Item "-j -s -e -w -w16"
-Output code is \s-1ISO\-2022\-JP\s0 (7bit \s-1JIS\s0), Shift_JIS, \s-1EUC\-JP\s0,
-\&\s-1UTF\-8N\s0, \s-1UTF\-16BE\s0.
-Without this option and compile option, \s-1ISO\-2022\-JP\s0 is assumed.
-.IP "\fB\-J \-S \-E \-W \-W16\fR" 4
-.IX Item "-J -S -E -W -W16"
-Input assumption is \s-1JIS\s0 7 bit, Shift_JIS, \s-1EUC\-JP\s0,
-\&\s-1UTF\-8\s0, \s-1UTF\-16LE\s0.
+.IP "\fB\-J \-S \-E \-W \-W16 \-W32 \-j \-s \-e \-w \-w16 \-w32\fR" 4
+.IX Item "-J -S -E -W -W16 -W32 -j -s -e -w -w16 -w32"
+Specify input and output encodings. Upper case is input.
+cf. \-\-ic and \-\-oc.
.RS 4
.IP "\fB\-J\fR" 4
.IX Item "-J"
-Assume \s-1JIS\s0 input.
-This is the default.
+\&\s-1ISO\-2022\-JP\s0 (\s-1JIS\s0 code).
.IP "\fB\-S\fR" 4
.IX Item "-S"
-Assume Shift_JIS and X0201 kana input.
+Shift_JIS and \s-1JIS X 0201\s0 kana.
EUC-JP is recognized as X0201 kana. Without \fB\-x\fR flag,
-X0201 kana (halfwidth kana) is converted into X0208.
+\&\s-1JIS X 0201\s0 Katakana (a.k.a. halfwidth kana) is converted into \s-1JIS X 0208.\s0
+If you use Windows, see Windows\-31J (\s-1CP932\s0).
.IP "\fB\-E\fR" 4
.IX Item "-E"
-Assume EUC-JP input.
+EUC-JP.
+.IP "\fB\-W\fR" 4
+.IX Item "-W"
+\&\s-1UTF\-8N.\s0
+.IP "\fB\-W16[\s-1BL\s0][0]\fR" 4
+.IX Item "-W16[BL][0]"
+\&\s-1UTF\-16.
+B\s0 or L gives whether Big Endian or Little Endian.
+0 gives whether to put \s-1BOM\s0 or not.
+.IP "\fB\-W32[\s-1BL\s0][0]\fR" 4
+.IX Item "-W32[BL][0]"
+\&\s-1UTF\-32.
+B\s0 or L gives whether Big Endian or Little Endian.
+0 gives whether to put \s-1BOM\s0 or not.
.RE
.RS 4
.RE
+.IP "\fB\-b \-u\fR" 4
+.IX Item "-b -u"
+Output is buffered (\s-1DEFAULT\s0), Output is unbuffered.
.IP "\fB\-t\fR" 4
.IX Item "-t"
No conversion.
.IP "\fB\-i[@B]\fR" 4
.IX Item "-i[@B]"
-Specify the Esc Seq for \s-1JIS\s0 X 0208\-1978/83. (\s-1DEFAULT\s0 B)
-.IP "\fB\-o[\s-1BJH\s0]\fR" 4
-.IX Item "-o[BJH]"
-Specify the Esc Seq for ASCII/Roman. (\s-1DEFAULT\s0 B)
+Specify the escape sequence for \s-1JIS X 0208.\s0
+.RS 4
+.IP "\fB\-i@\fR" 4
+.IX Item "-i@"
+Use \s-1ESC\s0 ( @. (\s-1JIS X 0208\-1978\s0)
+.IP "\fB\-iB\fR" 4
+.IX Item "-iB"
+Use \s-1ESC\s0 ( B. (\s-1JIS X 0208\-1983/1990 DEFAULT\s0)
+.RE
+.RS 4
+.RE
+.IP "\fB\-o[\s-1BJ\s0]\fR" 4
+.IX Item "-o[BJ]"
+Specify the escape sequence for \s-1US\-ASCII/JIS X 0201\s0 Roman. (\s-1DEFAULT B\s0)
.IP "\fB\-r\fR" 4
.IX Item "-r"
{de/en}crypt \s-1ROT13/47\s0
.RE
.IP "\fB\-T\fR" 4
.IX Item "-T"
-Text mode output (\s-1MS\-DOS\s0)
-.IP "\fB\-l\fR" 4
-.IX Item "-l"
-\&\s-1ISO8859\-1\s0 (Latin\-1) support
+Text mode output (MS-DOS)
.IP "\fB\-f[\f(BIm\fB [\- \f(BIn\fB]]\fR" 4
.IX Item "-f[m [- n]]"
Folding on \fIm\fR length with \fIn\fR margin in a line.
New line preserving line folding.
.IP "\fB\-Z[0\-3]\fR" 4
.IX Item "-Z[0-3]"
-Convert X0208 alphabet (Fullwidth Alphabets) to \s-1ASCII\s0.
+Convert X0208 alphabet (Fullwidth Alphabets) to \s-1ASCII.\s0
.RS 4
.IP "\fB\-Z \-Z0\fR" 4
.IX Item "-Z -Z0"
-Convert X0208 alphabet to \s-1ASCII\s0.
+Convert X0208 alphabet to \s-1ASCII.\s0
.IP "\fB\-Z1\fR" 4
.IX Item "-Z1"
Convert X0208 kankaku to single \s-1ASCII\s0 space.
Convert X0208 kankaku to double \s-1ASCII\s0 spaces.
.IP "\fB\-Z3\fR" 4
.IX Item "-Z3"
-Replacing fullwidth >, <, ", & into '>', '<', '"', '&' as in \s-1HTML\s0.
+Replacing fullwidth >, <, ", & into '>', '<', '"', '&' as in \s-1HTML.\s0
.RE
.RS 4
.RE
.IP "\fB\-X \-x\fR" 4
.IX Item "-X -x"
-Assume X0201 kana in MS\-Kanji.
With \fB\-X\fR or without this option, X0201 is converted into X0208 Kana.
With \fB\-x\fR, try to preserve X0208 kana and do not convert X0201 kana to X0208.
-In \s-1JIS\s0 output, \s-1ESC\-\s0(\-I is used. In \s-1EUC\s0 output, \s-1SSO\s0 is used.
+In \s-1JIS\s0 output, \s-1ESC\-\s0(\-I is used. In \s-1EUC\s0 output, \s-1SS2\s0 is used.
.IP "\fB\-B[0\-2]\fR" 4
.IX Item "-B[0-2]"
-Assume broken JIS-Kanji input, which lost \s-1ESC\s0.
+Assume broken JIS-Kanji input, which lost \s-1ESC.\s0
Useful when your site is using old B\-News Nihongo patch.
.RS 4
.IP "\fB\-B1\fR" 4
.IX Item "-B1"
-allows any chars after \s-1ESC\-\s0( or \s-1ESC\-$\s0.
+allows any chars after \s-1ESC\-\s0( or \s-1ESC\-$.\s0
.IP "\fB\-B2\fR" 4
.IX Item "-B2"
-force \s-1ASCII\s0 after \s-1NL\s0.
+force \s-1ASCII\s0 after \s-1NL.\s0
.RE
.RS 4
.RE
(substitute character in Japanese).
.IP "\fB\-m[\s-1BQN0\s0]\fR" 4
.IX Item "-m[BQN0]"
-\&\s-1MIME\s0 \s-1ISO\-2022\-JP/ISO8859\-1\s0 decode. (\s-1DEFAULT\s0)
+\&\s-1MIME ISO\-2022\-JP/ISO8859\-1\s0 decode. (\s-1DEFAULT\s0)
To see \s-1ISO8859\-1\s0 (Latin\-1) \-l is necessary.
.RS 4
.IP "\fB\-mB\fR" 4
.IX Item "-mB"
Decode \s-1MIME\s0 base64 encoded stream. Remove header or other part before
-conversion.
+conversion.
.IP "\fB\-mQ\fR" 4
.IX Item "-mQ"
Decode \s-1MIME\s0 quoted stream. '_' in quoted stream is converted to space.
.RE
.IP "\fB\-l\fR" 4
.IX Item "-l"
-Input and output code is \s-1ISO8859\-1\s0 (Latin\-1) and \s-1ISO\-2022\-JP\s0.
+Input and output code is \s-1ISO8859\-1\s0 (Latin\-1) and \s-1ISO\-2022\-JP.\s0
\&\fB\-s\fR, \fB\-e\fR and \fB\-x\fR are not compatible with this option.
.IP "\fB\-L[uwm] \-d \-c\fR" 4
.IX Item "-L[uwm] -d -c"
.RS 4
.IP "\s-1ISO\-2022\-JP\s0" 4
.IX Item "ISO-2022-JP"
-a.k.a. \s-1RFC1468\s0, 7bit \s-1JIS\s0, \s-1JUNET\s0
-.IP "EUC-JP (eucJP\-nkf)" 4
+a.k.a. \s-1RFC1468,\s0 7bit \s-1JIS, JUNET\s0
+.IP "EUC-JP (eucJP-nkf)" 4
.IX Item "EUC-JP (eucJP-nkf)"
-a.k.a. \s-1AT&T\s0 \s-1JIS\s0, Japanese \s-1EUC\s0, \s-1UJIS\s0
+a.k.a. \s-1AT&T JIS,\s0 Japanese \s-1EUC, UJIS\s0
.IP "eucJP-ascii" 4
.IX Item "eucJP-ascii"
.PD 0
.IP "\s-1CP51932\s0" 4
.IX Item "CP51932"
.PD
-Microsoft Version of \s-1EUC\-JP\s0.
+Microsoft Version of EUC-JP.
.IP "Shift_JIS" 4
.IX Item "Shift_JIS"
-a.k.a. \s-1SJIS\s0, MS-Kanji
-.IP "\s-1CP932\s0" 4
-.IX Item "CP932"
-a.k.a. Windows\-31J
+a.k.a. \s-1SJIS,\s0 MS_Kanji
+.IP "Windows\-31J" 4
+.IX Item "Windows-31J"
+a.k.a. \s-1CP932\s0
.IP "\s-1UTF\-8\s0" 4
.IX Item "UTF-8"
same as \s-1UTF\-8N\s0
.IP "\s-1UTF\-16LE\-BOM\s0" 4
.IX Item "UTF-16LE-BOM"
\&\s-1UTF\-16\s0 Little Endian with \s-1BOM\s0
+.IP "\s-1UTF\-32\s0" 4
+.IX Item "UTF-32"
+same as \s-1UTF\-32BE\s0
+.IP "\s-1UTF\-32BE\s0" 4
+.IX Item "UTF-32BE"
+\&\s-1UTF\-32\s0 Big Endian without \s-1BOM\s0
+.IP "\s-1UTF\-32BE\-BOM\s0" 4
+.IX Item "UTF-32BE-BOM"
+\&\s-1UTF\-32\s0 Big Endian with \s-1BOM\s0
+.IP "\s-1UTF\-32LE\s0" 4
+.IX Item "UTF-32LE"
+\&\s-1UTF\-32\s0 Little Endian without \s-1BOM\s0
+.IP "\s-1UTF\-32LE\-BOM\s0" 4
+.IX Item "UTF-32LE-BOM"
+\&\s-1UTF\-32\s0 Little Endian with \s-1BOM\s0
.RE
.RS 4
.RE
.IX Item "--numchar-input"
Decode character reference, such as \*(L"&#....;\*(R".
.IP "\fB\-\-in\-place[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR \fB\-\-overwrite[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR" 4
-.IX Item "--in-place[=SUFFIX] --overwrite[=SUFFIX]"
+.IX Item "--in-place[=SUFFIX] --overwrite[=SUFFIX]"
Overwrite \fBoriginal\fR listed files by filtered result.
.Sp
\&\fBNote\fR \-\-overwrite preserves timestamps of original files.
-.IP "\fB\-\-guess\fR" 4
-.IX Item "--guess"
-Print guessed encoding.
+.IP "\fB\-\-guess=[12]\fR" 4
+.IX Item "--guess=[12]"
+Print guessed encoding and newline. (2 is default, 1 is only encoding)
.IP "\fB\-\-help\fR" 4
.IX Item "--help"
Print nkf's help.
Ignore rest of \-option.
.SH "AUTHOR"
.IX Header "AUTHOR"
-Copyright (C) 1987, \s-1FUJITSU\s0 \s-1LTD\s0. (I.Ichikawa),2000 S. Kono, \s-1COW\s0
-Copyright (C) 2002\-2007 Kono, Furukawa, Naruse, mastodon
+Copyright (c) 1987, Fujitsu \s-1LTD.\s0 (Itaru \s-1ICHIKAWA\s0).
+.PP
+Copyright (c) 1996\-2018, The nkf Project.