.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.3 .\" .\" Standard preamble: .\" ======================================================================== .de Sh \" Subsection heading .br .if t .Sp .ne 5 .PP \fB\\$1\fR .PP .. .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. | will give a .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' .\" expand to `' in nroff, nothing in troff, for use with C<>. .tr \(*W-|\(bv\*(Tr .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . nr % 0 . rr F .\} .\" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .hy 0 .if n .na .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . 
ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "nkf 1" .TH nkf 1 "2006-03-15" "nkf 2.0.6" " " .SH "NAME" nkf \- Network Kanji Filter .SH "SYNOPSIS" .IX Header "SYNOPSIS" nkf \fB\-[\fR\fIflags\fR\fB]\fR \fB[\fR\fIin_file\fR\fB]\fR .. \fB[\fR\fIout_file_for_\-O_flag\fR\fB]\fR .SH "DESCRIPTION" .IX Header "DESCRIPTION" \&\fBNkf\fR is a yet another kanji code converter among networks, hosts and terminals. 
It converts input kanji code to designated kanji code such as \s-1ISO\-2022\-JP\s0, Shift_JIS, \s-1EUC\-JP\s0, \s-1UTF\-8\s0 or \s-1UTF\-16\s0. .PP One of the most distinctive features of \fBnkf\fR is its ability to guess the input kanji encoding. It currently recognizes \s-1ISO\-2022\-JP\s0, Shift_JIS, \s-1EUC\-JP\s0, \s-1UTF\-8\s0 and \s-1UTF\-16\s0. So users needn't set the input kanji code explicitly. .PP By default, X0201 kana is converted into X0208 kana. For X0201 kana, \s-1SO/SI\s0, \s-1SSO\s0 and \s-1ESC\-\s0(\-I methods are supported. For automatic code detection, nkf assumes no X0201 kana in Shift_JIS. To accept X0201 in Shift_JIS, use \fB\-X\fR, \fB\-x\fR or \fB\-S\fR. .SH "Flags" .IX Header "Flags" .IP "\fB\-b \-u\fR" 4 .IX Item "-b -u" Output is buffered (\s-1DEFAULT\s0), Output is unbuffered. .IP "\fB\-j \-s \-e \-w \-w16\fR" 4 .IX Item "-j -s -e -w -w16" Output code is \s-1ISO\-2022\-JP\s0 (7bit \s-1JIS\s0), Shift_JIS, \s-1EUC\-JP\s0, \&\s-1UTF\-8N\s0, \s-1UTF\-16BE\s0. Without this option and compile option, \s-1ISO\-2022\-JP\s0 is assumed. .IP "\fB\-J \-S \-E \-W \-W16\fR" 4 .IX Item "-J -S -E -W -W16" Input assumption is \s-1JIS\s0 7 bit, Shift_JIS, \s-1EUC\-JP\s0, \&\s-1UTF\-8\s0, \s-1UTF\-16LE\s0. .RS 4 .IP "\fB\-J\fR" 4 .IX Item "-J" Assume \s-1JIS\s0 input. It also accepts \s-1EUC\-JP\s0. This is the default. This flag does not exclude Shift_JIS. .IP "\fB\-S\fR" 4 .IX Item "-S" Assume Shift_JIS and X0201 kana input. It also accepts \s-1JIS\s0. EUC-JP is recognized as X0201 kana. Without \fB\-x\fR flag, X0201 kana (halfwidth kana) is converted into X0208. .IP "\fB\-E\fR" 4 .IX Item "-E" Assume EUC-JP input. It also accepts \s-1JIS\s0. Same as \-J. .RE .RS 4 .RE .IP "\fB\-t\fR" 4 .IX Item "-t" No conversion. .IP "\fB\-i[@B]\fR" 4 .IX Item "-i[@B]" Specify the Esc Seq for \s-1JIS\s0 X 0208\-1978/83. (\s-1DEFAULT\s0 B) .IP "\fB\-o[\s-1BJH\s0]\fR" 4 .IX Item "-o[BJH]" Specify the Esc Seq for ASCII/Roman.
(\s-1DEFAULT\s0 B) .IP "\fB\-r\fR" 4 .IX Item "-r" {de/en}crypt \s-1ROT13/47\s0 .IP "\fB\-h[123] \-\-hiragana \-\-katakana \-\-katakana\-hiragana\fR" 4 .IX Item "-h[123] --hiragana --katakana --katakana-hiragana" .RS 4 .PD 0 .IP "\fB\-h1 \-\-hiragana\fR" 4 .IX Item "-h1 --hiragana" .PD Katakana to Hiragana conversion. .IP "\fB\-h2 \-\-katakana\fR" 4 .IX Item "-h2 --katakana" Hiragana to Katakana conversion. .IP "\fB\-h3 \-\-katakana\-hiragana\fR" 4 .IX Item "-h3 --katakana-hiragana" Katakana to Hiragana and Hiragana to Katakana conversion. .RE .RS 4 .RE .IP "\fB\-T\fR" 4 .IX Item "-T" Text mode output (\s-1MS\-DOS\s0) .IP "\fB\-l\fR" 4 .IX Item "-l" \&\s-1ISO8859\-1\s0 (Latin\-1) support .IP "\fB\-f[\f(BIm\fB [\- \f(BIn\fB]]\fR" 4 .IX Item "-f[m [- n]]" Folding on \fIm\fR length with \fIn\fR margin in a line. Without this option, fold length is 60 and fold margin is 10. .IP "\fB\-F\fR" 4 .IX Item "-F" New line preserving line folding. .IP "\fB\-Z[0\-3]\fR" 4 .IX Item "-Z[0-3]" Convert X0208 alphabet (Fullwidth Alphabets) to \s-1ASCII\s0. .RS 4 .IP "\fB\-Z \-Z0\fR" 4 .IX Item "-Z -Z0" Convert X0208 alphabet to \s-1ASCII\s0. .IP "\fB\-Z1\fR" 4 .IX Item "-Z1" Converts X0208 kankaku to single \s-1ASCII\s0 space. .IP "\fB\-Z2\fR" 4 .IX Item "-Z2" Converts X0208 kankaku to double \s-1ASCII\s0 spaces. .IP "\fB\-Z3\fR" 4 .IX Item "-Z3" Replacing Fullwidth >, <, ", & into '&gt;', '&lt;', '&quot;', '&amp;' as in \s-1HTML\s0. .RE .RS 4 .RE .IP "\fB\-X \-x\fR" 4 .IX Item "-X -x" Assume X0201 kana in MS\-Kanji. With \fB\-X\fR or without this option, X0201 is converted into X0208 Kana. With \fB\-x\fR, try to preserve X0208 kana and do not convert X0201 kana to X0208. In \s-1JIS\s0 output, \s-1ESC\-\s0(\-I is used. In \s-1EUC\s0 output, \s-1SSO\s0 is used. .IP "\fB\-B[0\-2]\fR" 4 .IX Item "-B[0-2]" Assume broken JIS-Kanji input, which lost \s-1ESC\s0. Useful when your site is using old B\-News Nihongo patch.
.RS 4 .IP "\fB\-B1\fR" 4 .IX Item "-B1" allows any char after \s-1ESC\-\s0( or \s-1ESC\-$\s0. .IP "\fB\-B2\fR" 4 .IX Item "-B2" forces \s-1ASCII\s0 after \s-1NL\s0. .RE .RS 4 .RE .IP "\fB\-I\fR" 4 .IX Item "-I" Replace non iso\-2022\-jp characters with a geta character (substitute character in Japanese). .IP "\fB\-d \-c\fR" 4 .IX Item "-d -c" Convert line breaks \-d: \s-1LF\s0 \-c: \s-1CRLF\s0. .IP "\fB\-m[\s-1BQN0\s0]\fR" 4 .IX Item "-m[BQN0]" \&\s-1MIME\s0 \s-1ISO\-2022\-JP/ISO8859\-1\s0 decode. (\s-1DEFAULT\s0) To see \s-1ISO8859\-1\s0 (Latin\-1) \-l is necessary. .RS 4 .IP "\fB\-mB\fR" 4 .IX Item "-mB" Decode \s-1MIME\s0 base64 encoded stream. Remove header or other part before conversion. .IP "\fB\-mQ\fR" 4 .IX Item "-mQ" Decode \s-1MIME\s0 quoted stream. '_' in quoted stream is converted to space. .IP "\fB\-mN\fR" 4 .IX Item "-mN" Non-strict decoding. It allows line breaks in the middle of the base64 encoding. .IP "\fB\-m0\fR" 4 .IX Item "-m0" No \s-1MIME\s0 decode. .RE .RS 4 .RE .IP "\fB\-M\fR" 4 .IX Item "-M" \&\s-1MIME\s0 encode. Header style. All \s-1ASCII\s0 code and control characters are left intact. .RS 4 .IP "\fB\-MB\fR" 4 .IX Item "-MB" \&\s-1MIME\s0 encode Base64 stream. Kanji conversion is performed before encoding, so this cannot be used as a picture encoder. .IP "\fB\-MQ\fR" 4 .IX Item "-MQ" Perform quoted encoding. .RE .RS 4 .RE .IP "\fB\-l\fR" 4 .IX Item "-l" Input and output code is \s-1ISO8859\-1\s0 (Latin\-1) and \s-1ISO\-2022\-JP\s0. \&\fB\-s\fR, \fB\-e\fR and \fB\-x\fR are not compatible with this option. .IP "\fB\-L[uwm]\fR" 4 .IX Item "-L[uwm]" new line mode .RS 4 .IP "\fB\-Lu\fR" 4 .IX Item "-Lu" unix (\s-1LF\s0) .IP "\fB\-Lw\fR" 4 .IX Item "-Lw" windows (\s-1CRLF\s0) .IP "\fB\-Lm\fR" 4 .IX Item "-Lm" mac (\s-1CR\s0) .Sp Without this option, nkf doesn't convert line breaks.
.RE .RS 4 .RE .IP "\fB\-\-fj \-\-unix \-\-mac \-\-msdos \-\-windows\fR" 4 .IX Item "--fj --unix --mac --msdos --windows" convert for these systems .IP "\fB\-\-jis \-\-euc \-\-sjis \-\-mime \-\-base64\fR" 4 .IX Item "--jis --euc --sjis --mime --base64" convert for named code .IP "\fB\-\-jis\-input \-\-euc\-input \-\-sjis\-input \-\-mime\-input \-\-base64\-input\fR" 4 .IX Item "--jis-input --euc-input --sjis-input --mime-input --base64-input" assume input system .IP "\fB\-\-ic=\f(BIinput codeset\fB \-\-oc=\f(BIoutput codeset\fB\fR" 4 .IX Item "--ic=input codeset --oc=output codeset" Set the input or output codeset. \&\s-1NKF\s0 supports the following codesets, and those codeset names are case insensitive. .RS 4 .IP "\s-1ISO\-2022\-JP\s0" 4 .IX Item "ISO-2022-JP" a.k.a. \s-1RFC1468\s0, 7bit \s-1JIS\s0, \s-1JUNET\s0 .IP "EUC-JP (eucJP\-nkf)" 4 .IX Item "EUC-JP (eucJP-nkf)" a.k.a. \s-1AT&T\s0 \s-1JIS\s0, Japanese \s-1EUC\s0, \s-1UJIS\s0 .IP "eucJP-ascii" 4 .IX Item "eucJP-ascii" .PD 0 .IP "eucJP-ms" 4 .IX Item "eucJP-ms" .IP "\s-1CP51932\s0" 4 .IX Item "CP51932" .PD Microsoft Version of \s-1EUC\-JP\s0. .IP "Shift_JIS" 4 .IX Item "Shift_JIS" a.k.a. \s-1SJIS\s0, MS-Kanji .IP "\s-1CP932\s0" 4 .IX Item "CP932" a.k.a.
Windows\-31J .IP "\s-1UTF\-8\s0" 4 .IX Item "UTF-8" same as \s-1UTF\-8N\s0 .IP "\s-1UTF\-8N\s0" 4 .IX Item "UTF-8N" \&\s-1UTF\-8\s0 without \s-1BOM\s0 .IP "\s-1UTF\-8\-BOM\s0" 4 .IX Item "UTF-8-BOM" \&\s-1UTF\-8\s0 with \s-1BOM\s0 .IP "\s-1UTF\-16\s0" 4 .IX Item "UTF-16" same as \s-1UTF\-16BE\s0 .IP "\s-1UTF\-16BE\s0" 4 .IX Item "UTF-16BE" \&\s-1UTF\-16\s0 Big Endian without \s-1BOM\s0 .IP "\s-1UTF\-16BE\-BOM\s0" 4 .IX Item "UTF-16BE-BOM" \&\s-1UTF\-16\s0 Big Endian with \s-1BOM\s0 .IP "\s-1UTF\-16LE\s0" 4 .IX Item "UTF-16LE" \&\s-1UTF\-16\s0 Little Endian without \s-1BOM\s0 .IP "\s-1UTF\-16LE\-BOM\s0" 4 .IX Item "UTF-16LE-BOM" \&\s-1UTF\-16\s0 Little Endian with \s-1BOM\s0 .IP "\s-1UTF8\-MAC\s0 (input only)" 4 .IX Item "UTF8-MAC (input only)" .RE .RS 4 .RE .PD 0 .IP "\fB\-\-fb\-{skip, html, xml, perl, java, subchar}\fR" 4 .IX Item "--fb-{skip, html, xml, perl, java, subchar}" .PD Specify the way that nkf handles unassigned characters. Without this option, \-\-fb\-skip is assumed. .IP "\fB\-\-prefix=\f(BIescape character\fB\f(BItarget character\fB..\fR" 4 .IX Item "--prefix=escape charactertarget character.." When nkf converts to Shift_JIS, nkf adds the specified escape character to the specified 2nd byte of Shift_JIS characters. The 1st byte of the argument is the escape character and the following bytes are target characters. .IP "\fB\-\-no\-cp932ext\fR" 4 .IX Item "--no-cp932ext" Handle the characters extended in \s-1CP932\s0 as unassigned characters. .IP "\fB\-\-no\-best\-fit\-chars\fR" 4 .IX Item "--no-best-fit-chars" When converting from Unicode to an encoded byte sequence, don't convert characters which are not round trip safe. When converting from Unicode to Unicode, with this and the \-x option, nkf can be used as a \s-1UTF\s0 converter. (In other words, without this and the \-x option, nkf doesn't preserve some characters.) .Sp When nkf converts strings that represent paths, you should use this option. .IP "\fB\-\-cap\-input\fR" 4 .IX Item "--cap-input" Decode hex encoded characters.
.IP "\fB\-\-url\-input\fR" 4 .IX Item "--url-input" Unescape percent escaped characters. .IP "\fB\-\-numchar\-input\fR" 4 .IX Item "--numchar-input" Decode character references, such as \*(L"&#....;\*(R". .IP "\fB\-\-in\-place[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR \fB\-\-overwrite[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR" 4 .IX Item "--in-place[=SUFFIX] --overwrite[=SUFFIX]" Overwrite the \fBoriginal\fR listed files with the filtered result. .Sp \&\fBNote\fR: \-\-overwrite preserves the timestamps of the original files. .IP "\fB\-\-guess\fR" 4 .IX Item "--guess" Print guessed encoding. .IP "\fB\-\-\fR" 4 .IX Item "--" Ignore rest of \-option. .SH "AUTHOR" .IX Header "AUTHOR" Network Kanji Filter Version 2.0.6 .PP Copyright (C) 1987, \s-1FUJITSU\s0 \s-1LTD\s0. (I.Ichikawa),2000 S. Kono, \s-1COW\s0 2002\-2006 Kono, Furukawa, Naruse, mastodon