nkf.1

   1 .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
   2 .\"
   3 .\" Standard preamble:
   4 .\" ========================================================================
   5 .de Sh \" Subsection heading
   6 .br
   7 .if t .Sp
   8 .ne 5
   9 .PP
  10 \fB\\$1\fR
  11 .PP
  12 ..
  13 .de Sp \" Vertical space (when we can't use .PP)
  14 .if t .sp .5v
  15 .if n .sp
  16 ..
  17 .de Vb \" Begin verbatim text
  18 .ft CW
  19 .nf
  20 .ne \\$1
  21 ..
  22 .de Ve \" End verbatim text
  23 .ft R
  24 .fi
  25 ..
  26 .\" Set up some character translations and predefined strings.  \*(-- will
  27 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
  28 .\" double quote, and \*(R" will give a right double quote.  | will give a
  29 .\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
  30 .\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
  31 .\" expand to `' in nroff, nothing in troff, for use with C<>.
  32 .tr \(*W-|\(bv\*(Tr
  33 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
  34 .ie n \{\
  35 .    ds -- \(*W-
  36 .    ds PI pi
  37 .    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
  38 .    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
  39 .    ds L" ""
  40 .    ds R" ""
  41 .    ds C` ""
  42 .    ds C' ""
  43 'br\}
  44 .el\{\
  45 .    ds -- \|\(em\|
  46 .    ds PI \(*p
  47 .    ds L" ``
  48 .    ds R" ''
  49 'br\}
  50 .\"
  51 .\" If the F register is turned on, we'll generate index entries on stderr for
  52 .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
  53 .\" entries marked with X<> in POD.  Of course, you'll have to process the
  54 .\" output yourself in some meaningful fashion.
  55 .if \nF \{\
  56 .    de IX
  57 .    tm Index:\\$1\t\\n%\t"\\$2"
  58 ..
  59 .    nr % 0
  60 .    rr F
  61 .\}
  62 .\"
  63 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
  64 .\" way too many mistakes in technical documents.
  65 .hy 0
  66 .if n .na
  67 .\"
  68 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
  69 .\" Fear.  Run.  Save yourself.  No user-serviceable parts.
  70 .    \" fudge factors for nroff and troff
  71 .if n \{\
  72 .    ds #H 0
  73 .    ds #V .8m
  74 .    ds #F .3m
  75 .    ds #[ \f1
  76 .    ds #] \fP
  77 .\}
  78 .if t \{\
  79 .    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
  80 .    ds #V .6m
  81 .    ds #F 0
  82 .    ds #[ \&
  83 .    ds #] \&
  84 .\}
  85 .    \" simple accents for nroff and troff
  86 .if n \{\
  87 .    ds ' \&
  88 .    ds ` \&
  89 .    ds ^ \&
  90 .    ds , \&
  91 .    ds ~ ~
  92 .    ds /
  93 .\}
  94 .if t \{\
  95 .    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
  96 .    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
  97 .    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
  98 .    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
  99 .    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
 100 .    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
 101 .\}
 102 .    \" troff and (daisy-wheel) nroff accents
 103 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
 104 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
 105 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
 106 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
 107 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
 108 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
 109 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
 110 .ds ae a\h'-(\w'a'u*4/10)'e
 111 .ds Ae A\h'-(\w'A'u*4/10)'E
 112 .    \" corrections for vroff
 113 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
 114 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
 115 .    \" for low resolution devices (crt and lpr)
 116 .if \n(.H>23 .if \n(.V>19 \
 117 \{\
 118 .    ds : e
 119 .    ds 8 ss
 120 .    ds o a
 121 .    ds d- d\h'-1'\(ga
 122 .    ds D- D\h'-1'\(hy
 123 .    ds th \o'bp'
 124 .    ds Th \o'LP'
 125 .    ds ae ae
 126 .    ds Ae AE
 127 .\}
 128 .rm #[ #] #H #V #F C
 129 .\" ========================================================================
 130 .\"
 131 .IX Title "nkf 1"
 132 .TH nkf 1 "2006-08-22" "nkf 2.0.8" " "
 133 .SH "NAME"
 134 nkf \- Network Kanji Filter
 135 .SH "SYNOPSIS"
 136 .IX Header "SYNOPSIS"
 137 nkf \fB[\-butjnesliohrTVvwWJESZxXFfmMBOcdILg]\fR \fB[\fR\fIfile ...\fR\fB]\fR
 138 .SH "DESCRIPTION"
 139 .IX Header "DESCRIPTION"
 140 \&\fBNkf\fR is yet another kanji code converter among networks, hosts and terminals.
 141 It converts input kanji code to designated kanji code
 142 such as \s-1ISO\-2022\-JP\s0, Shift_JIS, \s-1EUC\-JP\s0, \s-1UTF\-8\s0 or \s-1UTF\-16\s0.
 143 .PP
 144 One of the most distinctive features of \fBnkf\fR is its guessing of the input kanji encoding.
 145 It currently recognizes \s-1ISO\-2022\-JP\s0, Shift_JIS, \s-1EUC\-JP\s0, \s-1UTF\-8\s0 and \s-1UTF\-16\s0.
 146 So users needn't set the input kanji code explicitly.
 147 .PP
 148 By default, X0201 kana is converted into X0208 kana.
 149 For X0201 kana, \s-1SO/SI\s0, \s-1SSO\s0 and \s-1ESC\-\s0(\-I methods are supported.
 150 For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
 151 To accept X0201 in Shift_JIS, use \fB\-X\fR, \fB\-x\fR or \fB\-S\fR.
 152 .SH "OPTIONS"
 153 .IX Header "OPTIONS"
 154 .IP "\fB\-b \-u\fR" 4
 155 .IX Item "-b -u"
 156 Output is buffered (\s-1DEFAULT\s0), Output is unbuffered.
 157 .IP "\fB\-j \-s \-e \-w \-w16\fR" 4
 158 .IX Item "-j -s -e -w -w16"
 159 Output code is \s-1ISO\-2022\-JP\s0 (7bit \s-1JIS\s0), Shift_JIS, \s-1EUC\-JP\s0,
 160 \&\s-1UTF\-8N\s0, \s-1UTF\-16BE\s0.
 161 Without this option and compile option, \s-1ISO\-2022\-JP\s0 is assumed.
 162 .IP "\fB\-J \-S \-E \-W \-W16\fR" 4
 163 .IX Item "-J -S -E -W -W16"
 164 Input assumption is \s-1JIS\s0 7 bit, Shift_JIS, \s-1EUC\-JP\s0,
 165 \&\s-1UTF\-8\s0, \s-1UTF\-16LE\s0.
 166 .RS 4
 167 .IP "\fB\-J\fR" 4
 168 .IX Item "-J"
 169 Assume  \s-1JIS\s0 input. It also accepts \s-1EUC\-JP\s0.
 170 This is the default. This flag does not exclude Shift_JIS.
 171 .IP "\fB\-S\fR" 4
 172 .IX Item "-S"
 173 Assume Shift_JIS and X0201 kana input. It also accepts \s-1JIS\s0.
 174 EUC-JP is recognized as X0201 kana. Without \fB\-x\fR flag,
 175 X0201 kana (halfwidth kana) is converted into X0208.
 176 .IP "\fB\-E\fR" 4
 177 .IX Item "-E"
 178 Assume EUC-JP input. It also accepts \s-1JIS\s0.
 179 Same as \-J.
 180 .RE
 181 .RS 4
 182 .RE
 183 .IP "\fB\-t\fR" 4
 184 .IX Item "-t"
 185 No conversion.
 186 .IP "\fB\-i[@B]\fR" 4
 187 .IX Item "-i[@B]"
 188 Specify the Esc Seq for \s-1JIS\s0 X 0208\-1978/83. (\s-1DEFAULT\s0 B)
 189 .IP "\fB\-o[\s-1BJH\s0]\fR" 4
 190 .IX Item "-o[BJH]"
 191 Specify the Esc Seq for ASCII/Roman. (\s-1DEFAULT\s0 B)
 192 .IP "\fB\-r\fR" 4
 193 .IX Item "-r"
 194 {de/en}crypt \s-1ROT13/47\s0
 195 .IP "\fB\-h[123] \-\-hiragana \-\-katakana \-\-katakana\-hiragana\fR" 4
 196 .IX Item "-h[123] --hiragana --katakana --katakana-hiragana"
 197 .RS 4
 198 .PD 0
 199 .IP "\fB\-h1 \-\-hiragana\fR" 4
 200 .IX Item "-h1 --hiragana"
 201 .PD
 202 Katakana to Hiragana conversion.
 203 .IP "\fB\-h2 \-\-katakana\fR" 4
 204 .IX Item "-h2 --katakana"
 205 Hiragana to Katakana conversion.
 206 .IP "\fB\-h3 \-\-katakana\-hiragana\fR" 4
 207 .IX Item "-h3 --katakana-hiragana"
 208 Katakana to Hiragana and Hiragana to Katakana conversion.
 209 .RE
 210 .RS 4
 211 .RE
 212 .IP "\fB\-T\fR" 4
 213 .IX Item "-T"
 214 Text mode output (\s-1MS\-DOS\s0)
 215 .IP "\fB\-l\fR" 4
 216 .IX Item "-l"
 217 \&\s-1ISO8859\-1\s0 (Latin\-1) support
 218 .IP "\fB\-f[\f(BIm\fB [\- \f(BIn\fB]]\fR" 4
 219 .IX Item "-f[m [- n]]"
 220 Folding on \fIm\fR length with \fIn\fR margin in a line.
 221 Without this option, fold length is 60 and fold margin is 10.
 222 .IP "\fB\-F\fR" 4
 223 .IX Item "-F"
 224 New line preserving line folding.
 225 .IP "\fB\-Z[0\-3]\fR" 4
 226 .IX Item "-Z[0-3]"
 227 Convert X0208 alphabet (Fullwidth Alphabets) to \s-1ASCII\s0.
 228 .RS 4
 229 .IP "\fB\-Z \-Z0\fR" 4
 230 .IX Item "-Z -Z0"
 231 Convert X0208 alphabet to \s-1ASCII\s0.
 232 .IP "\fB\-Z1\fR" 4
 233 .IX Item "-Z1"
 234 Converts X0208 kankaku to single \s-1ASCII\s0 space.
 235 .IP "\fB\-Z2\fR" 4
 236 .IX Item "-Z2"
 237 Converts X0208 kankaku to double \s-1ASCII\s0 spaces.
 238 .IP "\fB\-Z3\fR" 4
 239 .IX Item "-Z3"
 240 Replace fullwidth >, <, ", & with '&gt;', '&lt;', '&quot;', '&amp;' as in \s-1HTML\s0.
 241 .RE
 242 .RS 4
 243 .RE
 244 .IP "\fB\-X \-x\fR" 4
 245 .IX Item "-X -x"
 246 Assume X0201 kana in MS\-Kanji.
 247 With \fB\-X\fR or without this option, X0201 is converted into X0208 Kana.
 248 With \fB\-x\fR, try to preserve X0201 kana and do not convert X0201 kana to X0208.
 249 In \s-1JIS\s0 output, \s-1ESC\-\s0(\-I is used. In \s-1EUC\s0 output, \s-1SSO\s0 is used.
 250 .IP "\fB\-B[0\-2]\fR" 4
 251 .IX Item "-B[0-2]"
 252 Assume broken JIS-Kanji input, which lost \s-1ESC\s0.
 253 Useful when your site is using old B\-News Nihongo patch.
 254 .RS 4
 255 .IP "\fB\-B1\fR" 4
 256 .IX Item "-B1"
 257 allows any char after \s-1ESC\-\s0( or \s-1ESC\-$\s0.
 258 .IP "\fB\-B2\fR" 4
 259 .IX Item "-B2"
 260 forces \s-1ASCII\s0 after \s-1NL\s0.
 261 .RE
 262 .RS 4
 263 .RE
 264 .IP "\fB\-I\fR" 4
 265 .IX Item "-I"
 266 Replace non iso\-2022\-jp characters with a geta character
 267 (substitute character in Japanese).
 268 .IP "\fB\-m[\s-1BQN0\s0]\fR" 4
 269 .IX Item "-m[BQN0]"
 270 \&\s-1MIME\s0 \s-1ISO\-2022\-JP/ISO8859\-1\s0 decode. (\s-1DEFAULT\s0)
 271 To see \s-1ISO8859\-1\s0 (Latin\-1) \-l is necessary.
 272 .RS 4
 273 .IP "\fB\-mB\fR" 4
 274 .IX Item "-mB"
 275 Decode \s-1MIME\s0 base64 encoded stream. Remove header or other part before
 276 conversion.
 277 .IP "\fB\-mQ\fR" 4
 278 .IX Item "-mQ"
 279 Decode \s-1MIME\s0 quoted stream. '_' in quoted stream is converted to space.
 280 .IP "\fB\-mN\fR" 4
 281 .IX Item "-mN"
 282 Non-strict decoding.
 283 It allows line break in the middle of the base64 encoding.
 284 .IP "\fB\-m0\fR" 4
 285 .IX Item "-m0"
 286 No \s-1MIME\s0 decode.
 287 .RE
 288 .RS 4
 289 .RE
 290 .IP "\fB\-M\fR" 4
 291 .IX Item "-M"
 292 \&\s-1MIME\s0 encode. Header style. All \s-1ASCII\s0 code and control characters are intact.
 293 .RS 4
 294 .IP "\fB\-MB\fR" 4
 295 .IX Item "-MB"
 296 \&\s-1MIME\s0 encode Base64 stream.
 297 Kanji conversion is performed before encoding, so this cannot be used as a picture encoder.
 298 .IP "\fB\-MQ\fR" 4
 299 .IX Item "-MQ"
 300 Perform quoted encoding.
 301 .RE
 302 .RS 4
 303 .RE
 304 .IP "\fB\-l\fR" 4
 305 .IX Item "-l"
 306 Input and output code is \s-1ISO8859\-1\s0 (Latin\-1) and \s-1ISO\-2022\-JP\s0.
 307 \&\fB\-s\fR, \fB\-e\fR and \fB\-x\fR are not compatible with this option.
 308 .IP "\fB\-L[uwm] \-d \-c\fR" 4
 309 .IX Item "-L[uwm] -d -c"
 310 Convert line breaks.
 311 .RS 4
 312 .IP "\fB\-Lu \-d\fR" 4
 313 .IX Item "-Lu -d"
 314 unix (\s-1LF\s0)
 315 .IP "\fB\-Lw \-c\fR" 4
 316 .IX Item "-Lw -c"
 317 windows (\s-1CRLF\s0)
 318 .IP "\fB\-Lm\fR" 4
 319 .IX Item "-Lm"
 320 mac (\s-1CR\s0)
 321 .Sp
 322 Without this option, nkf doesn't convert line breaks.
 323 .RE
 324 .RS 4
 325 .RE
 326 .IP "\fB\-\-fj \-\-unix \-\-mac \-\-msdos \-\-windows\fR" 4
 327 .IX Item "--fj --unix --mac --msdos --windows"
 328 convert for these systems
 329 .IP "\fB\-\-jis \-\-euc \-\-sjis \-\-mime \-\-base64\fR" 4
 330 .IX Item "--jis --euc --sjis --mime --base64"
 331 convert for named code
 332 .IP "\fB\-\-jis\-input \-\-euc\-input \-\-sjis\-input \-\-mime\-input \-\-base64\-input\fR" 4
 333 .IX Item "--jis-input --euc-input --sjis-input --mime-input --base64-input"
 334 assume input system
 335 .IP "\fB\-\-ic=\f(BIinput codeset\fB \-\-oc=\f(BIoutput codeset\fB\fR" 4
 336 .IX Item "--ic=input codeset --oc=output codeset"
 337 Set the input or output codeset.
 338 \&\s-1NKF\s0 supports the following codesets; codeset names are case insensitive.
 339 .RS 4
 340 .IP "\s-1ISO\-2022\-JP\s0" 4
 341 .IX Item "ISO-2022-JP"
 342 a.k.a. \s-1RFC1468\s0, 7bit \s-1JIS\s0, \s-1JUNET\s0
 343 .IP "EUC-JP (eucJP\-nkf)" 4
 344 .IX Item "EUC-JP (eucJP-nkf)"
 345 a.k.a. \s-1AT&T\s0 \s-1JIS\s0, Japanese \s-1EUC\s0, \s-1UJIS\s0
 346 .IP "eucJP-ascii" 4
 347 .IX Item "eucJP-ascii"
 348 .PD 0
 349 .IP "eucJP-ms" 4
 350 .IX Item "eucJP-ms"
 351 .IP "\s-1CP51932\s0" 4
 352 .IX Item "CP51932"
 353 .PD
 354 Microsoft Version of \s-1EUC\-JP\s0.
 355 .IP "Shift_JIS" 4
 356 .IX Item "Shift_JIS"
 357 a.k.a. \s-1SJIS\s0, MS-Kanji
 358 .IP "\s-1CP932\s0" 4
 359 .IX Item "CP932"
 360 a.k.a. Windows\-31J
 361 .IP "\s-1UTF\-8\s0" 4
 362 .IX Item "UTF-8"
 363 same as \s-1UTF\-8N\s0
 364 .IP "\s-1UTF\-8N\s0" 4
 365 .IX Item "UTF-8N"
 366 \&\s-1UTF\-8\s0 without \s-1BOM\s0
 367 .IP "\s-1UTF\-8\-BOM\s0" 4
 368 .IX Item "UTF-8-BOM"
 369 \&\s-1UTF\-8\s0 with \s-1BOM\s0
 370 .IP "\s-1UTF8\-MAC\s0 (input only)" 4
 371 .IX Item "UTF8-MAC (input only)"
 372 decomposed \s-1UTF\-8\s0
 373 .IP "\s-1UTF\-16\s0" 4
 374 .IX Item "UTF-16"
 375 same as \s-1UTF\-16BE\s0
 376 .IP "\s-1UTF\-16BE\s0" 4
 377 .IX Item "UTF-16BE"
 378 \&\s-1UTF\-16\s0 Big Endian without \s-1BOM\s0
 379 .IP "\s-1UTF\-16BE\-BOM\s0" 4
 380 .IX Item "UTF-16BE-BOM"
 381 \&\s-1UTF\-16\s0 Big Endian with \s-1BOM\s0
 382 .IP "\s-1UTF\-16LE\s0" 4
 383 .IX Item "UTF-16LE"
 384 \&\s-1UTF\-16\s0 Little Endian without \s-1BOM\s0
 385 .IP "\s-1UTF\-16LE\-BOM\s0" 4
 386 .IX Item "UTF-16LE-BOM"
 387 \&\s-1UTF\-16\s0 Little Endian with \s-1BOM\s0
 388 .RE
 389 .RS 4
 390 .RE
 391 .IP "\fB\-\-fb\-{skip, html, xml, perl, java, subchar}\fR" 4
 392 .IX Item "--fb-{skip, html, xml, perl, java, subchar}"
 393 Specify the way that nkf handles unassigned characters.
 394 Without this option, \-\-fb\-skip is assumed.
 395 .IP "\fB\-\-prefix=\f(BIescape character\fB\f(BItarget character\fB..\fR" 4
 396 .IX Item "--prefix=escape charactertarget character.."
 397 When nkf converts to Shift_JIS,
 398 nkf adds a specified escape character to specified 2nd byte of Shift_JIS characters.
 399 1st byte of argument is the escape character and following bytes are target characters.
 400 .IP "\fB\-\-no\-cp932ext\fR" 4
 401 .IX Item "--no-cp932ext"
 402 Handle the characters extended in \s-1CP932\s0 as unassigned characters.
 403 .IP "\fB\-\-no\-best\-fit\-chars\fR" 4
 404 .IX Item "--no-best-fit-chars"
 405 When converting Unicode to encoded bytes,
 406 don't convert characters which are not round trip safe.
 407 When converting Unicode to Unicode,
 408 with this and the \-x option, nkf can be used as a \s-1UTF\s0 converter.
 409 (In other words, without this and the \-x option, nkf doesn't preserve some characters.)
 410 .Sp
 411 When nkf converts strings related to paths, you should use this option.
 412 .IP "\fB\-\-cap\-input\fR" 4
 413 .IX Item "--cap-input"
 414 Decode hex encoded characters.
 415 .IP "\fB\-\-url\-input\fR" 4
 416 .IX Item "--url-input"
 417 Unescape percent escaped characters.
 418 .IP "\fB\-\-numchar\-input\fR" 4
 419 .IX Item "--numchar-input"
 420 Decode character reference, such as \*(L"&#....;\*(R".
 421 .IP "\fB\-\-in\-place[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR  \fB\-\-overwrite[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR" 4
 422 .IX Item "--in-place[=SUFFIX]  --overwrite[=SUFFIX]"
 423 Overwrite the \fBoriginal\fR listed files with the filtered result.
 424 .Sp
 425 \&\fBNote\fR \-\-overwrite preserves timestamp of original files.
 426 .IP "\fB\-\-guess\fR" 4
 427 .IX Item "--guess"
 428 Print guessed encoding.
 429 .IP "\fB\-\-help\fR" 4
 430 .IX Item "--help"
 431 Print nkf's help.
 432 .IP "\fB\-\-version\fR" 4
 433 .IX Item "--version"
 434 Print nkf's version.
 435 .IP "\fB\-\-\fR" 4
 436 .IX Item "--"
 437 Ignore rest of \-option.
 438 .SH "AUTHOR"
 439 .IX Header "AUTHOR"
 440 Copyright (C) 1987, \s-1FUJITSU\s0 \s-1LTD\s0. (I.Ichikawa),2000 S. Kono, \s-1COW\s0
 441 Copyright (C) 2002\-2006 Kono, Furukawa, Naruse, mastodon