1 .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.35
4 .\" ========================================================================
5 .de Sh \" Subsection heading
13 .de Sp \" Vertical space (when we can't use .PP)
17 .de Vb \" Begin verbatim text
22 .de Ve \" End verbatim text
26 .\" Set up some character translations and predefined strings. \*(-- will
27 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28 .\" double quote, and \*(R" will give a right double quote. | will give a
29 .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30 .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31 .\" expand to `' in nroff, nothing in troff, for use with C<>.
33 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
37 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
51 .\" If the F register is turned on, we'll generate index entries on stderr for
52 .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53 .\" entries marked with X<> in POD. Of course, you'll have to process the
54 .\" output yourself in some meaningful fashion.
57 . tm Index:\\$1\t\\n%\t"\\$2"
63 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
64 .\" way too many mistakes in technical documents.
68 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69 .\" Fear. Run. Save yourself. No user-serviceable parts.
70 . \" fudge factors for nroff and troff
79 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
85 . \" simple accents for nroff and troff
95 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
102 . \" troff and (daisy-wheel) nroff accents
103 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110 .ds ae a\h'-(\w'a'u*4/10)'e
111 .ds Ae A\h'-(\w'A'u*4/10)'E
112 . \" corrections for vroff
113 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115 . \" for low resolution devices (crt and lpr)
116 .if \n(.H>23 .if \n(.V>19 \
129 .\" ========================================================================
132 .TH nkf 1 "2009-01-20" "nkf 2.0.9" " "
134 nkf \- Network Kanji Filter
136 .IX Header "SYNOPSIS"
137 nkf \fB[\-butjnesliohrTVvwWJESZxXFfmMBOcdILg]\fR \fB[\fR\fIfile ...\fR\fB]\fR
139 .IX Header "DESCRIPTION"
140 \&\fBNkf\fR is yet another kanji code converter for use among networks, hosts and terminals.
141 It converts input kanji code to designated kanji code
142 such as \s-1ISO\-2022\-JP\s0, Shift_JIS, \s-1EUC\-JP\s0, \s-1UTF\-8\s0, \s-1UTF\-16\s0 or \s-1UTF\-32\s0.
144 One of the most distinctive features of \fBnkf\fR is that it guesses the input kanji encoding.
145 It currently recognizes \s-1ISO\-2022\-JP\s0, Shift_JIS, \s-1EUC\-JP\s0, \s-1UTF\-8\s0, \s-1UTF\-16\s0 and \s-1UTF\-32\s0.
146 So users needn't set the input kanji code explicitly.
148 By default, X0201 kana is converted into X0208 kana.
149 For X0201 kana, \s-1SO/SI\s0, \s-1SSO\s0 and \s-1ESC\-\s0(\-I methods are supported.
150 For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
151 To accept X0201 in Shift_JIS, use \fB\-X\fR, \fB\-x\fR or \fB\-S\fR.
154 .IP "\fB\-J \-S \-E \-W \-W16 \-W32 \-j \-s \-e \-w \-w16 \-w32\fR" 4
155 .IX Item "-J -S -E -W -W16 -W32 -j -s -e -w -w16 -w32"
156 Specify input and output encodings. Upper case is input.
157 cf. \-\-ic and \-\-oc.
161 \&\s-1ISO\-2022\-JP\s0 (\s-1JIS\s0 code).
164 Shift_JIS and \s-1JIS\s0 X 0201 kana.
165 EUC-JP is recognized as X0201 kana. Without \fB\-x\fR flag,
166 \&\s-1JIS\s0 X 0201 Katakana (a.k.a. halfwidth kana) is converted into \s-1JIS\s0 X 0208.
167 If you use Windows, see Windows\-31J (\s-1CP932\s0).
174 .IP "\fB\-W16[\s-1BL\s0][0]\fR" 4
175 .IX Item "-W16[BL][0]"
177 B or L gives whether Big Endian or Little Endian.
178 0 gives whether to put \s-1BOM\s0 or not.
179 .IP "\fB\-W32[\s-1BL\s0][0]\fR" 4
180 .IX Item "-W32[BL][0]"
182 B or L gives whether Big Endian or Little Endian.
183 0 gives whether to put \s-1BOM\s0 or not.
187 .IP "\fB\-b \-u\fR" 4
189 Output is buffered (\s-1DEFAULT\s0), Output is unbuffered.
193 .IP "\fB\-i[@B]\fR" 4
195 Specify the escape sequence for \s-1JIS\s0 X 0208.
199 Use \s-1ESC\s0 ( @. (\s-1JIS\s0 X 0208\-1978)
202 Use \s-1ESC\s0 ( B. (\s-1JIS\s0 X 0208\-1983/1990 \s-1DEFAULT\s0)
206 .IP "\fB\-o[\s-1BJ\s0]\fR" 4
208 Specify the escape sequence for \s-1US\-ASCII/JIS\s0 X 0201 Roman. (\s-1DEFAULT\s0 B)
211 {de/en}crypt \s-1ROT13/47\s0
212 .IP "\fB\-h[123] \-\-hiragana \-\-katakana \-\-katakana\-hiragana\fR" 4
213 .IX Item "-h[123] --hiragana --katakana --katakana-hiragana"
216 .IP "\fB\-h1 \-\-hiragana\fR" 4
217 .IX Item "-h1 --hiragana"
219 Katakana to Hiragana conversion.
220 .IP "\fB\-h2 \-\-katakana\fR" 4
221 .IX Item "-h2 --katakana"
222 Hiragana to Katakana conversion.
223 .IP "\fB\-h3 \-\-katakana\-hiragana\fR" 4
224 .IX Item "-h3 --katakana-hiragana"
225 Katakana to Hiragana and Hiragana to Katakana conversion.
231 Text mode output (\s-1MS\-DOS\s0)
234 \&\s-1ISO8859\-1\s0 (Latin\-1) support
235 .IP "\fB\-f[\f(BIm\fB [\- \f(BIn\fB]]\fR" 4
236 .IX Item "-f[m [- n]]"
237 Folding on \fIm\fR length with \fIn\fR margin in a line.
238 Without this option, fold length is 60 and fold margin is 10.
241 New line preserving line folding.
242 .IP "\fB\-Z[0\-3]\fR" 4
244 Convert X0208 alphabet (Fullwidth Alphabets) to \s-1ASCII\s0.
246 .IP "\fB\-Z \-Z0\fR" 4
248 Convert X0208 alphabet to \s-1ASCII\s0.
251 Convert X0208 kankaku to single \s-1ASCII\s0 space.
254 Convert X0208 kankaku to double \s-1ASCII\s0 spaces.
257 Replace fullwidth >, <, ", & with '&gt;', '&lt;', '&quot;', '&amp;' as in \s-1HTML\s0.
261 .IP "\fB\-X \-x\fR" 4
263 Assume X0201 kana in MS\-Kanji.
264 With \fB\-X\fR or without this option, X0201 is converted into X0208 Kana.
265 With \fB\-x\fR, try to preserve X0201 kana and do not convert X0201 kana to X0208.
266 In \s-1JIS\s0 output, \s-1ESC\-\s0(\-I is used. In \s-1EUC\s0 output, \s-1SSO\s0 is used.
267 .IP "\fB\-B[0\-2]\fR" 4
269 Assume broken JIS-Kanji input, which lost \s-1ESC\s0.
270 Useful when your site is using old B\-News Nihongo patch.
274 allows any chars after \s-1ESC\-\s0( or \s-1ESC\-$\s0.
277 force \s-1ASCII\s0 after \s-1NL\s0.
283 Replacing non iso\-2022\-jp char into a geta character
284 (substitute character in Japanese).
285 .IP "\fB\-m[\s-1BQN0\s0]\fR" 4
287 \&\s-1MIME\s0 \s-1ISO\-2022\-JP/ISO8859\-1\s0 decode. (\s-1DEFAULT\s0)
288 To see \s-1ISO8859\-1\s0 (Latin\-1) \-l is necessary.
292 Decode \s-1MIME\s0 base64 encoded stream. Remove header or other part before
296 Decode \s-1MIME\s0 quoted stream. '_' in quoted stream is converted to space.
300 It allows line break in the middle of the base64 encoding.
303 No \s-1MIME\s0 decode.
309 \&\s-1MIME\s0 encode. Header style. All \s-1ASCII\s0 code and control characters are intact.
313 \&\s-1MIME\s0 encode Base64 stream.
314 Kanji conversion is performed before encoding, so this cannot be used as a picture encoder.
317 Perform quoted encoding.
323 Input and output code is \s-1ISO8859\-1\s0 (Latin\-1) and \s-1ISO\-2022\-JP\s0.
324 \&\fB\-s\fR, \fB\-e\fR and \fB\-x\fR are not compatible with this option.
325 .IP "\fB\-L[uwm] \-d \-c\fR" 4
326 .IX Item "-L[uwm] -d -c"
329 .IP "\fB\-Lu \-d\fR" 4
332 .IP "\fB\-Lw \-c\fR" 4
334 windows (\s-1CRLF\s0)
339 Without this option, nkf doesn't convert line breaks.
343 .IP "\fB\-\-fj \-\-unix \-\-mac \-\-msdos \-\-windows\fR" 4
344 .IX Item "--fj --unix --mac --msdos --windows"
345 Convert for these systems.
346 .IP "\fB\-\-jis \-\-euc \-\-sjis \-\-mime \-\-base64\fR" 4
347 .IX Item "--jis --euc --sjis --mime --base64"
348 Convert to named code.
349 .IP "\fB\-\-jis\-input \-\-euc\-input \-\-sjis\-input \-\-mime\-input \-\-base64\-input\fR" 4
350 .IX Item "--jis-input --euc-input --sjis-input --mime-input --base64-input"
352 .IP "\fB\-\-ic=\f(BIinput codeset\fB \-\-oc=\f(BIoutput codeset\fB\fR" 4
353 .IX Item "--ic=input codeset --oc=output codeset"
354 Set the input or output codeset.
355 \&\s-1NKF\s0 supports the following codesets; codeset names are case insensitive.
357 .IP "\s-1ISO\-2022\-JP\s0" 4
358 .IX Item "ISO-2022-JP"
359 a.k.a. \s-1RFC1468\s0, 7bit \s-1JIS\s0, \s-1JUNET\s0
360 .IP "EUC-JP (eucJP\-nkf)" 4
361 .IX Item "EUC-JP (eucJP-nkf)"
362 a.k.a. \s-1AT&T\s0 \s-1JIS\s0, Japanese \s-1EUC\s0, \s-1UJIS\s0
364 .IX Item "eucJP-ascii"
368 .IP "\s-1CP51932\s0" 4
371 Microsoft Version of \s-1EUC\-JP\s0.
374 a.k.a. \s-1SJIS\s0, MS-Kanji
376 .IX Item "Windows-31J"
378 .IP "\s-1UTF\-8\s0" 4
380 same as \s-1UTF\-8N\s0
381 .IP "\s-1UTF\-8N\s0" 4
383 \&\s-1UTF\-8\s0 without \s-1BOM\s0
384 .IP "\s-1UTF\-8\-BOM\s0" 4
386 \&\s-1UTF\-8\s0 with \s-1BOM\s0
387 .IP "\s-1UTF8\-MAC\s0 (input only)" 4
388 .IX Item "UTF8-MAC (input only)"
389 decomposed \s-1UTF\-8\s0
390 .IP "\s-1UTF\-16\s0" 4
392 same as \s-1UTF\-16BE\s0
393 .IP "\s-1UTF\-16BE\s0" 4
395 \&\s-1UTF\-16\s0 Big Endian without \s-1BOM\s0
396 .IP "\s-1UTF\-16BE\-BOM\s0" 4
397 .IX Item "UTF-16BE-BOM"
398 \&\s-1UTF\-16\s0 Big Endian with \s-1BOM\s0
399 .IP "\s-1UTF\-16LE\s0" 4
401 \&\s-1UTF\-16\s0 Little Endian without \s-1BOM\s0
402 .IP "\s-1UTF\-16LE\-BOM\s0" 4
403 .IX Item "UTF-16LE-BOM"
404 \&\s-1UTF\-16\s0 Little Endian with \s-1BOM\s0
405 .IP "\s-1UTF\-32\s0" 4
407 same as \s-1UTF\-32BE\s0
408 .IP "\s-1UTF\-32BE\s0" 4
410 \&\s-1UTF\-32\s0 Big Endian without \s-1BOM\s0
411 .IP "\s-1UTF\-32BE\-BOM\s0" 4
412 .IX Item "UTF-32BE-BOM"
413 \&\s-1UTF\-32\s0 Big Endian with \s-1BOM\s0
414 .IP "\s-1UTF\-32LE\s0" 4
416 \&\s-1UTF\-32\s0 Little Endian without \s-1BOM\s0
417 .IP "\s-1UTF\-32LE\-BOM\s0" 4
418 .IX Item "UTF-32LE-BOM"
419 \&\s-1UTF\-32\s0 Little Endian with \s-1BOM\s0
423 .IP "\fB\-\-fb\-{skip, html, xml, perl, java, subchar}\fR" 4
424 .IX Item "--fb-{skip, html, xml, perl, java, subchar}"
425 Specify the way that nkf handles unassigned characters.
426 Without this option, \-\-fb\-skip is assumed.
427 .IP "\fB\-\-prefix=\f(BIescape character\fB\f(BItarget character\fB..\fR" 4
428 .IX Item "--prefix=escape charactertarget character.."
429 When nkf converts to Shift_JIS,
430 nkf adds a specified escape character to specified 2nd byte of Shift_JIS characters.
431 1st byte of argument is the escape character and following bytes are target characters.
432 .IP "\fB\-\-no\-cp932ext\fR" 4
433 .IX Item "--no-cp932ext"
434 Handle the characters extended in \s-1CP932\s0 as unassigned characters.
435 .IP "\fB\-\-no\-best\-fit\-chars\fR" 4
436 .IX Item "--no-best-fit-chars"
437 When converting from Unicode to an encoded byte sequence,
438 don't convert characters which are not round-trip safe.
439 When converting from Unicode to Unicode,
440 with this and the \-x option, nkf can be used as a \s-1UTF\s0 converter.
441 (In other words, without this and the \-x option, nkf doesn't preserve some characters.)
443 When nkf converts strings that are related to paths, you should use this option.
444 .IP "\fB\-\-cap\-input\fR" 4
445 .IX Item "--cap-input"
446 Decode hex encoded characters.
447 .IP "\fB\-\-url\-input\fR" 4
448 .IX Item "--url-input"
449 Unescape percent escaped characters.
450 .IP "\fB\-\-numchar\-input\fR" 4
451 .IX Item "--numchar-input"
452 Decode character reference, such as \*(L"&#....;\*(R".
453 .IP "\fB\-\-in\-place[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR \fB\-\-overwrite[=\fR\fI\s-1SUFFIX\s0\fR\fB]\fR" 4
454 .IX Item "--in-place[=SUFFIX] --overwrite[=SUFFIX]"
455 Overwrite the \fBoriginal\fR listed files with the filtered result.
457 \&\fBNote\fR \-\-overwrite preserves timestamps of original files.
458 .IP "\fB\-\-guess=[12]\fR" 4
459 .IX Item "--guess=[12]"
460 Print guessed encoding and newline. (2 is default, 1 is only encoding)
461 .IP "\fB\-\-help\fR" 4
464 .IP "\fB\-\-version\fR" 4
469 Ignore rest of \-option.
472 Copyright (c) 1987, Fujitsu \s-1LTD\s0. (Itaru \s-1ICHIKAWA\s0).
474 Copyright (c) 1996\-2009, The nkf Project.