README.md

   1 # NAME
   2
   3
   4
   5 nkf - Network Kanji Filter
   6
   7
   8
   9 # SYNOPSIS
  10
  11
  12
  13 nkf __[-butjnesliohrTVvwWJESZxXFfmMBOcdILg]__ __[___file ...___]__
  14
  15
  16
  17 # DESCRIPTION
  18
  19
  20
  21 __Nkf__ is yet another kanji code converter among networks, hosts and terminals.
  22 It converts input kanji code to designated kanji code
  23 such as ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8, UTF-16 or UTF-32.
  24
  25 One of the most distinctive features of __nkf__ is that it guesses the input kanji encoding.
  26 It currently recognizes ISO-2022-JP, Shift_JIS, EUC-JP, UTF-8, UTF-16 and UTF-32.
  27 So users needn't set the input kanji code explicitly.
  28
  29 By default, X0201 kana is converted into X0208 kana.
  30 For X0201 kana, SO/SI, SSO and ESC-(-I methods are supported.
  31 For automatic code detection, nkf assumes no X0201 kana in Shift_JIS.
  32 To accept X0201 in Shift_JIS, use __-X__, __-x__ or __-S__.
  33
  34 # OPTIONS
  35
  36 - __-J -S -E -W -W16 -W32 -j -s -e -w -w16 -w32__
  37
  38 Specify input and output encodings. Upper case is input.
  39 cf. --ic and --oc.
  40
  41     - __-J__
  42
  43     ISO-2022-JP (JIS code).
  44
  45     - __-S__
  46
  47     Shift_JIS and JIS X 0201 kana.
  48     EUC-JP is recognized as X0201 kana. Without __-x__ flag,
  49     JIS X 0201 Katakana (a.k.a. halfwidth kana) is converted into JIS X 0208.
  50     If you use Windows, see Windows-31J (CP932).
  51
  52     - __-E__
  53
  54     EUC-JP.
  55
  56     - __-W__
  57
  58     UTF-8N.
  59
  60     - __-W16[BL][0]__
  61
  62     UTF-16.
  63     B or L gives whether Big Endian or Little Endian.
  64     0 gives whether to put BOM or not.
  65
  66     - __-W32[BL][0]__
  67
  68     UTF-32.
  69     B or L gives whether Big Endian or Little Endian.
  70     0 gives whether to put BOM or not.
  71
  72 - __-b -u__
  73
  74 Output is buffered (DEFAULT), Output is unbuffered.
  75
  76 - __-t__
  77
  78 No conversion.
  79
  80 - __-i[@B]__
  81
  82 Specify the escape sequence for JIS X 0208.
  83
  84     - __-i@__
  85
  86     Use ESC $ @. (JIS X 0208-1978)
  87
  88     - __-iB__
  89
  90     Use ESC $ B. (JIS X 0208-1983/1990 DEFAULT)
  91
  92 - __-o[BJ]__
  93
  94 Specify the escape sequence for US-ASCII/JIS X 0201 Roman. (DEFAULT B)
  95
  96 - __-r__
  97
  98 {de/en}crypt ROT13/47
  99
 100 - __-h[123] --hiragana --katakana --katakana-hiragana__
 101     - __-h1 --hiragana__
 102
 103     Katakana to Hiragana conversion.
 104
 105     - __-h2 --katakana__
 106
 107     Hiragana to Katakana conversion.
 108
 109     - __-h3 --katakana-hiragana__
 110
 111     Katakana to Hiragana and Hiragana to Katakana conversion.
 112
 113 - __-T__
 114
 115 Text mode output (MS-DOS)
 116
 117 - __-f[_m_ [- _n_]]__
 118
 119 Folding on _m_ length with _n_ margin in a line.
 120 Without this option, fold length is 60 and fold margin is 10.
 121
 122 - __-F__
 123
 124 New line preserving line folding.
 125
 126 - __-Z[0-3]__
 127
 128 Convert X0208 alphabet (Fullwidth Alphabets) to ASCII.
 129
 130     - __-Z -Z0__
 131
 132     Convert X0208 alphabet to ASCII.
 133
 134     - __-Z1__
 135
 136     Convert X0208 kankaku to single ASCII space.
 137
 138     - __-Z2__
 139
 140     Convert X0208 kankaku to double ASCII spaces.
 141
 142     - __-Z3__
 143
 144     Replace fullwidth >, <, ", & with '&gt;', '&lt;', '&quot;', '&amp;' as in HTML.
 145
 146 - __-X -x__
 147
 148 With __-X__ or without this option, X0201 is converted into X0208 Kana.
 149 With __-x__, try to preserve X0201 kana and do not convert X0201 kana to X0208.
 150 In JIS output, ESC-(-I is used. In EUC output, SS2 is used.
 151
 152 - __-B[0-2]__
 153
 154 Assume broken JIS-Kanji input, which lost ESC.
 155 Useful when your site is using old B-News Nihongo patch.
 156
 157     - __-B1__
 158
 159     allows any chars after ESC-( or ESC-$.
 160
 161     - __-B2__
 162
 163     force ASCII after NL.
 164
 165 - __-I__
 166
 167 Replace non-ISO-2022-JP characters with a geta character
 168 (substitute character in Japanese).
 169
 170 - __-m[BQN0]__
 171
 172 MIME ISO-2022-JP/ISO8859-1 decode. (DEFAULT)
 173 To see ISO8859-1 (Latin-1), -l is necessary.
 174
 175     - __-mB__
 176
 177     Decode MIME base64 encoded stream. Remove header or other part before
 178     conversion.
 179
 180     - __-mQ__
 181
 182     Decode MIME quoted stream. '_' in quoted stream is converted to space.
 183
 184     - __-mN__
 185
 186     Non-strict decoding.
 187     It allows line break in the middle of the base64 encoding.
 188
 189     - __-m0__
 190
 191     No MIME decode.
 192
 193 - __-M__
 194
 195 MIME encode. Header style. All ASCII code and control characters are intact.
 196
 197     - __-MB__
 198
 199     MIME encode Base64 stream.
 200     Kanji conversion is performed before encoding, so this cannot be used as a picture encoder.
 201
 202     - __-MQ__
 203
 204     Perform quoted encoding.
 205
 206 - __-l__
 207
 208 Input and output code is ISO8859-1 (Latin-1) and ISO-2022-JP.
 209 __-s__, __-e__ and __-x__ are not compatible with this option.
 210
 211 - __-L[uwm] -d -c__
 212
 213 Convert line breaks.
 214
 215     - __-Lu -d__
 216
 217     unix (LF)
 218
 219     - __-Lw -c__
 220
 221     windows (CRLF)
 222
 223     - __-Lm__
 224
 225     mac (CR)
 226
 227     Without this option, nkf doesn't convert line breaks.
 228
 229 - __--fj --unix --mac --msdos --windows__
 230
 231 Convert for these systems.
 232
 233 - __--jis --euc --sjis --mime --base64__
 234
 235 Convert to named code.
 236
 237 - __--jis-input --euc-input --sjis-input --mime-input --base64-input__
 238
 239 Assume input system
 240
 241 - __--ic=_input codeset_ --oc=_output codeset___
 242
 243 Set the input or output codeset.
 244 nkf supports the following codesets; codeset names are case insensitive.
 245
 246     - ISO-2022-JP
 247
 248     a.k.a. RFC1468, 7bit JIS, JUNET
 249
 250     - EUC-JP (eucJP-nkf)
 251
 252     a.k.a. AT&T JIS, Japanese EUC, UJIS
 253
 254         - eucJP-ascii
 255         - eucJP-ms
 256     - CP51932
 257
 258     Microsoft Version of EUC-JP.
 259
 260     - Shift_JIS
 261
 262     a.k.a. SJIS, MS_Kanji
 263
 264     - Windows-31J
 265
 266     a.k.a. CP932
 267
 268     - UTF-8
 269
 270     same as UTF-8N
 271
 272     - UTF-8N
 273
 274     UTF-8 without BOM
 275
 276     - UTF-8-BOM
 277
 278     UTF-8 with BOM
 279
 280     - UTF8-MAC (input only)
 281
 282     decomposed UTF-8
 283
 284     - UTF-16
 285
 286     same as UTF-16BE
 287
 288     - UTF-16BE
 289
 290     UTF-16 Big Endian without BOM
 291
 292     - UTF-16BE-BOM
 293
 294     UTF-16 Big Endian with BOM
 295
 296     - UTF-16LE
 297
 298     UTF-16 Little Endian without BOM
 299
 300     - UTF-16LE-BOM
 301
 302     UTF-16 Little Endian with BOM
 303
 304     - UTF-32
 305
 306     same as UTF-32BE
 307
 308     - UTF-32BE
 309
 310     UTF-32 Big Endian without BOM
 311
 312     - UTF-32BE-BOM
 313
 314     UTF-32 Big Endian with BOM
 315
 316     - UTF-32LE
 317
 318     UTF-32 Little Endian without BOM
 319
 320     - UTF-32LE-BOM
 321
 322     UTF-32 Little Endian with BOM
 323
 324 - __--fb-{skip, html, xml, perl, java, subchar}__
 325
 326 Specify the way that nkf handles unassigned characters.
 327 Without this option, --fb-skip is assumed.
 328
 329 - __--prefix=_escape character__target character_..__
 330
 331 When nkf converts to Shift_JIS,
 332 nkf adds a specified escape character to specified 2nd byte of Shift_JIS characters.
 333 1st byte of argument is the escape character and following bytes are target characters.
 334
 335 - __--no-cp932ext__
 336
 337 Handle the characters extended in CP932 as unassigned characters.
 338
 339 - __--no-best-fit-chars__
 340
 341 When converting from Unicode to an encoded byte sequence,
 342 don't convert characters which are not round-trip safe.
 343 When converting from Unicode to Unicode,
 344 with this and the -x option, nkf can be used as a UTF converter.
 345 (In other words, without this and the -x option, nkf doesn't preserve some characters.)
 346
 347 When nkf converts strings that are related to paths, you should use this option.
 348
 349 - __--cap-input__
 350
 351 Decode hex encoded characters.
 352
 353 - __--url-input__
 354
 355 Unescape percent escaped characters.
 356
 357 - __--numchar-input__
 358
 359 Decode character reference, such as "&#....;".
 360
 361
 362
 363 - __--in-place[=___SUFFIX___]__  __--overwrite[=___SUFFIX___]__
 364
 365 Overwrite the __original__ listed files with the filtered result.
 366
 367 __Note__ --overwrite preserves timestamps of original files.
 368
 369 - __--guess=[12]__
 370
 371 Print guessed encoding and newline. (2 is default, 1 is only encoding)
 372
 373 - __--help__
 374
 375 Print nkf's help.
 376
 377 - __--version__
 378
 379 Print nkf's version.
 380
 381
 382
 383 - __--__
 384
 385 Ignore the rest of the -options.
 386
 387 # AUTHOR
 388
 389 Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
 390
 391 Copyright (c) 1996-2012, The nkf Project.
 392