1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// DEFLATE tuning constants.
// NOTE(review): this fragment sits inside a const ( ... ) block whose
// opening, and several members (e.g. hashBits, NoCompression), are outside
// this view — confirm against the full file.
18 DefaultCompression = -1
19 logMaxOffsetSize = 15 // Standard DEFLATE
20 wideLogMaxOffsetSize = 22 // Wide DEFLATE
21 minMatchLength = 3 // The smallest match that the compressor looks for
22 maxMatchLength = 258 // The longest match for the compressor
23 minOffsetSize = 1 // The shortest offset that makes any sense
25 // The maximum number of tokens we put into a single flate block, just to
26 // stop things from getting too large.
27 maxFlateBlockTokens = 1 << 14
28 maxStoreBlockSize = 65535
// Parameters of the rolling hash used for match finding; derived from
// hashBits (declared outside this fragment).
30 hashSize = 1 << hashBits
31 hashMask = (1 << hashBits) - 1
32 hashShift = (hashBits + minMatchLength - 1) / minMatchLength
// syncPipeReader wraps a pipe reader with a channel used to synchronize
// the writer's Close with the reader's close.
// NOTE(review): struct fields (embedded PipeReader, closeChan) are elided
// in this fragment — confirm against the full file.
35 type syncPipeReader struct {
// CloseWithError closes the underlying pipe reader with err and signals
// closeChan so a waiting writer Close can complete.
40 func (sr *syncPipeReader) CloseWithError(err os.Error) os.Error {
41 retErr := sr.PipeReader.CloseWithError(err)
42 sr.closeChan <- true // finish writer close
46 type syncPipeWriter struct {
// compressionLevel holds the match-search tuning knobs for one compression
// level: good/lazy/nice match-length thresholds, maximum hash-chain length,
// and the match length above which fast (skip-hashing) mode applies.
51 type compressionLevel struct {
52 good, lazy, nice, chain, fastSkipHashing int
// levels maps compression level to its tuning parameters.
// NOTE(review): the entries for levels 0-3 are elided in this fragment;
// only the level 4-9 rows are visible.
55 var levels = []compressionLevel{
57 // For levels 1-3 we don't bother trying with lazy matches
61 // Levels 4-9 use increasingly more lazy matching
62 // and increasingly stringent conditions for "good enough".
63 {4, 4, 16, 16, math.MaxInt32},
64 {8, 16, 32, 32, math.MaxInt32},
65 {8, 16, 128, 128, math.MaxInt32},
66 {8, 32, 128, 256, math.MaxInt32},
67 {32, 128, 258, 1024, math.MaxInt32},
68 {32, 258, 258, 4096, math.MaxInt32},
// Close closes the underlying pipe writer, then blocks until the reader
// side signals its own close via closeChan (return statement elided).
71 func (sw *syncPipeWriter) Close() os.Error {
72 err := sw.PipeWriter.Close()
73 <-sw.closeChan // wait for reader close
// syncPipe builds a reader/writer pair that share a 1-buffered close
// channel, so writer Close waits for reader close.
// NOTE(review): the io.Pipe() call producing r and w is elided here.
77 func syncPipe() (*syncPipeReader, *syncPipeWriter) {
79 sr := &syncPipeReader{r, make(chan bool, 1)}
80 sw := &syncPipeWriter{w, sr.closeChan}
// compressor carries all state for one deflate stream. Many field
// declarations are elided in this fragment; the comments below describe
// the fields that follow them in the full file.
84 type compressor struct {
89 // (1 << logWindowSize) - 1.
92 eof bool // has eof been reached on input?
93 sync bool // writer wants to flush
94 syncChan chan os.Error
96 // hashHead[hashValue] contains the largest inputIndex with the specified hash value
99 // If hashHead[hashValue] is within the current window, then
100 // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
101 // with the same hash value.
104 // If we find a match of length >= niceMatch, then we don't bother searching
108 // If we find a match of length >= goodMatch, we only do a half-hearted
109 // effort at doing lazy matching starting at the next character
112 // The maximum number of chains we look at when finding a match
115 // The sliding window we use for matching
118 // The index just past the last valid character
121 // index in "window" at which current block starts
125 func (d *compressor) flush() os.Error {
// fillWindow slides the window when index approaches the end of the
// 2*wSize buffer, fixes up blockStart and the hash tables for the shift,
// then reads more input into the free tail of the window. Returns the
// (possibly shifted) index. Several lines (hash fix-up bodies, return)
// are elided in this fragment.
130 func (d *compressor) fillWindow(index int) (int, os.Error) {
134 wSize := d.windowMask + 1
135 if index >= wSize+wSize-(minMatchLength+maxMatchLength) {
136 // shift the window by wSize
137 copy(d.window, d.window[wSize:2*wSize])
// blockStart moves with the window; if it falls outside, park it at a
// sentinel so no stale window data is referenced.
140 if d.blockStart >= wSize {
141 d.blockStart -= wSize
143 d.blockStart = math.MaxInt32
// Rebase every hash-table entry by wSize (loop bodies elided).
145 for i, h := range d.hashHead {
152 for i, h := range d.hashPrev {
// Top up the window from the underlying reader.
160 count, err := d.r.Read(d.window[d.windowEnd:])
// A Read of 0 bytes with nil error is not EOF; handling elided.
162 if count == 0 && err == nil {
// writeBlock emits tokens as one DEFLATE block covering window data from
// blockStart up to index. Empty non-eof blocks are skipped (error return
// and blockStart update elided in this fragment).
172 func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error {
173 if index > 0 || eof {
175 if d.blockStart <= index {
176 window = d.window[d.blockStart:index]
179 d.w.writeBlock(tokens, eof, window)
185 // Try to find a match starting at pos whose length is greater than prevLength.
186 // We only look at maxChainLength possibilities before giving up.
187 func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
// Never look for a match longer than what the lookahead allows.
188 minMatchLook := maxMatchLength
189 if lookahead < minMatchLook {
190 minMatchLook = lookahead
193 win := d.window[0 : pos+minMatchLook]
195 // We quit when we get a match that's at least nice long
196 nice := len(win) - pos
197 if d.niceMatch < nice {
201 // If we've got a match that's good enough, only look in 1/4 the chain.
202 tries := d.maxChainLength
204 if length >= d.goodMatch {
// wEnd is the byte a candidate must match at position length to beat the
// current best; candidates below minIndex have slid out of the window.
210 wEnd := win[pos+length]
211 minIndex := pos - (d.windowMask + 1)
213 for i := prevHead; tries > 0; tries-- {
214 if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
215 // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches
218 for pos+n < len(win) && win[i+n] == win[pos+n] {
// Prefer a longer match, but only accept length-3 matches at short
// distances (<= 4096), where they still pay off.
221 if n > length && (n > 3 || pos-i <= 4096) {
226 // The match is good enough that we don't try to find a better one.
233 // hashPrev[i & windowMask] has already been overwritten, so stop now.
236 if i = d.hashPrev[i&d.windowMask]; i < minIndex || i < 0 {
// writeStoredBlock emits buf as an uncompressed (stored) DEFLATE block:
// stored header first, then the raw bytes (remainder elided here).
243 func (d *compressor) writeStoredBlock(buf []byte) os.Error {
244 if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
// storedDeflate implements NoCompression: copy input to output in stored
// blocks of at most maxStoreBlockSize bytes (loop/EOF handling elided).
251 func (d *compressor) storedDeflate() os.Error {
252 buf := make([]byte, maxStoreBlockSize)
254 n, err := d.r.Read(buf)
// A 0-byte Read with nil error is not EOF; handling elided here.
255 if n == 0 && err == nil {
259 if err := d.writeStoredBlock(buf[0:n]); err != nil {
// doDeflate is the main compression loop: it maintains the sliding window
// and hash chains, finds matches via findMatch, performs (optionally lazy)
// match emission, and flushes token blocks. Many lines are elided in this
// fragment; the visible code is kept byte-identical.
277 func (d *compressor) doDeflate() (err os.Error) {
// Allocate window (2x its logical size so it can slide) and hash tables.
279 d.windowMask = 1<<d.logWindowSize - 1
280 d.hashHead = make([]int, hashSize)
281 d.hashPrev = make([]int, 1<<d.logWindowSize)
282 d.window = make([]byte, 2<<d.logWindowSize)
283 fillInts(d.hashHead, -1)
284 tokens := make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1)
// Copy the level parameters into the compressor (l is the selected
// compressionLevel; its derivation is elided here).
288 d.maxChainLength = l.chain
290 length := minMatchLength - 1
292 byteAvailable := false
293 isFastDeflate := l.fastSkipHashing != 0
296 if index, err = d.fillWindow(index); err != nil {
299 maxOffset := d.windowMask + 1 // (1 << logWindowSize);
300 // only need to change when you refill the window
301 windowEnd := d.windowEnd
302 maxInsertIndex := windowEnd - (minMatchLength - 1)
// Seed the rolling hash from the first two bytes at index.
306 if index < maxInsertIndex {
307 hash = int(d.window[index])<<hashShift + int(d.window[index+1])
// Main loop invariant: index never passes the valid data.
312 if index > windowEnd {
313 panic("index > windowEnd")
315 lookahead := windowEnd - index
// Refill when lookahead gets too small to guarantee a full-length match.
316 if lookahead < minMatchLength+maxMatchLength {
317 if index, err = d.fillWindow(index); err != nil {
320 windowEnd = d.windowEnd
321 if index > windowEnd {
322 panic("index > windowEnd")
324 maxInsertIndex = windowEnd - (minMatchLength - 1)
325 lookahead = windowEnd - index
327 // Flush current output block if any.
329 // There is still one pending token that needs to be flushed
330 tokens[ti] = literalToken(uint32(d.window[index-1]) & 0xFF)
332 byteAvailable = false
335 if err = d.writeBlock(tokens[0:ti], index, false); err != nil {
// Sync path: emit an empty stored block and report status to Flush.
341 d.w.writeStoredHeader(0, false)
343 d.syncChan <- d.w.err
347 // If this was only a sync (not at EOF) keep going.
// Advance the rolling hash and push index onto its hash chain.
354 if index < maxInsertIndex {
356 hash = (hash<<hashShift + int(d.window[index+2])) & hashMask
357 chainHead = d.hashHead[hash]
358 d.hashPrev[index&d.windowMask] = chainHead
359 d.hashHead[hash] = index
363 length = minMatchLength - 1
365 minIndex := index - maxOffset
// Search for a match: fast mode needs only minimal lookahead; lazy mode
// only searches when the previous match was short enough to beat.
370 if chainHead >= minIndex &&
371 (isFastDeflate && lookahead > minMatchLength-1 ||
372 !isFastDeflate && lookahead > prevLength && prevLength < lazyMatch) {
373 if newLength, newOffset, ok := d.findMatch(index, chainHead, minMatchLength-1, lookahead); ok {
378 if isFastDeflate && length >= minMatchLength ||
379 !isFastDeflate && prevLength >= minMatchLength && length <= prevLength {
380 // There was a match at the previous step, and the current match is
381 // not better. Output the previous match.
383 tokens[ti] = matchToken(uint32(length-minMatchLength), uint32(offset-minOffsetSize))
385 tokens[ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))
388 // Insert in the hash table all strings up to the end of the match.
389 // index and index-1 are already inserted. If there is not enough
390 // lookahead, the last two strings are not inserted into the hash
392 if length <= l.fastSkipHashing {
395 newIndex = index + length
// NOTE(review): upstream flate computes `index + prevLength - 1` here;
// this line looks truncated — confirm against the full file.
397 newIndex = prevLength - 1
399 for index++; index < newIndex; index++ {
400 if index < maxInsertIndex {
401 hash = (hash<<hashShift + int(d.window[index+2])) & hashMask
402 // Get previous value with the same hash.
403 // Our chain should point to the previous value.
404 d.hashPrev[index&d.windowMask] = d.hashHead[hash]
405 // Set the head of the hash chain to us.
406 d.hashHead[hash] = index
410 byteAvailable = false
411 length = minMatchLength - 1
414 // For matches this long, we don't bother inserting each individual
415 // item into the table.
417 hash = (int(d.window[index])<<hashShift + int(d.window[index+1]))
419 if ti == maxFlateBlockTokens {
420 // The block includes the current character
421 if err = d.writeBlock(tokens, index, false); err != nil {
// No match to emit: output a pending literal (fast mode, or lazy mode
// with a byte already available).
427 if isFastDeflate || byteAvailable {
432 tokens[ti] = literalToken(uint32(d.window[i]) & 0xFF)
434 if ti == maxFlateBlockTokens {
435 if err = d.writeBlock(tokens, i+1, false); err != nil {
// compress runs the whole deflate pipeline on r, writing compressed output
// to w, dispatching on level: stored blocks for NoCompression, the default
// level for DefaultCompression, and doDeflate for 1-9. Ends the stream
// with a final (eof) stored header. Several lines elided in this fragment.
450 func (d *compressor) compress(r io.Reader, w io.Writer, level int, logWindowSize uint) (err os.Error) {
452 d.w = newHuffmanBitWriter(w)
454 d.logWindowSize = logWindowSize
457 case level == NoCompression:
458 err = d.storedDeflate()
459 case level == DefaultCompression:
462 case 1 <= level && level <= 9:
// Any other level is rejected; note the error advertises 0..9.
465 return WrongValueError{"level", 0, 9, int32(level)}
// Terminate the stream: empty stored block with the final bit set.
475 if d.w.writeStoredHeader(0, true); d.w.err != nil {
481 // NewWriter returns a new Writer compressing
482 // data at the given level. Following zlib, levels
483 // range from 1 (BestSpeed) to 9 (BestCompression);
484 // higher levels typically run slower but compress more.
485 // Level 0 (NoCompression) does not attempt any
486 // compression; it only adds the necessary DEFLATE framing.
487 func NewWriter(w io.Writer, level int) *Writer {
488 const logWindowSize = logMaxOffsetSize
490 d.syncChan = make(chan os.Error, 1)
// NOTE(review): the syncPipe() call and the `go func() { ... }()` that
// runs d.compress concurrently are elided in this fragment — confirm.
493 err := d.compress(pr, w, level, logWindowSize)
494 pr.CloseWithError(err)
496 return &Writer{pw, &d}
499 // NewWriterDict is like NewWriter but initializes the new
500 // Writer with a preset dictionary. The returned Writer behaves
501 // as if the dictionary had been written to it without producing
502 // any compressed output. The compressed data written to w
503 // can only be decompressed by a Reader initialized with the
505 func NewWriterDict(w io.Writer, level int, dict []byte) *Writer {
// dictWriter suppresses output while the dictionary is primed through
// the compressor (priming write and enable flip elided here).
506 dw := &dictWriter{w, false}
507 zw := NewWriter(dw, level)
// dictWriter wraps a Writer with an enabled flag: while disabled, Write
// discards output so a preset dictionary can be fed through the
// compressor without emitting anything (fields and body elided here).
514 type dictWriter struct {
519 func (w *dictWriter) Write(b []byte) (n int, err os.Error) {
526 // A Writer takes data written to it and writes the compressed
527 // form of that data to an underlying writer (see NewWriter).
// NOTE(review): the `type Writer struct { ... }` declaration itself is
// elided in this fragment.
533 // Write writes data to w, which will eventually write the
534 // compressed form of data to its underlying writer.
535 func (w *Writer) Write(data []byte) (n int, err os.Error) {
// Zero-length writes are short-circuited before this (guard elided):
537 // no point, and nil interferes with sync
540 return w.w.Write(data)
543 // Flush flushes any pending compressed data to the underlying writer.
544 // It is useful mainly in compressed network protocols, to ensure that
545 // a remote reader has enough data to reconstruct a packet.
546 // Flush does not return until the data has been written.
547 // If the underlying writer returns an error, Flush returns that error.
549 // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
550 func (w *Writer) Flush() os.Error {
551 // For more about flushing:
552 // http://www.bolet.org/~pornin/deflate-flush.html
// A second Flush before the compressor acknowledged the first is a
// programmer error (the guard condition is elided in this fragment).
554 panic("compress/flate: double Flush")
// A nil Write is the in-band signal that makes the compressor sync;
// the acknowledgement (or error) comes back on syncChan.
556 _, err := w.w.Write(nil)
557 err1 := <-w.d.syncChan
564 // Close flushes and closes the writer.
// (Body elided in this fragment — presumably closes the internal pipe
// writer and propagates its error; confirm against the full file.)
565 func (w *Writer) Close() os.Error {