1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// DEFLATE tuning constants.
// NOTE(review): this fragment sits inside a const ( ... ) block whose
// opening, and several members (e.g. hashBits, NoCompression), are outside
// this view — confirm against the full file.
18 DefaultCompression = -1
19 logMaxOffsetSize = 15 // Standard DEFLATE
20 wideLogMaxOffsetSize = 22 // Wide DEFLATE
21 minMatchLength = 3 // The smallest match that the compressor looks for
22 maxMatchLength = 258 // The longest match for the compressor
23 minOffsetSize = 1 // The shortest offset that makes any sense
25 // The maximum number of tokens we put into a single flate block, just to
26 // stop things from getting too large.
27 maxFlateBlockTokens = 1 << 14
28 maxStoreBlockSize = 65535
// Parameters of the rolling hash used for match finding; derived from
// hashBits (declared outside this fragment).
30 hashSize = 1 << hashBits
31 hashMask = (1 << hashBits) - 1
32 hashShift = (hashBits + minMatchLength - 1) / minMatchLength
// syncPipeReader wraps a pipe reader with a channel used to synchronize
// the writer's Close with the reader's close.
// NOTE(review): struct fields (embedded PipeReader, closeChan) are elided
// in this fragment — confirm against the full file.
35 type syncPipeReader struct {
// CloseWithError closes the underlying pipe reader with err and signals
// closeChan so a waiting writer Close can complete.
40 func (sr *syncPipeReader) CloseWithError(err os.Error) os.Error {
41 retErr := sr.PipeReader.CloseWithError(err)
42 sr.closeChan <- true // finish writer close
46 type syncPipeWriter struct {
// compressionLevel holds the match-search tuning knobs for one compression
// level: good/lazy/nice match-length thresholds, maximum hash-chain length,
// and the match length above which fast (skip-hashing) mode applies.
51 type compressionLevel struct {
52 good, lazy, nice, chain, fastSkipHashing int
// levels maps compression level to its tuning parameters.
// NOTE(review): the entries for levels 0-3 are elided in this fragment;
// only the level 4-9 rows are visible.
55 var levels = []compressionLevel{
57 // For levels 1-3 we don't bother trying with lazy matches
61 // Levels 4-9 use increasingly more lazy matching
62 // and increasingly stringent conditions for "good enough".
63 {4, 4, 16, 16, math.MaxInt32},
64 {8, 16, 32, 32, math.MaxInt32},
65 {8, 16, 128, 128, math.MaxInt32},
66 {8, 32, 128, 256, math.MaxInt32},
67 {32, 128, 258, 1024, math.MaxInt32},
68 {32, 258, 258, 4096, math.MaxInt32},
// Close closes the underlying pipe writer, then blocks until the reader
// side signals its own close via closeChan (return statement elided).
71 func (sw *syncPipeWriter) Close() os.Error {
72 err := sw.PipeWriter.Close()
73 <-sw.closeChan // wait for reader close
// syncPipe builds a reader/writer pair that share a 1-buffered close
// channel, so writer Close waits for reader close.
// NOTE(review): the io.Pipe() call producing r and w is elided here.
77 func syncPipe() (*syncPipeReader, *syncPipeWriter) {
79 sr := &syncPipeReader{r, make(chan bool, 1)}
80 sw := &syncPipeWriter{w, sr.closeChan}
// compressor carries all state for one deflate stream. Many field
// declarations are elided in this fragment; the comments below describe
// the fields that follow them in the full file.
84 type compressor struct {
89 // (1 << logWindowSize) - 1.
92 eof bool // has eof been reached on input?
93 sync bool // writer wants to flush
94 syncChan chan os.Error
96 // hashHead[hashValue] contains the largest inputIndex with the specified hash value
99 // If hashHead[hashValue] is within the current window, then
100 // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
101 // with the same hash value.
104 // If we find a match of length >= niceMatch, then we don't bother searching
108 // If we find a match of length >= goodMatch, we only do a half-hearted
109 // effort at doing lazy matching starting at the next character
112 // The maximum number of chains we look at when finding a match
115 // The sliding window we use for matching
118 // The index just past the last valid character
121 // index in "window" at which current block starts
125 func (d *compressor) flush() os.Error {
// fillWindow slides the window when index approaches the end of the
// 2*wSize buffer, fixes up blockStart and the hash tables for the shift,
// then reads more input into the free tail of the window. Returns the
// (possibly shifted) index. Several lines (hash fix-up bodies, return)
// are elided in this fragment.
130 func (d *compressor) fillWindow(index int) (int, os.Error) {
134 wSize := d.windowMask + 1
135 if index >= wSize+wSize-(minMatchLength+maxMatchLength) {
136 // shift the window by wSize
137 copy(d.window, d.window[wSize:2*wSize])
// blockStart moves with the window; if it falls outside, park it at a
// sentinel so no stale window data is referenced.
140 if d.blockStart >= wSize {
141 d.blockStart -= wSize
143 d.blockStart = math.MaxInt32
// Rebase every hash-table entry by wSize (loop bodies elided).
145 for i, h := range d.hashHead {
152 for i, h := range d.hashPrev {
// Top up the window from the underlying reader.
160 count, err := d.r.Read(d.window[d.windowEnd:])
// A Read of 0 bytes with nil error is not EOF; handling elided.
162 if count == 0 && err == nil {
// writeBlock emits tokens as one DEFLATE block covering window data from
// blockStart up to index. Empty non-eof blocks are skipped (error return
// and blockStart update elided in this fragment).
172 func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error {
173 if index > 0 || eof {
175 if d.blockStart <= index {
176 window = d.window[d.blockStart:index]
179 d.w.writeBlock(tokens, eof, window)
185 // Try to find a match starting at pos whose length is greater than prevLength.
186 // We only look at maxChainLength possibilities before giving up.
187 func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
// Never look for a match longer than what the lookahead allows.
188 minMatchLook := maxMatchLength
189 if lookahead < minMatchLook {
190 minMatchLook = lookahead
193 win := d.window[0 : pos+minMatchLook]
195 // We quit when we get a match that's at least nice long
196 nice := len(win) - pos
197 if d.niceMatch < nice {
201 // If we've got a match that's good enough, only look in 1/4 the chain.
202 tries := d.maxChainLength
204 if length >= d.goodMatch {
// wEnd is the byte a candidate must match at position length to beat the
// current best; candidates below minIndex have slid out of the window.
210 wEnd := win[pos+length]
211 minIndex := pos - (d.windowMask + 1)
213 for i := prevHead; tries > 0; tries-- {
214 if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
215 // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches
218 for pos+n < len(win) && win[i+n] == win[pos+n] {
// Prefer a longer match, but only accept length-3 matches at short
// distances (<= 4096), where they still pay off.
221 if n > length && (n > 3 || pos-i <= 4096) {
226 // The match is good enough that we don't try to find a better one.
233 // hashPrev[i & windowMask] has already been overwritten, so stop now.
236 if i = d.hashPrev[i&d.windowMask]; i < minIndex || i < 0 {
// writeStoredBlock emits buf as an uncompressed (stored) DEFLATE block:
// stored header first, then the raw bytes (remainder elided here).
243 func (d *compressor) writeStoredBlock(buf []byte) os.Error {
244 if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
// storedDeflate implements NoCompression: copy input to output in stored
// blocks of at most maxStoreBlockSize bytes (loop/EOF handling elided).
251 func (d *compressor) storedDeflate() os.Error {
252 buf := make([]byte, maxStoreBlockSize)
254 n, err := d.r.Read(buf)
// A 0-byte Read with nil error is not EOF; handling elided here.
255 if n == 0 && err == nil {
259 if err := d.writeStoredBlock(buf[0:n]); err != nil {
// doDeflate is the main compression loop: it maintains the sliding window
// and hash chains, finds matches via findMatch, performs (optionally lazy)
// match emission, and flushes token blocks. Many lines are elided in this
// fragment; the visible code is kept byte-identical.
277 func (d *compressor) doDeflate() (err os.Error) {
// Allocate window (2x its logical size so it can slide) and hash tables.
279 d.windowMask = 1<<d.logWindowSize - 1
280 d.hashHead = make([]int, hashSize)
281 d.hashPrev = make([]int, 1<<d.logWindowSize)
282 d.window = make([]byte, 2<<d.logWindowSize)
283 fillInts(d.hashHead, -1)
284 tokens := make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1)
// Copy the level parameters into the compressor (l is the selected
// compressionLevel; its derivation is elided here).
288 d.maxChainLength = l.chain
290 length := minMatchLength - 1
292 byteAvailable := false
293 isFastDeflate := l.fastSkipHashing != 0
296 if index, err = d.fillWindow(index); err != nil {
299 maxOffset := d.windowMask + 1 // (1 << logWindowSize);
300 // only need to change when you refill the window
301 windowEnd := d.windowEnd
302 maxInsertIndex := windowEnd - (minMatchLength - 1)
// Seed the rolling hash from the first two bytes at index.
306 if index < maxInsertIndex {
307 hash = int(d.window[index])<<hashShift + int(d.window[index+1])
// Main loop invariant: index never passes the valid data.
312 if index > windowEnd {
313 panic("index > windowEnd")
315 lookahead := windowEnd - index
// Refill when lookahead gets too small to guarantee a full-length match.
316 if lookahead < minMatchLength+maxMatchLength {
317 if index, err = d.fillWindow(index); err != nil {
320 windowEnd = d.windowEnd
321 if index > windowEnd {
322 panic("index > windowEnd")
324 maxInsertIndex = windowEnd - (minMatchLength - 1)
325 lookahead = windowEnd - index
327 // Flush current output block if any.
329 // There is still one pending token that needs to be flushed
330 tokens[ti] = literalToken(uint32(d.window[index-1]) & 0xFF)
332 byteAvailable = false
335 if err = d.writeBlock(tokens[0:ti], index, false); err != nil {
// Sync path: emit an empty stored block and report status to Flush.
341 d.w.writeStoredHeader(0, false)
343 d.syncChan <- d.w.err
347 // If this was only a sync (not at EOF) keep going.
// Advance the rolling hash and push index onto its hash chain.
354 if index < maxInsertIndex {
356 hash = (hash<<hashShift + int(d.window[index+2])) & hashMask
357 chainHead = d.hashHead[hash]
358 d.hashPrev[index&d.windowMask] = chainHead
359 d.hashHead[hash] = index
363 length = minMatchLength - 1
365 minIndex := index - maxOffset
// Search for a match: fast mode needs only minimal lookahead; lazy mode
// only searches when the previous match was short enough to beat.
370 if chainHead >= minIndex &&
371 (isFastDeflate && lookahead > minMatchLength-1 ||
372 !isFastDeflate && lookahead > prevLength && prevLength < lazyMatch) {
373 if newLength, newOffset, ok := d.findMatch(index, chainHead, minMatchLength-1, lookahead); ok {
378 if isFastDeflate && length >= minMatchLength ||
379 !isFastDeflate && prevLength >= minMatchLength && length <= prevLength {
380 // There was a match at the previous step, and the current match is
381 // not better. Output the previous match.
383 tokens[ti] = matchToken(uint32(length-minMatchLength), uint32(offset-minOffsetSize))
385 tokens[ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))
388 // Insert in the hash table all strings up to the end of the match.
389 // index and index-1 are already inserted. If there is not enough
390 // lookahead, the last two strings are not inserted into the hash
392 if length <= l.fastSkipHashing {
395 newIndex = index + length
// NOTE(review): upstream flate computes `index + prevLength - 1` here;
// this line looks truncated — confirm against the full file.
397 newIndex = prevLength - 1
399 for index++; index < newIndex; index++ {
400 if index < maxInsertIndex {
401 hash = (hash<<hashShift + int(d.window[index+2])) & hashMask
402 // Get previous value with the same hash.
403 // Our chain should point to the previous value.
404 d.hashPrev[index&d.windowMask] = d.hashHead[hash]
405 // Set the head of the hash chain to us.
406 d.hashHead[hash] = index
410 byteAvailable = false
411 length = minMatchLength - 1
414 // For matches this long, we don't bother inserting each individual
415 // item into the table.
417 hash = (int(d.window[index])<<hashShift + int(d.window[index+1]))
419 if ti == maxFlateBlockTokens {
420 // The block includes the current character
421 if err = d.writeBlock(tokens, index, false); err != nil {
// No match to emit: output a pending literal (fast mode, or lazy mode
// with a byte already available).
427 if isFastDeflate || byteAvailable {
432 tokens[ti] = literalToken(uint32(d.window[i]) & 0xFF)
434 if ti == maxFlateBlockTokens {
435 if err = d.writeBlock(tokens, i+1, false); err != nil {
// compress runs the whole deflate pipeline on r, writing compressed output
// to w, dispatching on level: stored blocks for NoCompression, the default
// level for DefaultCompression, and doDeflate for 1-9. Ends the stream
// with a final (eof) stored header. Several lines elided in this fragment.
450 func (d *compressor) compress(r io.Reader, w io.Writer, level int, logWindowSize uint) (err os.Error) {
452 d.w = newHuffmanBitWriter(w)
454 d.logWindowSize = logWindowSize
457 case level == NoCompression:
458 err = d.storedDeflate()
459 case level == DefaultCompression:
462 case 1 <= level && level <= 9:
// Any other level is rejected; note the error advertises 0..9.
465 return WrongValueError{"level", 0, 9, int32(level)}
// Terminate the stream: empty stored block with the final bit set.
475 if d.w.writeStoredHeader(0, true); d.w.err != nil {
481 // NewWriter returns a new Writer compressing
482 // data at the given level. Following zlib, levels
483 // range from 1 (BestSpeed) to 9 (BestCompression);
484 // higher levels typically run slower but compress more.
485 // Level 0 (NoCompression) does not attempt any
486 // compression; it only adds the necessary DEFLATE framing.
487 func NewWriter(w io.Writer, level int) *Writer {
488 const logWindowSize = logMaxOffsetSize
490 d.syncChan = make(chan os.Error, 1)
// NOTE(review): the syncPipe() call and the `go func() { ... }()` that
// runs d.compress concurrently are elided in this fragment — confirm.
493 err := d.compress(pr, w, level, logWindowSize)
494 pr.CloseWithError(err)
496 return &Writer{pw, &d}
499 // NewWriterDict is like NewWriter but initializes the new
500 // Writer with a preset dictionary. The returned Writer behaves
501 // as if the dictionary had been written to it without producing
502 // any compressed output. The compressed data written to w
503 // can only be decompressed by a Reader initialized with the
505 func NewWriterDict(w io.Writer, level int, dict []byte) *Writer {
// dictWriter suppresses output while the dictionary is primed through
// the compressor (priming write and enable flip elided here).
506 dw := &dictWriter{w, false}
507 zw := NewWriter(dw, level)
// dictWriter wraps a Writer with an enabled flag: while disabled, Write
// discards output so a preset dictionary can be fed through the
// compressor without emitting anything (fields and body elided here).
514 type dictWriter struct {
519 func (w *dictWriter) Write(b []byte) (n int, err os.Error) {
526 // A Writer takes data written to it and writes the compressed
527 // form of that data to an underlying writer (see NewWriter).
// NOTE(review): the `type Writer struct { ... }` declaration itself is
// elided in this fragment.
533 // Write writes data to w, which will eventually write the
534 // compressed form of data to its underlying writer.
535 func (w *Writer) Write(data []byte) (n int, err os.Error) {
// Zero-length writes are short-circuited before this (guard elided):
537 // no point, and nil interferes with sync
540 return w.w.Write(data)
543 // Flush flushes any pending compressed data to the underlying writer.
544 // It is useful mainly in compressed network protocols, to ensure that
545 // a remote reader has enough data to reconstruct a packet.
546 // Flush does not return until the data has been written.
547 // If the underlying writer returns an error, Flush returns that error.
549 // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
550 func (w *Writer) Flush() os.Error {
551 // For more about flushing:
552 // http://www.bolet.org/~pornin/deflate-flush.html
// A second Flush before the compressor acknowledged the first is a
// programmer error (the guard condition is elided in this fragment).
554 panic("compress/flate: double Flush")
// A nil Write is the in-band signal that makes the compressor sync;
// the acknowledgement (or error) comes back on syncChan.
556 _, err := w.w.Write(nil)
557 err1 := <-w.d.syncChan
564 // Close flushes and closes the writer.
// (Body elided in this fragment — presumably closes the internal pipe
// writer and propagates its error; confirm against the full file.)
565 func (w *Writer) Close() os.Error {