Update to current version of Go library.
[pf3gnuchains/gcc-fork.git] / libgo / go / compress / flate / deflate.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

import (
        "io"
        "math"
        "os"
)

const (
        NoCompression        = 0
        BestSpeed            = 1
        fastCompression      = 3
        BestCompression      = 9
        DefaultCompression   = -1
        logMaxOffsetSize     = 15  // Standard DEFLATE
        wideLogMaxOffsetSize = 22  // Wide DEFLATE
        minMatchLength       = 3   // The smallest match that the compressor looks for
        maxMatchLength       = 258 // The longest match for the compressor
        minOffsetSize        = 1   // The shortest offset that makes any sense

        // The maximum number of tokens we put into a single flate block, just to
        // stop things from getting too large.
        maxFlateBlockTokens = 1 << 14
        maxStoreBlockSize   = 65535
        hashBits            = 15
        hashSize            = 1 << hashBits
        hashMask            = (1 << hashBits) - 1
        hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
)

type syncPipeReader struct {
        *io.PipeReader
        closeChan chan bool
}

func (sr *syncPipeReader) CloseWithError(err os.Error) os.Error {
        retErr := sr.PipeReader.CloseWithError(err)
        sr.closeChan <- true // finish writer close
        return retErr
}

type syncPipeWriter struct {
        *io.PipeWriter
        closeChan chan bool
}

// compressionLevel holds the per-level tuning parameters that doDeflate
// copies into the compressor: good, nice and chain become goodMatch,
// niceMatch and maxChainLength, while lazy and fastSkipHashing control
// how much lazy matching and hash-table insertion is done.
type compressionLevel struct {
        good, lazy, nice, chain, fastSkipHashing int
}

var levels = []compressionLevel{
        {}, // 0
        // For levels 1-3 we don't bother trying with lazy matches
        {3, 0, 8, 4, 4},
        {3, 0, 16, 8, 5},
        {3, 0, 32, 32, 6},
        // Levels 4-9 use increasingly more lazy matching
        // and increasingly stringent conditions for "good enough".
        {4, 4, 16, 16, math.MaxInt32},
        {8, 16, 32, 32, math.MaxInt32},
        {8, 16, 128, 128, math.MaxInt32},
        {8, 32, 128, 256, math.MaxInt32},
        {32, 128, 258, 1024, math.MaxInt32},
        {32, 258, 258, 4096, math.MaxInt32},
}

func (sw *syncPipeWriter) Close() os.Error {
        err := sw.PipeWriter.Close()
        <-sw.closeChan // wait for reader close
        return err
}

// syncPipe returns an io.Pipe wrapped so that the writer's Close does not
// return until the reader side has been closed, keeping Writer.Close from
// returning before the compressor goroutine has finished.
func syncPipe() (*syncPipeReader, *syncPipeWriter) {
        r, w := io.Pipe()
        sr := &syncPipeReader{r, make(chan bool, 1)}
        sw := &syncPipeWriter{w, sr.closeChan}
        return sr, sw
}

// compressor holds the state of a single deflate stream.
type compressor struct {
        level         int
        logWindowSize uint
        w             *huffmanBitWriter
        r             io.Reader
        // (1 << logWindowSize) - 1.
        windowMask int

        eof      bool // has eof been reached on input?
        sync     bool // writer wants to flush
        syncChan chan os.Error

        // hashHead[hashValue] contains the largest inputIndex with the specified hash value
        hashHead []int

        // If hashHead[hashValue] is within the current window, then
        // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
        // with the same hash value.
        hashPrev []int

        // If we find a match of length >= niceMatch, then we don't bother searching
        // any further.
        niceMatch int

        // If we find a match of length >= goodMatch, we only do a half-hearted
        // effort at doing lazy matching starting at the next character
        goodMatch int

        // The maximum number of chains we look at when finding a match
        maxChainLength int

        // The sliding window we use for matching
        window []byte

        // The index just past the last valid character
        windowEnd int

        // index in "window" at which current block starts
        blockStart int
}

// flush flushes the underlying huffmanBitWriter and returns its error, if any.
func (d *compressor) flush() os.Error {
        d.w.flush()
        return d.w.err
}

// fillWindow reads more input into the sliding window.  If index has moved
// close to the end of the double-sized buffer, the window (and the hash
// tables that index into it) is first shifted down by wSize; the possibly
// adjusted index is returned.  A zero-byte read with a nil error marks a
// pending flush (d.sync); os.EOF sets d.eof and is not returned as an error.
func (d *compressor) fillWindow(index int) (int, os.Error) {
        if d.sync {
                return index, nil
        }
        wSize := d.windowMask + 1
        if index >= wSize+wSize-(minMatchLength+maxMatchLength) {
                // shift the window by wSize
                copy(d.window, d.window[wSize:2*wSize])
                index -= wSize
                d.windowEnd -= wSize
                if d.blockStart >= wSize {
                        d.blockStart -= wSize
                } else {
                        d.blockStart = math.MaxInt32
                }
                for i, h := range d.hashHead {
                        v := h - wSize
                        if v < -1 {
                                v = -1
                        }
                        d.hashHead[i] = v
                }
                for i, h := range d.hashPrev {
                        v := h - wSize
                        if v < -1 {
                                v = -1
                        }
                        d.hashPrev[i] = v
                }
        }
        count, err := d.r.Read(d.window[d.windowEnd:])
        d.windowEnd += count
        if count == 0 && err == nil {
                d.sync = true
        }
        if err == os.EOF {
                d.eof = true
                err = nil
        }
        return index, err
}

// writeBlock writes the given tokens as one compressed block, handing
// d.window[d.blockStart:index] to the bit writer and advancing blockStart
// to index.  Nothing is written when index is 0 and eof is not set.
func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error {
        if index > 0 || eof {
                var window []byte
                if d.blockStart <= index {
                        window = d.window[d.blockStart:index]
                }
                d.blockStart = index
                d.w.writeBlock(tokens, eof, window)
                return d.w.err
        }
        return nil
}

// findMatch tries to find a match starting at pos whose length is greater
// than prevLength.  It examines at most d.maxChainLength chain entries
// before giving up.
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
        minMatchLook := maxMatchLength
        if lookahead < minMatchLook {
                minMatchLook = lookahead
        }

        win := d.window[0 : pos+minMatchLook]

        // We quit when we get a match that's at least nice bytes long
        nice := len(win) - pos
        if d.niceMatch < nice {
                nice = d.niceMatch
        }

        // If we've got a match that's good enough, only look at 1/4 of the chain.
        tries := d.maxChainLength
        length = prevLength
        if length >= d.goodMatch {
                tries >>= 2
        }

        w0 := win[pos]
        w1 := win[pos+1]
        wEnd := win[pos+length]
        minIndex := pos - (d.windowMask + 1)

        for i := prevHead; tries > 0; tries-- {
                if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
                        // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches

                        n := 3
                        for pos+n < len(win) && win[i+n] == win[pos+n] {
                                n++
                        }
                        if n > length && (n > 3 || pos-i <= 4096) {
                                length = n
                                offset = pos - i
                                ok = true
                                if n >= nice {
                                        // The match is good enough that we don't try to find a better one.
                                        break
                                }
                                wEnd = win[pos+n]
                        }
                }
                if i == minIndex {
                        // hashPrev[i & windowMask] has already been overwritten, so stop now.
                        break
                }
                if i = d.hashPrev[i&d.windowMask]; i < minIndex || i < 0 {
                        break
                }
        }
        return
}

// writeStoredBlock writes buf as a single uncompressed (stored) DEFLATE block.
func (d *compressor) writeStoredBlock(buf []byte) os.Error {
        if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
                return d.w.err
        }
        d.w.writeBytes(buf)
        return d.w.err
}

// storedDeflate implements NoCompression: input is copied to the output in
// stored blocks, and any pending Flush is acknowledged on syncChan.
func (d *compressor) storedDeflate() os.Error {
        buf := make([]byte, maxStoreBlockSize)
        for {
                n, err := d.r.Read(buf)
                if n == 0 && err == nil {
                        d.sync = true
                }
                if n > 0 || d.sync {
                        if err := d.writeStoredBlock(buf[0:n]); err != nil {
                                return err
                        }
                        if d.sync {
                                d.syncChan <- nil
                                d.sync = false
                        }
                }
                if err != nil {
                        if err == os.EOF {
                                break
                        }
                        return err
                }
        }
        return nil
}

// doDeflate runs the main compression loop for levels 1-9: it slides the
// input through the window, finds back-references with the hashHead/hashPrev
// chains, and emits literal and match tokens, writing out a block whenever
// maxFlateBlockTokens have accumulated or the input is exhausted or flushed.
func (d *compressor) doDeflate() (err os.Error) {
        // init
        d.windowMask = 1<<d.logWindowSize - 1
        d.hashHead = make([]int, hashSize)
        d.hashPrev = make([]int, 1<<d.logWindowSize)
        d.window = make([]byte, 2<<d.logWindowSize)
        fillInts(d.hashHead, -1)
        tokens := make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1)
        l := levels[d.level]
        d.goodMatch = l.good
        d.niceMatch = l.nice
        d.maxChainLength = l.chain
        lazyMatch := l.lazy
        length := minMatchLength - 1
        offset := 0
        byteAvailable := false
        isFastDeflate := l.fastSkipHashing != 0
        index := 0
        // run
        if index, err = d.fillWindow(index); err != nil {
                return
        }
        maxOffset := d.windowMask + 1 // (1 << logWindowSize);
        // only need to change when you refill the window
        windowEnd := d.windowEnd
        maxInsertIndex := windowEnd - (minMatchLength - 1)
        ti := 0

        hash := int(0)
        if index < maxInsertIndex {
                hash = int(d.window[index])<<hashShift + int(d.window[index+1])
        }
        chainHead := -1
Loop:
        for {
                if index > windowEnd {
                        panic("index > windowEnd")
                }
                lookahead := windowEnd - index
                if lookahead < minMatchLength+maxMatchLength {
                        if index, err = d.fillWindow(index); err != nil {
                                return
                        }
                        windowEnd = d.windowEnd
                        if index > windowEnd {
                                panic("index > windowEnd")
                        }
                        maxInsertIndex = windowEnd - (minMatchLength - 1)
                        lookahead = windowEnd - index
                        if lookahead == 0 {
                                // Flush current output block if any.
                                if byteAvailable {
                                        // There is still one pending token that needs to be flushed
                                        tokens[ti] = literalToken(uint32(d.window[index-1]) & 0xFF)
                                        ti++
                                        byteAvailable = false
                                }
                                if ti > 0 {
                                        if err = d.writeBlock(tokens[0:ti], index, false); err != nil {
                                                return
                                        }
                                        ti = 0
                                }
                                if d.sync {
                                        d.w.writeStoredHeader(0, false)
                                        d.w.flush()
                                        d.syncChan <- d.w.err
                                        d.sync = false
                                }

                                // If this was only a sync (not at EOF) keep going.
                                if !d.eof {
                                        continue
                                }
                                break Loop
                        }
                }
                if index < maxInsertIndex {
                        // Update the hash
                        hash = (hash<<hashShift + int(d.window[index+2])) & hashMask
                        chainHead = d.hashHead[hash]
                        d.hashPrev[index&d.windowMask] = chainHead
                        d.hashHead[hash] = index
                }
                prevLength := length
                prevOffset := offset
                length = minMatchLength - 1
                offset = 0
                minIndex := index - maxOffset
                if minIndex < 0 {
                        minIndex = 0
                }

                if chainHead >= minIndex &&
                        (isFastDeflate && lookahead > minMatchLength-1 ||
                                !isFastDeflate && lookahead > prevLength && prevLength < lazyMatch) {
                        if newLength, newOffset, ok := d.findMatch(index, chainHead, minMatchLength-1, lookahead); ok {
                                length = newLength
                                offset = newOffset
                        }
                }
                if isFastDeflate && length >= minMatchLength ||
                        !isFastDeflate && prevLength >= minMatchLength && length <= prevLength {
                        // There was a match at the previous step, and the current match is
                        // not better. Output the previous match.
                        if isFastDeflate {
                                tokens[ti] = matchToken(uint32(length-minMatchLength), uint32(offset-minOffsetSize))
                        } else {
                                tokens[ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))
                        }
                        ti++
                        // Insert in the hash table all strings up to the end of the match.
                        // index and index-1 are already inserted. If there is not enough
                        // lookahead, the last two strings are not inserted into the hash
                        // table.
                        if length <= l.fastSkipHashing {
                                var newIndex int
                                if isFastDeflate {
                                        newIndex = index + length
                                } else {
                                        newIndex = prevLength - 1
                                }
                                for index++; index < newIndex; index++ {
                                        if index < maxInsertIndex {
                                                hash = (hash<<hashShift + int(d.window[index+2])) & hashMask
                                                // Get previous value with the same hash.
                                                // Our chain should point to the previous value.
                                                d.hashPrev[index&d.windowMask] = d.hashHead[hash]
                                                // Set the head of the hash chain to us.
                                                d.hashHead[hash] = index
                                        }
                                }
                                if !isFastDeflate {
                                        byteAvailable = false
                                        length = minMatchLength - 1
                                }
                        } else {
                                // For matches this long, we don't bother inserting each individual
                                // item into the table.
                                index += length
                                hash = (int(d.window[index])<<hashShift + int(d.window[index+1]))
                        }
                        if ti == maxFlateBlockTokens {
                                // The block includes the current character
                                if err = d.writeBlock(tokens, index, false); err != nil {
                                        return
                                }
                                ti = 0
                        }
                } else {
                        if isFastDeflate || byteAvailable {
                                i := index - 1
                                if isFastDeflate {
                                        i = index
                                }
                                tokens[ti] = literalToken(uint32(d.window[i]) & 0xFF)
                                ti++
                                if ti == maxFlateBlockTokens {
                                        if err = d.writeBlock(tokens, i+1, false); err != nil {
                                                return
                                        }
                                        ti = 0
                                }
                        }
                        index++
                        if !isFastDeflate {
                                byteAvailable = true
                        }
                }
        }
        return
}

// compress sets up the compressor for r and w, dispatches on the compression
// level, and finishes the stream with an empty final stored block and a flush.
func (d *compressor) compress(r io.Reader, w io.Writer, level int, logWindowSize uint) (err os.Error) {
        d.r = r
        d.w = newHuffmanBitWriter(w)
        d.level = level
        d.logWindowSize = logWindowSize

        switch {
        case level == NoCompression:
                err = d.storedDeflate()
        case level == DefaultCompression:
                d.level = 6
                fallthrough
        case 1 <= level && level <= 9:
                err = d.doDeflate()
        default:
                return WrongValueError{"level", 0, 9, int32(level)}
        }

        if d.sync {
                d.syncChan <- err
                d.sync = false
        }
        if err != nil {
                return err
        }
        if d.w.writeStoredHeader(0, true); d.w.err != nil {
                return d.w.err
        }
        return d.flush()
}

// NewWriter returns a new Writer compressing
// data at the given level.  Following zlib, levels
// range from 1 (BestSpeed) to 9 (BestCompression);
// higher levels typically run slower but compress more.
// Level 0 (NoCompression) does not attempt any
// compression; it only adds the necessary DEFLATE framing.
func NewWriter(w io.Writer, level int) *Writer {
        const logWindowSize = logMaxOffsetSize
        var d compressor
        d.syncChan = make(chan os.Error, 1)
        pr, pw := syncPipe()
        go func() {
                err := d.compress(pr, w, level, logWindowSize)
                pr.CloseWithError(err)
        }()
        return &Writer{pw, &d}
}
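
// compressExample is a hypothetical helper, not part of the package API,
// shown only as a sketch of the typical NewWriter call sequence: write the
// data, then Close to flush the final block.
func compressExample(dst io.Writer, src []byte) os.Error {
        zw := NewWriter(dst, DefaultCompression)
        if _, err := zw.Write(src); err != nil {
                zw.Close()
                return err
        }
        return zw.Close()
}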

// NewWriterDict is like NewWriter but initializes the new
// Writer with a preset dictionary.  The returned Writer behaves
// as if the dictionary had been written to it without producing
// any compressed output.  The compressed data written to w
// can only be decompressed by a Reader initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) *Writer {
        dw := &dictWriter{w, false}
        zw := NewWriter(dw, level)
        zw.Write(dict)
        zw.Flush()
        dw.enabled = true
        return zw
}
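
// compressWithDictExample is a hypothetical helper, not part of the package
// API, sketching preset-dictionary use: the same dict must later be given to
// the reading side for the output to be decompressible.
func compressWithDictExample(dst io.Writer, dict, src []byte) os.Error {
        zw := NewWriterDict(dst, BestCompression, dict)
        if _, err := zw.Write(src); err != nil {
                zw.Close()
                return err
        }
        return zw.Close()
}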

// A dictWriter discards everything written to it until enabled is set.
// NewWriterDict uses it so that compressing the preset dictionary produces
// no output.
type dictWriter struct {
        w       io.Writer
        enabled bool
}

func (w *dictWriter) Write(b []byte) (n int, err os.Error) {
        if w.enabled {
                return w.w.Write(b)
        }
        return len(b), nil
}

// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
        w *syncPipeWriter
        d *compressor
}

// Write writes data to w, which will eventually write the
// compressed form of data to its underlying writer.
func (w *Writer) Write(data []byte) (n int, err os.Error) {
        if len(data) == 0 {
                // no point, and nil interferes with sync
                return
        }
        return w.w.Write(data)
}

// Flush flushes any pending compressed data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// If the underlying writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() os.Error {
        // For more about flushing:
        // http://www.bolet.org/~pornin/deflate-flush.html
        if w.d.sync {
                panic("compress/flate: double Flush")
        }
        _, err := w.w.Write(nil)
        err1 := <-w.d.syncChan
        if err == nil {
                err = err1
        }
        return err
}
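
// writePacketExample is a hypothetical helper, not part of the package API,
// sketching Flush in a packet-oriented protocol: once Flush returns, the
// remote reader can decode everything written so far even though the stream
// is still open.
func writePacketExample(zw *Writer, packet []byte) os.Error {
        if _, err := zw.Write(packet); err != nil {
                return err
        }
        return zw.Flush()
}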

// Close flushes and closes the writer.
func (w *Writer) Close() os.Error {
        return w.w.Close()
}