1 /* Inflater.java - Decompress a data stream
2 Copyright (C) 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package java.util.zip;
40 /* Written using on-line Java Platform 1.2 API Specification
42 * Believed complete and correct.
46 * Inflater is used to decompress data that has been compressed according
47 * to the "deflate" standard (RFC 1951), by default inside the zlib wrapper of RFC 1950.
49 * The usage is as follows. First you have to set some input with
50 * <code>setInput()</code>, then inflate() it. If inflate doesn't
51 * inflate any bytes there may be three reasons:
53 * <li>needsInput() returns true because the input buffer is empty.
54 * You have to provide more input with <code>setInput()</code>.
55 * NOTE: needsInput() also returns true when the stream is finished.
57 * <li>needsDictionary() returns true, you have to provide a preset
58 * dictionary with <code>setDictionary()</code>.</li>
59 * <li>finished() returns true, the inflater has finished.</li>
61 * Once the first output byte is produced, a dictionary will not be
62 * needed at a later stage.
64 * @author John Leuner, Jochen Hoenicke
71 /* Copy lengths for literal codes 257..285 */
// Length table: decodeHuffman() loads repLength from here, indexed with
// (symbol - 257).
72 private static final int CPLENS[] =
74 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
75 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
78 /* Extra bits for literal codes 257..285 */
// Parallel to CPLENS (same index): the value is stored in neededBits, and
// that many bits are then peeked and dropped from the input.
79 private static final int CPLEXT[] =
81 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
82 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
85 /* Copy offsets for distance codes 0..29 */
// Distance table: decodeHuffman() loads repDist from here, indexed directly
// with the symbol returned by the distance tree.
86 private static final int CPDIST[] = {
87 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
88 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
89 8193, 12289, 16385, 24577
92 /* Extra bits for distance codes */
// Parallel to CPDIST (same index): number of bits stored in neededBits for
// the distance symbol.
93 private static final int CPDEXT[] = {
94 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
95 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
99 /* These are the states in which the inflater can be. */
// Values assigned to the mode variable; the decode methods move between
// them as the stream is parsed.
// DECODE_HEADER is the starting state when nowrap is false.
100 private static final int DECODE_HEADER = 0;
// DECODE_DICT: needsDictionary() reports true in this state once
// neededBits has reached 0.
101 private static final int DECODE_DICT = 1;
// DECODE_BLOCKS is the starting state when nowrap is true; it is also
// entered after a dictionary is accepted and when a block ends.
102 private static final int DECODE_BLOCKS = 2;
103 private static final int DECODE_STORED_LEN1 = 3;
104 private static final int DECODE_STORED_LEN2 = 4;
105 private static final int DECODE_STORED = 5;
106 private static final int DECODE_DYN_HEADER = 6;
// The four DECODE_HUFFMAN* states are all handled by decodeHuffman().
107 private static final int DECODE_HUFFMAN = 7;
108 private static final int DECODE_HUFFMAN_LENBITS = 8;
109 private static final int DECODE_HUFFMAN_DIST = 9;
110 private static final int DECODE_HUFFMAN_DISTBITS = 10;
// While in DECODE_CHKSUM, inflate() does not copy buffered output to the caller.
111 private static final int DECODE_CHKSUM = 11;
// finished() additionally requires the output window to be empty.
112 private static final int FINISHED = 12;
114 /** This variable contains the current state. */
118 * The adler checksum of the dictionary or of the decompressed
119 * stream, as it is written in the header resp. footer of the
120 * compressed stream. <br>
122 * Only valid if mode is DECODE_DICT or DECODE_CHKSUM.
124 private int readAdler;
126 * The number of bits needed to complete the current state. This
127 * is valid, if mode is DECODE_DICT, DECODE_CHKSUM,
128 * DECODE_HUFFMAN_LENBITS or DECODE_HUFFMAN_DISTBITS.
130 private int neededBits;
// Length and distance of the match being decoded; both are passed to
// outputWindow.repeat() by decodeHuffman().
131 private int repLength, repDist;
// 16-bit length field of the current stored block; checked against its
// complement and then handed to outputWindow.copyStored().
132 private int uncomprLen;
134 * True, if the last block flag was set in the last block of the
135 * inflated stream. This means that the stream ends after the
138 private boolean isLastBlock;
141 * The total number of inflated bytes.
143 private int totalOut;
145 * The total number of bytes set with setInput(). This is not the
146 * value returned by getTotalIn(), since this also includes the
151 * This variable stores the nowrap flag that was given to the constructor.
152 * True means, that the inflated stream doesn't contain a header nor the
153 * checksum in the footer.
155 private boolean nowrap;
// Reader over the bytes supplied through setInput(); the decoder consumes
// it with peekBits()/dropBits().
157 private StreamManipulator input;
// Holds decoded bytes; inflate() drains it with copyOutput().
158 private OutputWindow outputWindow;
// Parser for a dynamic-tree block header; a fresh one is created each time
// such a block starts.
159 private InflaterDynHeader dynHeader;
// Trees used by decodeHuffman() for literal/length symbols and for
// distance symbols of the current block.
160 private InflaterHuffmanTree litlenTree, distTree;
// Running checksum: updated with each chunk of output given to the caller
// and with the dictionary passed to setDictionary().
161 private Adler32 adler;
164 * Creates a new inflater.
172 * Creates a new inflater.
173 * @param nowrap true if no header and checksum field appears in the
174 * stream. This is used for GZIPed input. For compatibility with
175 * Sun JDK you should provide one byte of input more than needed in
// Sets up the checksum, the input reader and the output window, and picks
// the starting decoder state from the nowrap flag.
178 public Inflater (boolean nowrap)
180 this.nowrap = nowrap;
181 this.adler = new Adler32();
182 input = new StreamManipulator();
183 outputWindow = new OutputWindow();
// With nowrap there is no header state to go through, so decoding begins
// directly at the block level.
184 mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
188 * Finalizes this object.
// Finalizer hook; the only content visible in its body is the note below.
190 protected void finalize ()
192 /* Exists only for compatibility */
196 * Frees all objects allocated by the inflater. There's no reason
197 * to call this, since you can just rely on garbage collection (even
198 * for the Sun implementation). Exists only for compatibility
199 * with Sun's JDK, where the compressor allocates native memory.
200 * If you call any method (even reset) afterwards the behaviour is
214 * Returns true, if the inflater has finished. This means, that no
215 * input is needed and no output can be produced.
217 public boolean finished()
// Reaching the FINISHED state is not sufficient on its own: bytes still
// buffered in the output window must have been handed out as well.
219 return mode == FINISHED && outputWindow.getAvailable() == 0;
223 * Gets the adler checksum. This is either the checksum of all
224 * uncompressed bytes returned by inflate(), or if needsDictionary()
225 * returns true (and thus no output was yet produced) this is the
226 * adler checksum of the expected dictionary.
227 * @return the adler checksum.
229 public int getAdler()
// While a dictionary is awaited, report the checksum value read from the
// stream (readAdler); otherwise report the running checksum narrowed to int.
231 return needsDictionary() ? readAdler : (int) adler.getValue();
235 * Gets the number of unprocessed input bytes. Useful, if the end of the
236 * stream is reached and you want to further process the bytes after
237 * the deflate stream.
238 * @return the number of bytes of the input which were not processed.
240 public int getRemaining()
// Delegates to the input reader's count of bytes it still has available.
242 return input.getAvailableBytes();
246 * Gets the total number of processed compressed input bytes.
247 * @return the total number of processed input bytes.
249 public int getTotalIn()
// totalIn counts the bytes set with setInput(), so the bytes that are
// still unread are subtracted to get the amount actually processed.
251 return totalIn - getRemaining();
255 * Gets the total number of output bytes returned by inflate().
256 * @return the total number of output bytes.
258 public int getTotalOut()
264 * Inflates the compressed stream to the output buffer. If this
265 * returns 0, you should check, whether needsDictionary(),
266 * needsInput() or finished() returns true, to determine why no
267 * further output is produced.
268 * @param buf the output buffer.
269 * @return the number of bytes written to the buffer, 0 if no further
270 * output can be produced.
271 * @exception DataFormatException if deflated stream is invalid.
272 * @exception IllegalArgumentException if buf has length 0.
274 public int inflate (byte[] buf) throws DataFormatException
// Convenience overload: uses the whole array as the output range.
276 return inflate (buf, 0, buf.length);
280 * Inflates the compressed stream to the output buffer. If this
281 * returns 0, you should check, whether needsDictionary(),
282 * needsInput() or finished() returns true, to determine why no
283 * further output is produced.
284 * @param buf the output buffer.
285 * @param off the offset into buffer where the output should start.
286 * @param len the maximum length of the output.
287 * @return the number of bytes written to the buffer, 0 if no further
288 * output can be produced.
289 * @exception DataFormatException if deflated stream is invalid.
290 * @exception IndexOutOfBoundsException if the off and/or len are wrong.
292 public int inflate (byte[] buf, int off, int len) throws DataFormatException
294 /* Special case: len may be zero */
297 /* Check for correct buff, off, len triple */
// "off > off + len" is true when len is negative or when off + len
// overflows int, so both cases are rejected along with a plain overrun.
298 if (0 > off || off > off + len || off + len > buf.length)
299 throw new ArrayIndexOutOfBoundsException();
304 if (mode != DECODE_CHKSUM)
306 /* Don't give away any output, if we are waiting for the
307 * checksum in the input stream.
309 * With this trick we have always:
310 * needsInput() and not finished()
311 * implies more output can be produced.
// Copy out what the window can provide, then fold exactly those bytes
// into the running checksum.
313 more = outputWindow.copyOutput(buf, off, len);
314 adler.update(buf, off, more);
// Loop while decode() reports progress, or while the window still holds
// output that may be handed out (i.e. not waiting for the checksum).
323 while (decode() || (outputWindow.getAvailable() > 0
324 && mode != DECODE_CHKSUM));
329 * Returns true, if a preset dictionary is needed to inflate the input.
331 public boolean needsDictionary ()
// decodeDict() keeps reading checksum bytes while neededBits > 0, so
// neededBits == 0 means the dictionary checksum has been read completely.
333 return mode == DECODE_DICT && neededBits == 0;
337 * Returns true, if the input buffer is empty.
338 * You should then call setInput(). <br>
340 * <em>NOTE</em>: This method also returns true when the stream is finished.
342 public boolean needsInput ()
// Delegates the decision to the input reader.
344 return input.needsInput ();
348 * Resets the inflater so that a new stream can be decompressed. All
349 * pending input and output will be discarded.
353 mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
354 totalIn = totalOut = 0;
356 outputWindow.reset();
365 * Sets the preset dictionary. This should only be called, if
366 * needsDictionary() returns true and it should set the same
367 * dictionary, that was used for deflating. The getAdler()
368 * function returns the checksum of the dictionary needed.
369 * @param buffer the dictionary.
370 * @exception IllegalStateException if no dictionary is needed.
371 * @exception IllegalArgumentException if the dictionary checksum is
374 public void setDictionary (byte[] buffer)
// Convenience overload: uses the whole array as the dictionary.
376 setDictionary(buffer, 0, buffer.length);
380 * Sets the preset dictionary. This should only be called, if
381 * needsDictionary() returns true and it should set the same
382 * dictionary, that was used for deflating. The getAdler()
383 * function returns the checksum of the dictionary needed.
384 * @param buffer the dictionary.
385 * @param off the offset into buffer where the dictionary starts.
386 * @param len the length of the dictionary.
387 * @exception IllegalStateException if no dictionary is needed.
388 * @exception IllegalArgumentException if the dictionary checksum is
390 * @exception IndexOutOfBoundsException if the off and/or len are wrong.
392 public void setDictionary (byte[] buffer, int off, int len)
// Only legal while the decoder is actually waiting for a dictionary.
394 if (!needsDictionary())
395 throw new IllegalStateException();
// Checksum the supplied bytes and compare against the value that was read
// from the stream; a mismatch means the wrong dictionary was supplied.
397 adler.update(buffer, off, len);
398 if ((int) adler.getValue() != readAdler)
399 throw new IllegalArgumentException("Wrong adler checksum");
// Load the dictionary into the output window and continue at block level.
401 outputWindow.copyDict(buffer, off, len);
402 mode = DECODE_BLOCKS;
406 * Sets the input. This should only be called, if needsInput()
408 * @param buf the input.
409 * @exception IllegalStateException if no input is needed.
411 public void setInput (byte[] buf)
// Convenience overload: uses the whole array as input.
413 setInput (buf, 0, buf.length);
417 * Sets the input. This should only be called, if needsInput()
419 * @param buf the input.
420 * @param off the offset into buffer where the input starts.
421 * @param len the length of the input.
422 * @exception IllegalStateException if no input is needed.
423 * @exception IndexOutOfBoundsException if the off and/or len are wrong.
425 public void setInput (byte[] buf, int off, int len)
// Hands the byte range to the input reader.
427 input.setInput (buf, off, len);
432 * Decodes the deflate header.
433 * @return false if more input is needed.
434 * @exception DataFormatException if header is invalid.
436 private boolean decodeHeader () throws DataFormatException
// Look at the next 16 bits of input as the two-byte header.
438 int header = input.peekBits(16);
443 /* The header is written in "wrong" byte order */
// Swap the two bytes of the 16-bit value before interpreting it.
444 header = ((header << 8) | (header >> 8)) & 0xffff;
// The byte-swapped header value must be a multiple of 31.
445 if (header % 31 != 0)
446 throw new DataFormatException("Header checksum illegal");
// Bits 8..11 carry the compression method, which must be Deflater.DEFLATED.
448 if ((header & 0x0f00) != (Deflater.DEFLATED << 8))
449 throw new DataFormatException("Compression Method unknown");
451 /* Maximum size of the backwards window in bits.
452 * We currently ignore this, but we could use it to make the
453 * inflater window more space efficient. On the other hand the
454 * full window (15 bits) is needed most times, anyway.
455 int max_wbits = ((header & 0x7000) >> 12) + 8;
// Bit 5 clear: no preset dictionary, so block decoding can start right away.
458 if ((header & 0x0020) == 0) // Dictionary flag?
460 mode = DECODE_BLOCKS;
471 * Decodes the dictionary checksum after the deflate header.
472 * @return false if more input is needed.
474 private boolean decodeDict ()
// Runs until no more checksum bits are outstanding.
476 while (neededBits > 0)
478 int dictByte = input.peekBits(8);
// Shift the bytes read so far up by eight bits and append the new byte in
// the low position, so the first byte read becomes the most significant.
482 readAdler = (readAdler << 8) | dictByte;
489 * Decodes the huffman encoded symbols in the input stream.
490 * @return false if more input is needed, true if output window is
491 * full or the current block ends.
492 * @exception DataFormatException if deflated stream is invalid.
494 private boolean decodeHuffman () throws DataFormatException
496 int free = outputWindow.getFreeSpace();
503 /* This is the inner loop so it is optimized a bit */
// (symbol & ~0xff) == 0 holds exactly for symbols 0..255; these are
// written to the output window unchanged.
504 while (((symbol = litlenTree.getSymbol(input)) & ~0xff) == 0)
506 outputWindow.write(symbol);
516 /* symbol == 256: end of block */
519 mode = DECODE_BLOCKS;
// Symbols from 257 upwards select a match length; the table lookup doubles
// as the range check, since an out-of-range index throws.
526 repLength = CPLENS[symbol - 257];
527 neededBits = CPLEXT[symbol - 257];
529 catch (ArrayIndexOutOfBoundsException ex)
// A symbol outside the table is reported as corrupt data.
531 throw new DataFormatException("Illegal rep length code");
534 case DECODE_HUFFMAN_LENBITS:
// The state is recorded before peeking, presumably so a later call resumes
// here if the input runs short -- TODO confirm.
537 mode = DECODE_HUFFMAN_LENBITS;
538 int i = input.peekBits(neededBits);
541 input.dropBits(neededBits);
544 mode = DECODE_HUFFMAN_DIST;
546 case DECODE_HUFFMAN_DIST:
547 symbol = distTree.getSymbol(input);
// Same table-driven lookup for the distance symbol.
552 repDist = CPDIST[symbol];
553 neededBits = CPDEXT[symbol];
555 catch (ArrayIndexOutOfBoundsException ex)
557 throw new DataFormatException("Illegal rep dist code");
560 case DECODE_HUFFMAN_DISTBITS:
563 mode = DECODE_HUFFMAN_DISTBITS;
564 int i = input.peekBits(neededBits);
567 input.dropBits(neededBits);
// Emit the match (presumably a copy of repLength bytes from repDist back
// in the window -- TODO confirm against OutputWindow), then return to
// decoding literal/length symbols.
570 outputWindow.repeat(repLength, repDist);
572 mode = DECODE_HUFFMAN;
// Any other mode value is a programming error rather than bad input.
575 throw new IllegalStateException();
582 * Decodes the adler checksum after the deflate stream.
583 * @return false if more input is needed.
584 * @exception DataFormatException if checksum doesn't match.
586 private boolean decodeChksum () throws DataFormatException
// Runs until no more checksum bits are outstanding.
588 while (neededBits > 0)
590 int chkByte = input.peekBits(8);
// Append each byte in the low position, so the first byte read ends up as
// the most significant byte of readAdler.
594 readAdler = (readAdler << 8) | chkByte;
// Compare the checksum computed over the output with the one read from
// the stream; the message reports both values in hex.
597 if ((int) adler.getValue() != readAdler)
598 throw new DataFormatException("Adler chksum doesn't match: "
599 +Integer.toHexString((int)adler.getValue())
600 +" vs. "+Integer.toHexString(readAdler));
606 * Decodes the deflated stream.
607 * @return false if more input is needed, or if finished.
608 * @exception DataFormatException if deflated stream is invalid.
// Dispatches on the current mode and performs one decoding step.
610 private boolean decode () throws DataFormatException
615 return decodeHeader();
619 return decodeChksum();
// Skip to the next byte boundary before switching to checksum decoding.
631 input.skipToByteBoundary();
633 mode = DECODE_CHKSUM;
// Look at the next 3 bits, which carry the block type used in the
// dispatch below.
638 int type = input.peekBits(3);
647 case DeflaterConstants.STORED_BLOCK:
// A stored block's length fields are read after skipping to a byte boundary.
648 input.skipToByteBoundary();
649 mode = DECODE_STORED_LEN1;
651 case DeflaterConstants.STATIC_TREES:
// Static blocks use the predefined trees held by InflaterHuffmanTree.
652 litlenTree = InflaterHuffmanTree.defLitLenTree;
653 distTree = InflaterHuffmanTree.defDistTree;
654 mode = DECODE_HUFFMAN;
656 case DeflaterConstants.DYN_TREES:
// Dynamic blocks first need their tree description parsed.
657 dynHeader = new InflaterDynHeader();
658 mode = DECODE_DYN_HEADER;
661 throw new DataFormatException("Unknown block type "+type);
665 case DECODE_STORED_LEN1:
// A negative peekBits result is handled separately here (presumably it
// signals that 16 bits are not yet available -- TODO confirm).
667 if ((uncomprLen = input.peekBits(16)) < 0)
670 mode = DECODE_STORED_LEN2;
673 case DECODE_STORED_LEN2:
675 int nlen = input.peekBits(16);
// The second field must be the bitwise complement of the 16-bit length.
679 if (nlen != (uncomprLen ^ 0xffff))
680 throw new DataFormatException("broken uncompressed block");
681 mode = DECODE_STORED;
686 int more = outputWindow.copyStored(input, uncomprLen);
690 mode = DECODE_BLOCKS;
// Report progress only while the input reader still has data.
693 return !input.needsInput();
696 case DECODE_DYN_HEADER:
697 if (!dynHeader.decode(input))
// Header fully parsed: build both trees from it and start decoding symbols.
699 litlenTree = dynHeader.buildLitLenTree();
700 distTree = dynHeader.buildDistTree();
701 mode = DECODE_HUFFMAN;
704 case DECODE_HUFFMAN_LENBITS:
705 case DECODE_HUFFMAN_DIST:
706 case DECODE_HUFFMAN_DISTBITS:
707 return decodeHuffman();
// Any other mode value is a programming error rather than bad input.
711 throw new IllegalStateException();