libjava/classpath/java/util/zip/Inflater.java

   1 /* Inflater.java - Decompress a data stream
   2    Copyright (C) 1999, 2000, 2001, 2003, 2005  Free Software Foundation, Inc.
   3
   4 This file is part of GNU Classpath.
   5
   6 GNU Classpath is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Classpath is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Classpath; see the file COPYING.  If not, write to the
  18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301 USA.
  20
  21 Linking this library statically or dynamically with other modules is
  22 making a combined work based on this library.  Thus, the terms and
  23 conditions of the GNU General Public License cover the whole
  24 combination.
  25
  26 As a special exception, the copyright holders of this library give you
  27 permission to link this library with independent modules to produce an
  28 executable, regardless of the license terms of these independent
  29 modules, and to copy and distribute the resulting executable under
  30 terms of your choice, provided that you also meet, for each linked
  31 independent module, the terms and conditions of the license of that
  32 module.  An independent module is a module which is not derived from
  33 or based on this library.  If you modify this library, you may extend
  34 this exception to your version of the library, but you are not
  35 obligated to do so.  If you do not wish to do so, delete this
  36 exception statement from your version. */
  37
  38 package java.util.zip;
  39
  40 /* Written using on-line Java Platform 1.2 API Specification
  41  * and JCL book.
  42  * Believed complete and correct.
  43  */
  44
  45 /**
  46  * Inflater is used to decompress data that has been compressed according
  47  * to the "deflate" standard described in rfc1950.
  48  *
  49  * The usage is as following.  First you have to set some input with
  50  * <code>setInput()</code>, then inflate() it.  If inflate doesn't
  51  * inflate any bytes there may be three reasons:
  52  * <ul>
  53  * <li>needsInput() returns true because the input buffer is empty.
  54  * You have to provide more input with <code>setInput()</code>.
  55  * NOTE: needsInput() also returns true when, the stream is finished.
  56  * </li>
  57  * <li>needsDictionary() returns true, you have to provide a preset
  58  *     dictionary with <code>setDictionary()</code>.</li>
  59  * <li>finished() returns true, the inflater has finished.</li>
  60  * </ul>
  61  * Once the first output byte is produced, a dictionary will not be
  62  * needed at a later stage.
  63  *
  64  * @author John Leuner, Jochen Hoenicke
  65  * @author Tom Tromey
  66  * @date May 17, 1999
  67  * @since JDK 1.1
  68  */
  69 public class Inflater
  70 {
  71   /* Copy lengths for literal codes 257..285 */
  72   private static final int CPLENS[] =
  73   {
  74     3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
  75     35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
  76   };
  77
  78   /* Extra bits for literal codes 257..285 */
  79   private static final int CPLEXT[] =
  80   {
  81     0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
  82     3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
  83   };
  84
  85   /* Copy offsets for distance codes 0..29 */
  86   private static final int CPDIST[] = {
  87     1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
  88     257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
  89     8193, 12289, 16385, 24577
  90   };
  91
  92   /* Extra bits for distance codes */
  93   private static final int CPDEXT[] = {
  94     0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
  95     7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
  96     12, 12, 13, 13
  97   };
  98
  99   /* This are the state in which the inflater can be.  */
 100   private static final int DECODE_HEADER           = 0;
 101   private static final int DECODE_DICT             = 1;
 102   private static final int DECODE_BLOCKS           = 2;
 103   private static final int DECODE_STORED_LEN1      = 3;
 104   private static final int DECODE_STORED_LEN2      = 4;
 105   private static final int DECODE_STORED           = 5;
 106   private static final int DECODE_DYN_HEADER       = 6;
 107   private static final int DECODE_HUFFMAN          = 7;
 108   private static final int DECODE_HUFFMAN_LENBITS  = 8;
 109   private static final int DECODE_HUFFMAN_DIST     = 9;
 110   private static final int DECODE_HUFFMAN_DISTBITS = 10;
 111   private static final int DECODE_CHKSUM           = 11;
 112   private static final int FINISHED                = 12;
 113
 114   /** This variable contains the current state. */
 115   private int mode;
 116
 117   /**
 118    * The adler checksum of the dictionary or of the decompressed
 119    * stream, as it is written in the header resp. footer of the
 120    * compressed stream.  <br>
 121    *
 122    * Only valid if mode is DECODE_DICT or DECODE_CHKSUM.
 123    */
 124   private int readAdler;
 125   /**
 126    * The number of bits needed to complete the current state.  This
 127    * is valid, if mode is DECODE_DICT, DECODE_CHKSUM,
 128    * DECODE_HUFFMAN_LENBITS or DECODE_HUFFMAN_DISTBITS.
 129    */
 130   private int neededBits;
 131   private int repLength, repDist;
 132   private int uncomprLen;
 133   /**
 134    * True, if the last block flag was set in the last block of the
 135    * inflated stream.  This means that the stream ends after the
 136    * current block.
 137    */
 138   private boolean isLastBlock;
 139
 140   /**
 141    * The total number of inflated bytes.
 142    */
 143   private int totalOut;
 144   /**
 145    * The total number of bytes set with setInput().  This is not the
 146    * value returned by getTotalIn(), since this also includes the
 147    * unprocessed input.
 148    */
 149   private int totalIn;
 150   /**
 151    * This variable stores the nowrap flag that was given to the constructor.
 152    * True means, that the inflated stream doesn't contain a header nor the
 153    * checksum in the footer.
 154    */
 155   private boolean nowrap;
 156
 157   private StreamManipulator input;
 158   private OutputWindow outputWindow;
 159   private InflaterDynHeader dynHeader;
 160   private InflaterHuffmanTree litlenTree, distTree;
 161   private Adler32 adler;
 162
 163   /**
 164    * Creates a new inflater.
 165    */
 166   public Inflater ()
 167   {
 168     this (false);
 169   }
 170
 171   /**
 172    * Creates a new inflater.
 173    * @param nowrap true if no header and checksum field appears in the
 174    * stream.  This is used for GZIPed input.  For compatibility with
 175    * Sun JDK you should provide one byte of input more than needed in
 176    * this case.
 177    */
 178   public Inflater (boolean nowrap)
 179   {
 180     this.nowrap = nowrap;
 181     this.adler = new Adler32();
 182     input = new StreamManipulator();
 183     outputWindow = new OutputWindow();
 184     mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
 185   }
 186
 187   /**
 188    * Finalizes this object.
 189    */
 190   protected void finalize ()
 191   {
 192     /* Exists only for compatibility */
 193   }
 194
 195   /**
 196    * Frees all objects allocated by the inflater.  There's no reason
 197    * to call this, since you can just rely on garbage collection (even
 198    * for the Sun implementation).  Exists only for compatibility
 199    * with Sun's JDK, where the compressor allocates native memory.
 200    * If you call any method (even reset) afterwards the behaviour is
 201    * <i>undefined</i>.
 202    */
 203   public void end ()
 204   {
 205     outputWindow = null;
 206     input = null;
 207     dynHeader = null;
 208     litlenTree = null;
 209     distTree = null;
 210     adler = null;
 211   }
 212
 213   /**
 214    * Returns true, if the inflater has finished.  This means, that no
 215    * input is needed and no output can be produced.
 216    */
 217   public boolean finished()
 218   {
 219     return mode == FINISHED && outputWindow.getAvailable() == 0;
 220   }
 221
 222   /**
 223    * Gets the adler checksum.  This is either the checksum of all
 224    * uncompressed bytes returned by inflate(), or if needsDictionary()
 225    * returns true (and thus no output was yet produced) this is the
 226    * adler checksum of the expected dictionary.
 227    * @returns the adler checksum.
 228    */
 229   public int getAdler()
 230   {
 231     return needsDictionary() ? readAdler : (int) adler.getValue();
 232   }
 233
 234   /**
 235    * Gets the number of unprocessed input.  Useful, if the end of the
 236    * stream is reached and you want to further process the bytes after
 237    * the deflate stream.
 238    * @return the number of bytes of the input which were not processed.
 239    */
 240   public int getRemaining()
 241   {
 242     return input.getAvailableBytes();
 243   }
 244
 245   /**
 246    * Gets the total number of processed compressed input bytes.
 247    * @return the total number of bytes of processed input bytes.
 248    */
 249   public int getTotalIn()
 250   {
 251     return totalIn - getRemaining();
 252   }
 253
 254   /**
 255    * Gets the total number of output bytes returned by inflate().
 256    * @return the total number of output bytes.
 257    */
 258   public int getTotalOut()
 259   {
 260     return totalOut;
 261   }
 262
 263   /**
 264    * Inflates the compressed stream to the output buffer.  If this
 265    * returns 0, you should check, whether needsDictionary(),
 266    * needsInput() or finished() returns true, to determine why no
 267    * further output is produced.
 268    * @param buf the output buffer.
 269    * @return the number of bytes written to the buffer, 0 if no further
 270    * output can be produced.
 271    * @exception DataFormatException if deflated stream is invalid.
 272    * @exception IllegalArgumentException if buf has length 0.
 273    */
 274   public int inflate (byte[] buf) throws DataFormatException
 275   {
 276     return inflate (buf, 0, buf.length);
 277   }
 278
 279   /**
 280    * Inflates the compressed stream to the output buffer.  If this
 281    * returns 0, you should check, whether needsDictionary(),
 282    * needsInput() or finished() returns true, to determine why no
 283    * further output is produced.
 284    * @param buf the output buffer.
 285    * @param off the offset into buffer where the output should start.
 286    * @param len the maximum length of the output.
 287    * @return the number of bytes written to the buffer, 0 if no further
 288    * output can be produced.
 289    * @exception DataFormatException if deflated stream is invalid.
 290    * @exception IndexOutOfBoundsException if the off and/or len are wrong.
 291    */
 292   public int inflate (byte[] buf, int off, int len) throws DataFormatException
 293   {
 294     /* Special case: len may be zero */
 295     if (len == 0)
 296       return 0;
 297     /* Check for correct buff, off, len triple */
 298     if (0 > off || off > off + len || off + len > buf.length)
 299       throw new ArrayIndexOutOfBoundsException();
 300     int count = 0;
 301     int more;
 302     do
 303       {
 304         if (mode != DECODE_CHKSUM)
 305           {
 306             /* Don't give away any output, if we are waiting for the
 307              * checksum in the input stream.
 308              *
 309              * With this trick we have always:
 310              *   needsInput() and not finished()
 311              *   implies more output can be produced.
 312              */
 313             more = outputWindow.copyOutput(buf, off, len);
 314             adler.update(buf, off, more);
 315             off += more;
 316             count += more;
 317             totalOut += more;
 318             len -= more;
 319             if (len == 0)
 320               return count;
 321           }
 322       }
 323     while (decode() || (outputWindow.getAvailable() > 0
 324                         && mode != DECODE_CHKSUM));
 325     return count;
 326   }
 327
 328   /**
 329    * Returns true, if a preset dictionary is needed to inflate the input.
 330    */
 331   public boolean needsDictionary ()
 332   {
 333     return mode == DECODE_DICT && neededBits == 0;
 334   }
 335
 336   /**
 337    * Returns true, if the input buffer is empty.
 338    * You should then call setInput(). <br>
 339    *
 340    * <em>NOTE</em>: This method also returns true when the stream is finished.
 341    */
 342   public boolean needsInput ()
 343   {
 344     return input.needsInput ();
 345   }
 346
 347   /**
 348    * Resets the inflater so that a new stream can be decompressed.  All
 349    * pending input and output will be discarded.
 350    */
 351   public void reset ()
 352   {
 353     mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
 354     totalIn = totalOut = 0;
 355     input.reset();
 356     outputWindow.reset();
 357     dynHeader = null;
 358     litlenTree = null;
 359     distTree = null;
 360     isLastBlock = false;
 361     adler.reset();
 362   }
 363
 364   /**
 365    * Sets the preset dictionary.  This should only be called, if
 366    * needsDictionary() returns true and it should set the same
 367    * dictionary, that was used for deflating.  The getAdler()
 368    * function returns the checksum of the dictionary needed.
 369    * @param buffer the dictionary.
 370    * @exception IllegalStateException if no dictionary is needed.
 371    * @exception IllegalArgumentException if the dictionary checksum is
 372    * wrong.
 373    */
 374   public void setDictionary (byte[] buffer)
 375   {
 376     setDictionary(buffer, 0, buffer.length);
 377   }
 378
 379   /**
 380    * Sets the preset dictionary.  This should only be called, if
 381    * needsDictionary() returns true and it should set the same
 382    * dictionary, that was used for deflating.  The getAdler()
 383    * function returns the checksum of the dictionary needed.
 384    * @param buffer the dictionary.
 385    * @param off the offset into buffer where the dictionary starts.
 386    * @param len the length of the dictionary.
 387    * @exception IllegalStateException if no dictionary is needed.
 388    * @exception IllegalArgumentException if the dictionary checksum is
 389    * wrong.
 390    * @exception IndexOutOfBoundsException if the off and/or len are wrong.
 391    */
 392   public void setDictionary (byte[] buffer, int off, int len)
 393   {
 394     if (!needsDictionary())
 395       throw new IllegalStateException();
 396
 397     adler.update(buffer, off, len);
 398     if ((int) adler.getValue() != readAdler)
 399       throw new IllegalArgumentException("Wrong adler checksum");
 400     adler.reset();
 401     outputWindow.copyDict(buffer, off, len);
 402     mode = DECODE_BLOCKS;
 403   }
 404
 405   /**
 406    * Sets the input.  This should only be called, if needsInput()
 407    * returns true.
 408    * @param buf the input.
 409    * @exception IllegalStateException if no input is needed.
 410    */
 411   public void setInput (byte[] buf)
 412   {
 413     setInput (buf, 0, buf.length);
 414   }
 415
 416   /**
 417    * Sets the input.  This should only be called, if needsInput()
 418    * returns true.
 419    * @param buf the input.
 420    * @param off the offset into buffer where the input starts.
 421    * @param len the length of the input.
 422    * @exception IllegalStateException if no input is needed.
 423    * @exception IndexOutOfBoundsException if the off and/or len are wrong.
 424    */
 425   public void setInput (byte[] buf, int off, int len)
 426   {
 427     input.setInput (buf, off, len);
 428     totalIn += len;
 429   }
 430
 431   /**
 432    * Decodes the deflate header.
 433    * @return false if more input is needed.
 434    * @exception DataFormatException if header is invalid.
 435    */
 436   private boolean decodeHeader () throws DataFormatException
 437   {
 438     int header = input.peekBits(16);
 439     if (header < 0)
 440       return false;
 441     input.dropBits(16);
 442
 443     /* The header is written in "wrong" byte order */
 444     header = ((header << 8) | (header >> 8)) & 0xffff;
 445     if (header % 31 != 0)
 446       throw new DataFormatException("Header checksum illegal");
 447
 448     if ((header & 0x0f00) != (Deflater.DEFLATED << 8))
 449       throw new DataFormatException("Compression Method unknown");
 450
 451     /* Maximum size of the backwards window in bits.
 452      * We currently ignore this, but we could use it to make the
 453      * inflater window more space efficient. On the other hand the
 454      * full window (15 bits) is needed most times, anyway.
 455      int max_wbits = ((header & 0x7000) >> 12) + 8;
 456      */
 457
 458     if ((header & 0x0020) == 0) // Dictionary flag?
 459       {
 460         mode = DECODE_BLOCKS;
 461       }
 462     else
 463       {
 464         mode = DECODE_DICT;
 465         neededBits = 32;
 466       }
 467     return true;
 468   }
 469
 470   /**
 471    * Decodes the dictionary checksum after the deflate header.
 472    * @return false if more input is needed.
 473    */
 474   private boolean decodeDict ()
 475   {
 476     while (neededBits > 0)
 477       {
 478         int dictByte = input.peekBits(8);
 479         if (dictByte < 0)
 480           return false;
 481         input.dropBits(8);
 482         readAdler = (readAdler << 8) | dictByte;
 483         neededBits -= 8;
 484       }
 485     return false;
 486   }
 487
 488   /**
 489    * Decodes the huffman encoded symbols in the input stream.
 490    * @return false if more input is needed, true if output window is
 491    * full or the current block ends.
 492    * @exception DataFormatException if deflated stream is invalid.
 493    */
 494   private boolean decodeHuffman () throws DataFormatException
 495   {
 496     int free = outputWindow.getFreeSpace();
 497     while (free >= 258)
 498       {
 499         int symbol;
 500         switch (mode)
 501           {
 502           case DECODE_HUFFMAN:
 503             /* This is the inner loop so it is optimized a bit */
 504             while (((symbol = litlenTree.getSymbol(input)) & ~0xff) == 0)
 505               {
 506                 outputWindow.write(symbol);
 507                 if (--free < 258)
 508                   return true;
 509               }
 510             if (symbol < 257)
 511               {
 512                 if (symbol < 0)
 513                   return false;
 514                 else
 515                   {
 516                     /* symbol == 256: end of block */
 517                     distTree = null;
 518                     litlenTree = null;
 519                     mode = DECODE_BLOCKS;
 520                     return true;
 521                   }
 522               }
 523
 524             try
 525               {
 526                 repLength = CPLENS[symbol - 257];
 527                 neededBits = CPLEXT[symbol - 257];
 528               }
 529             catch (ArrayIndexOutOfBoundsException ex)
 530               {
 531                 throw new DataFormatException("Illegal rep length code");
 532               }
 533             /* fall through */
 534           case DECODE_HUFFMAN_LENBITS:
 535             if (neededBits > 0)
 536               {
 537                 mode = DECODE_HUFFMAN_LENBITS;
 538                 int i = input.peekBits(neededBits);
 539                 if (i < 0)
 540                   return false;
 541                 input.dropBits(neededBits);
 542                 repLength += i;
 543               }
 544             mode = DECODE_HUFFMAN_DIST;
 545             /* fall through */
 546           case DECODE_HUFFMAN_DIST:
 547             symbol = distTree.getSymbol(input);
 548             if (symbol < 0)
 549               return false;
 550             try
 551               {
 552                 repDist = CPDIST[symbol];
 553                 neededBits = CPDEXT[symbol];
 554               }
 555             catch (ArrayIndexOutOfBoundsException ex)
 556               {
 557                 throw new DataFormatException("Illegal rep dist code");
 558               }
 559             /* fall through */
 560           case DECODE_HUFFMAN_DISTBITS:
 561             if (neededBits > 0)
 562               {
 563                 mode = DECODE_HUFFMAN_DISTBITS;
 564                 int i = input.peekBits(neededBits);
 565                 if (i < 0)
 566                   return false;
 567                 input.dropBits(neededBits);
 568                 repDist += i;
 569               }
 570             outputWindow.repeat(repLength, repDist);
 571             free -= repLength;
 572             mode = DECODE_HUFFMAN;
 573             break;
 574           default:
 575             throw new IllegalStateException();
 576           }
 577       }
 578     return true;
 579   }
 580
 581   /**
 582    * Decodes the adler checksum after the deflate stream.
 583    * @return false if more input is needed.
 584    * @exception DataFormatException if checksum doesn't match.
 585    */
 586   private boolean decodeChksum () throws DataFormatException
 587   {
 588     while (neededBits > 0)
 589       {
 590         int chkByte = input.peekBits(8);
 591         if (chkByte < 0)
 592           return false;
 593         input.dropBits(8);
 594         readAdler = (readAdler << 8) | chkByte;
 595         neededBits -= 8;
 596       }
 597     if ((int) adler.getValue() != readAdler)
 598       throw new DataFormatException("Adler chksum doesn't match: "
 599                                     +Integer.toHexString((int)adler.getValue())
 600                                     +" vs. "+Integer.toHexString(readAdler));
 601     mode = FINISHED;
 602     return false;
 603   }
 604
 605   /**
 606    * Decodes the deflated stream.
 607    * @return false if more input is needed, or if finished.
 608    * @exception DataFormatException if deflated stream is invalid.
 609    */
 610   private boolean decode () throws DataFormatException
 611   {
 612     switch (mode)
 613       {
 614       case DECODE_HEADER:
 615         return decodeHeader();
 616       case DECODE_DICT:
 617         return decodeDict();
 618       case DECODE_CHKSUM:
 619         return decodeChksum();
 620
 621       case DECODE_BLOCKS:
 622         if (isLastBlock)
 623           {
 624             if (nowrap)
 625               {
 626                 mode = FINISHED;
 627                 return false;
 628               }
 629             else
 630               {
 631                 input.skipToByteBoundary();
 632                 neededBits = 32;
 633                 mode = DECODE_CHKSUM;
 634                 return true;
 635               }
 636           }
 637
 638         int type = input.peekBits(3);
 639         if (type < 0)
 640           return false;
 641         input.dropBits(3);
 642
 643         if ((type & 1) != 0)
 644           isLastBlock = true;
 645         switch (type >> 1)
 646           {
 647           case DeflaterConstants.STORED_BLOCK:
 648             input.skipToByteBoundary();
 649             mode = DECODE_STORED_LEN1;
 650             break;
 651           case DeflaterConstants.STATIC_TREES:
 652             litlenTree = InflaterHuffmanTree.defLitLenTree;
 653             distTree = InflaterHuffmanTree.defDistTree;
 654             mode = DECODE_HUFFMAN;
 655             break;
 656           case DeflaterConstants.DYN_TREES:
 657             dynHeader = new InflaterDynHeader();
 658             mode = DECODE_DYN_HEADER;
 659             break;
 660           default:
 661             throw new DataFormatException("Unknown block type "+type);
 662           }
 663         return true;
 664
 665       case DECODE_STORED_LEN1:
 666         {
 667           if ((uncomprLen = input.peekBits(16)) < 0)
 668             return false;
 669           input.dropBits(16);
 670           mode = DECODE_STORED_LEN2;
 671         }
 672         /* fall through */
 673       case DECODE_STORED_LEN2:
 674         {
 675           int nlen = input.peekBits(16);
 676           if (nlen < 0)
 677             return false;
 678           input.dropBits(16);
 679           if (nlen != (uncomprLen ^ 0xffff))
 680             throw new DataFormatException("broken uncompressed block");
 681           mode = DECODE_STORED;
 682         }
 683         /* fall through */
 684       case DECODE_STORED:
 685         {
 686           int more = outputWindow.copyStored(input, uncomprLen);
 687           uncomprLen -= more;
 688           if (uncomprLen == 0)
 689             {
 690               mode = DECODE_BLOCKS;
 691               return true;
 692             }
 693           return !input.needsInput();
 694         }
 695
 696       case DECODE_DYN_HEADER:
 697         if (!dynHeader.decode(input))
 698           return false;
 699         litlenTree = dynHeader.buildLitLenTree();
 700         distTree = dynHeader.buildDistTree();
 701         mode = DECODE_HUFFMAN;
 702         /* fall through */
 703       case DECODE_HUFFMAN:
 704       case DECODE_HUFFMAN_LENBITS:
 705       case DECODE_HUFFMAN_DIST:
 706       case DECODE_HUFFMAN_DISTBITS:
 707         return decodeHuffman();
 708       case FINISHED:
 709         return false;
 710       default:
 711         throw new IllegalStateException();
 712       }
 713   }
 714 }