diff -urNp bzip2-1.0.2/bzip2.c bzip2-1.0.3/bzip2.c --- bzip2-1.0.2/bzip2.c 2002-01-24 20:38:36.000000000 -0500 +++ bzip2-1.0.3/bzip2.c 2005-02-15 11:25:35.000000000 -0500 @@ -525,6 +525,7 @@ Bool uncompressStream ( FILE *zStream, F UChar obuf[5000]; UChar unused[BZ_MAX_UNUSED]; Int32 nUnused; + void* unusedTmpV; UChar* unusedTmp; nUnused = 0; @@ -554,9 +555,10 @@ Bool uncompressStream ( FILE *zStream, F } if (bzerr != BZ_STREAM_END) goto errhandler; - BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); + BZ2_bzReadGetUnused ( &bzerr, bzf, &unusedTmpV, &nUnused ); if (bzerr != BZ_OK) panic ( "decompress:bzReadGetUnused" ); + unusedTmp = (UChar*)unusedTmpV; for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; BZ2_bzReadClose ( &bzerr, bzf ); @@ -639,6 +641,7 @@ Bool testStream ( FILE *zStream ) UChar obuf[5000]; UChar unused[BZ_MAX_UNUSED]; Int32 nUnused; + void* unusedTmpV; UChar* unusedTmp; nUnused = 0; @@ -662,9 +665,10 @@ Bool testStream ( FILE *zStream ) } if (bzerr != BZ_STREAM_END) goto errhandler; - BZ2_bzReadGetUnused ( &bzerr, bzf, (void**)(&unusedTmp), &nUnused ); + BZ2_bzReadGetUnused ( &bzerr, bzf, &unusedTmpV, &nUnused ); if (bzerr != BZ_OK) panic ( "test:bzReadGetUnused" ); + unusedTmp = (UChar*)unusedTmpV; for (i = 0; i < nUnused; i++) unused[i] = unusedTmp[i]; BZ2_bzReadClose ( &bzerr, bzf ); diff -urNp bzip2-1.0.2/bzlib.c bzip2-1.0.3/bzlib.c --- bzip2-1.0.2/bzlib.c 2002-01-04 19:48:41.000000000 -0500 +++ bzip2-1.0.3/bzlib.c 2005-02-15 11:24:58.000000000 -0500 @@ -574,8 +574,11 @@ int BZ_API(BZ2_bzDecompressInit) /*---------------------------------------------------*/ +/* Return True iff data corruption is discovered. + Returns False if there is no problem. +*/ static -void unRLE_obuf_to_output_FAST ( DState* s ) +Bool unRLE_obuf_to_output_FAST ( DState* s ) { UChar k1; @@ -584,7 +587,7 @@ void unRLE_obuf_to_output_FAST ( DState* while (True) { /* try to finish existing run */ while (True) { - if (s->strm->avail_out == 0) return; + if (s->strm->avail_out == 0) return False; if (s->state_out_len == 0) break; *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); @@ -594,10 +597,13 @@ void unRLE_obuf_to_output_FAST ( DState* s->strm->total_out_lo32++; if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; } - + /* can a new run be started? */ - if (s->nblock_used == s->save_nblock+1) return; + if (s->nblock_used == s->save_nblock+1) return False; + /* Only caused by corrupt data stream? */ + if (s->nblock_used > s->save_nblock+1) + return True; s->state_out_len = 1; s->state_out_ch = s->k0; @@ -667,6 +673,10 @@ void unRLE_obuf_to_output_FAST ( DState* cs_avail_out--; } } + /* Only caused by corrupt data stream? */ + if (c_nblock_used > s_save_nblockPP) + return True; + /* can a new run be started? */ if (c_nblock_used == s_save_nblockPP) { c_state_out_len = 0; goto return_notr; @@ -712,6 +722,7 @@ void unRLE_obuf_to_output_FAST ( DState* s->strm->avail_out = cs_avail_out; /* end save */ } + return False; } @@ -732,8 +743,11 @@ __inline__ Int32 BZ2_indexIntoF ( Int32 /*---------------------------------------------------*/ +/* Return True iff data corruption is discovered. + Returns False if there is no problem. +*/ static -void unRLE_obuf_to_output_SMALL ( DState* s ) +Bool unRLE_obuf_to_output_SMALL ( DState* s ) { UChar k1; @@ -742,7 +756,7 @@ void unRLE_obuf_to_output_SMALL ( DState while (True) { /* try to finish existing run */ while (True) { - if (s->strm->avail_out == 0) return; + if (s->strm->avail_out == 0) return False; if (s->state_out_len == 0) break; *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); @@ -754,8 +768,11 @@ void unRLE_obuf_to_output_SMALL ( DState } /* can a new run be started? */ - if (s->nblock_used == s->save_nblock+1) return; - + if (s->nblock_used == s->save_nblock+1) return False; + + /* Only caused by corrupt data stream? */ + if (s->nblock_used > s->save_nblock+1) + return True; s->state_out_len = 1; s->state_out_ch = s->k0; @@ -788,7 +805,7 @@ void unRLE_obuf_to_output_SMALL ( DState while (True) { /* try to finish existing run */ while (True) { - if (s->strm->avail_out == 0) return; + if (s->strm->avail_out == 0) return False; if (s->state_out_len == 0) break; *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); @@ -800,7 +817,11 @@ void unRLE_obuf_to_output_SMALL ( DState } /* can a new run be started? */ - if (s->nblock_used == s->save_nblock+1) return; + if (s->nblock_used == s->save_nblock+1) return False; + + /* Only caused by corrupt data stream? */ + if (s->nblock_used > s->save_nblock+1) + return True; s->state_out_len = 1; s->state_out_ch = s->k0; @@ -830,6 +851,7 @@ void unRLE_obuf_to_output_SMALL ( DState /*---------------------------------------------------*/ int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) { + Bool corrupt; DState* s; if (strm == NULL) return BZ_PARAM_ERROR; s = strm->state; @@ -840,12 +862,13 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR; if (s->state == BZ_X_OUTPUT) { if (s->smallDecompress) - unRLE_obuf_to_output_SMALL ( s ); else - unRLE_obuf_to_output_FAST ( s ); + corrupt = unRLE_obuf_to_output_SMALL ( s ); else + corrupt = unRLE_obuf_to_output_FAST ( s ); + if (corrupt) return BZ_DATA_ERROR; if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { BZ_FINALISE_CRC ( s->calculatedBlockCRC ); if (s->verbosity >= 3) - VPrintf2 ( " {0x%x, 0x%x}", s->storedBlockCRC, + VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, s->calculatedBlockCRC ); if (s->verbosity >= 2) VPrintf0 ( "]" ); if (s->calculatedBlockCRC != s->storedBlockCRC) @@ -863,7 +886,7 @@ int BZ_API(BZ2_bzDecompress) ( bz_stream Int32 r = BZ2_decompress ( s ); if (r == BZ_STREAM_END) { if (s->verbosity >= 3) - VPrintf2 ( "\n combined CRCs: stored = 0x%x, computed = 0x%x", + VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x", s->storedCombinedCRC, s->calculatedCombinedCRC ); if (s->calculatedCombinedCRC != s->storedCombinedCRC) return BZ_DATA_ERROR; diff -urNp bzip2-1.0.2/compress.c bzip2-1.0.3/compress.c --- bzip2-1.0.2/compress.c 2001-12-30 15:43:49.000000000 -0500 +++ bzip2-1.0.3/compress.c 2005-02-15 11:23:23.000000000 -0500 @@ -488,9 +488,11 @@ void sendMTFValues ( EState* s ) /*-- Recompute the tables based on the accumulated frequencies. --*/ + /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See + comment in huffman.c for details. */ for (t = 0; t < nGroups; t++) BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), - alphaSize, 20 ); + alphaSize, 17 /*20*/ ); } @@ -527,7 +529,7 @@ void sendMTFValues ( EState* s ) if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; if (s->len[t][i] < minLen) minLen = s->len[t][i]; } - AssertH ( !(maxLen > 20), 3004 ); + AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); AssertH ( !(minLen < 1), 3005 ); BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize ); @@ -651,8 +653,8 @@ void BZ2_compressBlock ( EState* s, Bool if (s->blockNo > 1) s->numZ = 0; if (s->verbosity >= 2) - VPrintf4( " block %d: crc = 0x%8x, " - "combined CRC = 0x%8x, size = %d\n", + VPrintf4( " block %d: crc = 0x%08x, " + "combined CRC = 0x%08x, size = %d\n", s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); BZ2_blockSort ( s ); @@ -703,7 +705,7 @@ void BZ2_compressBlock ( EState* s, Bool bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); bsPutUInt32 ( s, s->combinedCRC ); if (s->verbosity >= 2) - VPrintf1( " final combined CRC = 0x%x\n ", s->combinedCRC ); + VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); bsFinishWrite ( s ); } } diff -urNp bzip2-1.0.2/decompress.c bzip2-1.0.3/decompress.c --- bzip2-1.0.2/decompress.c 2001-12-30 15:45:53.000000000 -0500 +++ bzip2-1.0.3/decompress.c 2005-02-15 11:23:27.000000000 -0500 @@ -524,17 +524,23 @@ Int32 BZ2_decompress ( DState* s ) if (s->origPtr < 0 || s->origPtr >= nblock) RETURN(BZ_DATA_ERROR); + /*-- Set up cftab to facilitate generation of T^(-1) --*/ + s->cftab[0] = 0; + for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; + for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; + for (i = 0; i <= 256; i++) { + if (s->cftab[i] < 0 || s->cftab[i] > nblock) { + /* s->cftab[i] can legitimately be == nblock */ + RETURN(BZ_DATA_ERROR); + } + } + s->state_out_len = 0; s->state_out_ch = 0; BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); s->state = BZ_X_OUTPUT; if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); - /*-- Set up cftab to facilitate generation of T^(-1) --*/ - s->cftab[0] = 0; - for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; - for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; - if (s->smallDecompress) { /*-- Make a copy of cftab, used in generation of T --*/ diff -urNp bzip2-1.0.2/huffman.c bzip2-1.0.3/huffman.c --- bzip2-1.0.2/huffman.c 2001-12-29 21:19:17.000000000 -0500 +++ bzip2-1.0.3/huffman.c 2005-02-15 11:23:34.000000000 -0500 @@ -162,7 +162,24 @@ void BZ2_hbMakeCodeLengths ( UChar *len, if (! tooLong) break; - for (i = 1; i < alphaSize; i++) { + /* 17 Oct 04: keep-going condition for the following loop used + to be 'i < alphaSize', which missed the last element, + theoretically leading to the possibility of the compressor + looping. However, this count-scaling step is only needed if + one of the generated Huffman code words is longer than + maxLen, which up to and including version 1.0.2 was 20 bits, + which is extremely unlikely. In version 1.0.3 maxLen was + changed to 17 bits, which has minimal effect on compression + ratio, but does mean this scaling step is used from time to + time, enough to verify that it works. + + This means that bzip2-1.0.3 and later will only produce + Huffman codes with a maximum length of 17 bits. However, in + order to preserve backwards compatibility with bitstreams + produced by versions pre-1.0.3, the decompressor must still + handle lengths of up to 20. */ + + for (i = 1; i <= alphaSize; i++) { j = weight[i] >> 8; j = 1 + (j / 2); weight[i] = j << 8;