1 /* 2 * IndexDecoder 3 * 4 * Author: Lasse Collin <lasse.collin@tukaani.org> 5 * 6 * This file has been put into the public domain. 7 * You can do whatever you want with this file. 8 */ 9 10 package org.tukaani.xz.index; 11 12 import java.io.IOException; 13 import java.io.EOFException; 14 import java.util.zip.CheckedInputStream; 15 import org.tukaani.xz.common.DecoderUtil; 16 import org.tukaani.xz.common.StreamFlags; 17 import org.tukaani.xz.SeekableInputStream; 18 import org.tukaani.xz.CorruptedInputException; 19 import org.tukaani.xz.MemoryLimitException; 20 import org.tukaani.xz.UnsupportedOptionsException; 21 22 public class IndexDecoder extends IndexBase { 23 private final StreamFlags streamFlags; 24 private final long streamPadding; 25 private final int memoryUsage; 26 27 // Unpadded Size and Uncompressed Size fields 28 private final long[] unpadded; 29 private final long[] uncompressed; 30 31 // Uncompressed size of the largest Block. It is used by 32 // SeekableXZInputStream to find out the largest Block of the .xz file. 33 private long largestBlockSize = 0; 34 35 // Offsets relative to the beginning of the .xz file. These are all zero 36 // for the first Stream in the file. 37 private int recordOffset = 0; 38 private long compressedOffset = 0; 39 private long uncompressedOffset = 0; 40 IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags, long streamPadding, int memoryLimit)41 public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags, 42 long streamPadding, int memoryLimit) 43 throws IOException { 44 super(new CorruptedInputException("XZ Index is corrupt")); 45 this.streamFlags = streamFooterFlags; 46 this.streamPadding = streamPadding; 47 48 // If endPos is exceeded before the CRC32 field has been decoded, 49 // the Index is corrupt. 50 long endPos = in.position() + streamFooterFlags.backwardSize - 4; 51 52 java.util.zip.CRC32 crc32 = new java.util.zip.CRC32(); 53 CheckedInputStream inChecked = new CheckedInputStream(in, crc32); 54 55 // Index Indicator 56 if (inChecked.read() != 0x00) 57 throw new CorruptedInputException("XZ Index is corrupt"); 58 59 try { 60 // Number of Records 61 long count = DecoderUtil.decodeVLI(inChecked); 62 63 // Catch Record counts that obviously too high to be valid. 64 // This test isn't exact because it ignores Index Indicator, 65 // Number of Records, and CRC32 fields, but this is good enough 66 // to catch the most obvious problems. 67 if (count >= streamFooterFlags.backwardSize / 2) 68 throw new CorruptedInputException("XZ Index is corrupt"); 69 70 // If the Record count doesn't fit into an int, we cannot 71 // allocate the arrays to hold the Records. 72 if (count > Integer.MAX_VALUE) 73 throw new UnsupportedOptionsException("XZ Index has over " 74 + Integer.MAX_VALUE + " Records"); 75 76 // Calculate approximate memory requirements and check the 77 // memory usage limit. 78 memoryUsage = 1 + (int)((16L * count + 1023) / 1024); 79 if (memoryLimit >= 0 && memoryUsage > memoryLimit) 80 throw new MemoryLimitException(memoryUsage, memoryLimit); 81 82 // Allocate the arrays for the Records. 83 unpadded = new long[(int)count]; 84 uncompressed = new long[(int)count]; 85 int record = 0; 86 87 // Decode the Records. 88 for (int i = (int)count; i > 0; --i) { 89 // Get the next Record. 90 long unpaddedSize = DecoderUtil.decodeVLI(inChecked); 91 long uncompressedSize = DecoderUtil.decodeVLI(inChecked); 92 93 // Check that the input position stays sane. Since this is 94 // checked only once per loop iteration instead of for 95 // every input byte read, it's still possible that 96 // EOFException gets thrown with corrupt input. 97 if (in.position() > endPos) 98 throw new CorruptedInputException("XZ Index is corrupt"); 99 100 // Add the new Record. 101 unpadded[record] = blocksSum + unpaddedSize; 102 uncompressed[record] = uncompressedSum + uncompressedSize; 103 ++record; 104 super.add(unpaddedSize, uncompressedSize); 105 assert record == recordCount; 106 107 // Remember the uncompressed size of the largest Block. 108 if (largestBlockSize < uncompressedSize) 109 largestBlockSize = uncompressedSize; 110 } 111 } catch (EOFException e) { 112 // EOFException is caught just in case a corrupt input causes 113 // DecoderUtil.decodeVLI to read too much at once. 114 throw new CorruptedInputException("XZ Index is corrupt"); 115 } 116 117 // Validate that the size of the Index field matches 118 // Backward Size. 119 int indexPaddingSize = getIndexPaddingSize(); 120 if (in.position() + indexPaddingSize != endPos) 121 throw new CorruptedInputException("XZ Index is corrupt"); 122 123 // Index Padding 124 while (indexPaddingSize-- > 0) 125 if (inChecked.read() != 0x00) 126 throw new CorruptedInputException("XZ Index is corrupt"); 127 128 // CRC32 129 long value = crc32.getValue(); 130 for (int i = 0; i < 4; ++i) 131 if (((value >>> (i * 8)) & 0xFF) != in.read()) 132 throw new CorruptedInputException("XZ Index is corrupt"); 133 } 134 setOffsets(IndexDecoder prev)135 public void setOffsets(IndexDecoder prev) { 136 // NOTE: SeekableXZInputStream checks that the total number of Blocks 137 // in concatenated Streams fits into an int. 138 recordOffset = prev.recordOffset + (int)prev.recordCount; 139 compressedOffset = prev.compressedOffset 140 + prev.getStreamSize() + prev.streamPadding; 141 assert (compressedOffset & 3) == 0; 142 uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum; 143 } 144 getMemoryUsage()145 public int getMemoryUsage() { 146 return memoryUsage; 147 } 148 getStreamFlags()149 public StreamFlags getStreamFlags() { 150 return streamFlags; 151 } 152 getRecordCount()153 public int getRecordCount() { 154 // It was already checked in the constructor that it fits into an int. 155 // Otherwise we couldn't have allocated the arrays. 156 return (int)recordCount; 157 } 158 getUncompressedSize()159 public long getUncompressedSize() { 160 return uncompressedSum; 161 } 162 getLargestBlockSize()163 public long getLargestBlockSize() { 164 return largestBlockSize; 165 } 166 hasUncompressedOffset(long pos)167 public boolean hasUncompressedOffset(long pos) { 168 return pos >= uncompressedOffset 169 && pos < uncompressedOffset + uncompressedSum; 170 } 171 hasRecord(int blockNumber)172 public boolean hasRecord(int blockNumber) { 173 return blockNumber >= recordOffset 174 && blockNumber < recordOffset + recordCount; 175 } 176 locateBlock(BlockInfo info, long target)177 public void locateBlock(BlockInfo info, long target) { 178 assert target >= uncompressedOffset; 179 target -= uncompressedOffset; 180 assert target < uncompressedSum; 181 182 int left = 0; 183 int right = unpadded.length - 1; 184 185 while (left < right) { 186 int i = left + (right - left) / 2; 187 188 if (uncompressed[i] <= target) 189 left = i + 1; 190 else 191 right = i; 192 } 193 194 setBlockInfo(info, recordOffset + left); 195 } 196 197 public void setBlockInfo(BlockInfo info, int blockNumber) { 198 // The caller has checked that the given Block number is inside 199 // this Index. 200 assert blockNumber >= recordOffset; 201 assert blockNumber - recordOffset < recordCount; 202 203 info.index = this; 204 info.blockNumber = blockNumber; 205 206 int pos = blockNumber - recordOffset; 207 208 if (pos == 0) { 209 info.compressedOffset = 0; 210 info.uncompressedOffset = 0; 211 } else { 212 info.compressedOffset = (unpadded[pos - 1] + 3) & ~3; 213 info.uncompressedOffset = uncompressed[pos - 1]; 214 } 215 216 info.unpaddedSize = unpadded[pos] - info.compressedOffset; 217 info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset; 218 219 info.compressedOffset += compressedOffset 220 + DecoderUtil.STREAM_HEADER_SIZE; 221 info.uncompressedOffset += uncompressedOffset; 222 } 223 } 224