• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  */
18 package org.apache.commons.compress.archivers.sevenz;
19 
20 import java.io.BufferedInputStream;
21 import java.io.ByteArrayInputStream;
22 import java.io.Closeable;
23 import java.io.DataInputStream;
24 import java.io.File;
25 import java.io.FilterInputStream;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.nio.ByteBuffer;
29 import java.nio.ByteOrder;
30 import java.nio.CharBuffer;
31 import java.nio.channels.SeekableByteChannel;
32 import java.nio.charset.StandardCharsets;
33 import java.nio.charset.CharsetEncoder;
34 import java.nio.file.Files;
35 import java.nio.file.StandardOpenOption;
36 import java.util.ArrayList;
37 import java.util.Arrays;
38 import java.util.BitSet;
39 import java.util.EnumSet;
40 import java.util.LinkedList;
41 import java.util.zip.CRC32;
42 
43 import org.apache.commons.compress.utils.BoundedInputStream;
44 import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
45 import org.apache.commons.compress.utils.CharsetNames;
46 import org.apache.commons.compress.utils.IOUtils;
47 import org.apache.commons.compress.utils.InputStreamStatistics;
48 
49 /**
50  * Reads a 7z file, using SeekableByteChannel under
51  * the covers.
52  * <p>
53  * The 7z file format is a flexible container
54  * that can contain many compression and
55  * encryption types, but at the moment
56  * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256
57  * are supported.
58  * <p>
59  * The format is very Windows/Intel specific,
60  * so it uses little-endian byte order,
61  * doesn't store user/group or permission bits,
62  * and represents times using NTFS timestamps
63  * (100 nanosecond units since 1 January 1601).
64  * Hence the official tools recommend against
65  * using it for backup purposes on *nix, and
66  * recommend .tar.7z or .tar.lzma or .tar.xz
67  * instead.
68  * <p>
69  * Both the header and file contents may be
70  * compressed and/or encrypted. With both
71  * encrypted, neither file names nor file
72  * contents can be read, but the use of
73  * encryption isn't plausibly deniable.
74  *
75  * @NotThreadSafe
76  * @since 1.6
77  */
78 public class SevenZFile implements Closeable {
79     static final int SIGNATURE_HEADER_SIZE = 32;
80 
81     private final String fileName;
82     private SeekableByteChannel channel;
83     private final Archive archive;
84     private int currentEntryIndex = -1;
85     private int currentFolderIndex = -1;
86     private InputStream currentFolderInputStream = null;
87     private byte[] password;
88 
89     private long compressedBytesReadFromCurrentEntry;
90     private long uncompressedBytesReadFromCurrentEntry;
91 
92     private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>();
93 
94     // shared with SevenZOutputFile and tests, neither mutates it
95     static final byte[] sevenZSignature = { //NOSONAR
96         (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
97     };
98 
99     /**
100      * Reads a file as 7z archive
101      *
102      * @param filename the file to read
103      * @param password optional password if the archive is encrypted
104      * @throws IOException if reading the archive fails
105      * @since 1.17
106      */
SevenZFile(final File filename, final char[] password)107     public SevenZFile(final File filename, final char[] password) throws IOException {
108         this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)),
109              filename.getAbsolutePath(), utf16Decode(password), true);
110     }
111 
112     /**
113      * Reads a file as 7z archive
114      *
115      * @param filename the file to read
116      * @param password optional password if the archive is encrypted -
117      * the byte array is supposed to be the UTF16-LE encoded
118      * representation of the password.
119      * @throws IOException if reading the archive fails
120      * @deprecated use the char[]-arg version for the password instead
121      */
SevenZFile(final File filename, final byte[] password)122     public SevenZFile(final File filename, final byte[] password) throws IOException {
123         this(Files.newByteChannel(filename.toPath(), EnumSet.of(StandardOpenOption.READ)),
124              filename.getAbsolutePath(), password, true);
125     }
126 
127     /**
128      * Reads a SeekableByteChannel as 7z archive
129      *
130      * <p>{@link
131      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
132      * allows you to read from an in-memory archive.</p>
133      *
134      * @param channel the channel to read
135      * @throws IOException if reading the archive fails
136      * @since 1.13
137      */
SevenZFile(final SeekableByteChannel channel)138     public SevenZFile(final SeekableByteChannel channel) throws IOException {
139         this(channel, "unknown archive", (char[]) null);
140     }
141 
142     /**
143      * Reads a SeekableByteChannel as 7z archive
144      *
145      * <p>{@link
146      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
147      * allows you to read from an in-memory archive.</p>
148      *
149      * @param channel the channel to read
150      * @param password optional password if the archive is encrypted
151      * @throws IOException if reading the archive fails
152      * @since 1.17
153      */
SevenZFile(final SeekableByteChannel channel, final char[] password)154     public SevenZFile(final SeekableByteChannel channel,
155                       final char[] password) throws IOException {
156         this(channel, "unknown archive", utf16Decode(password));
157     }
158 
159     /**
160      * Reads a SeekableByteChannel as 7z archive
161      *
162      * <p>{@link
163      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
164      * allows you to read from an in-memory archive.</p>
165      *
166      * @param channel the channel to read
167      * @param filename name of the archive - only used for error reporting
168      * @param password optional password if the archive is encrypted
169      * @throws IOException if reading the archive fails
170      * @since 1.17
171      */
SevenZFile(final SeekableByteChannel channel, String filename, final char[] password)172     public SevenZFile(final SeekableByteChannel channel, String filename,
173                       final char[] password) throws IOException {
174         this(channel, filename, utf16Decode(password), false);
175     }
176 
177     /**
178      * Reads a SeekableByteChannel as 7z archive
179      *
180      * <p>{@link
181      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
182      * allows you to read from an in-memory archive.</p>
183      *
184      * @param channel the channel to read
185      * @param filename name of the archive - only used for error reporting
186      * @throws IOException if reading the archive fails
187      * @since 1.17
188      */
SevenZFile(final SeekableByteChannel channel, String filename)189     public SevenZFile(final SeekableByteChannel channel, String filename)
190         throws IOException {
191         this(channel, filename, null, false);
192     }
193 
194     /**
195      * Reads a SeekableByteChannel as 7z archive
196      *
197      * <p>{@link
198      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
199      * allows you to read from an in-memory archive.</p>
200      *
201      * @param channel the channel to read
202      * @param password optional password if the archive is encrypted -
203      * the byte array is supposed to be the UTF16-LE encoded
204      * representation of the password.
205      * @throws IOException if reading the archive fails
206      * @since 1.13
207      * @deprecated use the char[]-arg version for the password instead
208      */
SevenZFile(final SeekableByteChannel channel, final byte[] password)209     public SevenZFile(final SeekableByteChannel channel,
210                       final byte[] password) throws IOException {
211         this(channel, "unknown archive", password);
212     }
213 
214     /**
215      * Reads a SeekableByteChannel as 7z archive
216      *
217      * <p>{@link
218      * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
219      * allows you to read from an in-memory archive.</p>
220      *
221      * @param channel the channel to read
222      * @param filename name of the archive - only used for error reporting
223      * @param password optional password if the archive is encrypted -
224      * the byte array is supposed to be the UTF16-LE encoded
225      * representation of the password.
226      * @throws IOException if reading the archive fails
227      * @since 1.13
228      * @deprecated use the char[]-arg version for the password instead
229      */
SevenZFile(final SeekableByteChannel channel, String filename, final byte[] password)230     public SevenZFile(final SeekableByteChannel channel, String filename,
231                       final byte[] password) throws IOException {
232         this(channel, filename, password, false);
233     }
234 
SevenZFile(final SeekableByteChannel channel, String filename, final byte[] password, boolean closeOnError)235     private SevenZFile(final SeekableByteChannel channel, String filename,
236                        final byte[] password, boolean closeOnError) throws IOException {
237         boolean succeeded = false;
238         this.channel = channel;
239         this.fileName = filename;
240         try {
241             archive = readHeaders(password);
242             if (password != null) {
243                 this.password = Arrays.copyOf(password, password.length);
244             } else {
245                 this.password = null;
246             }
247             succeeded = true;
248         } finally {
249             if (!succeeded && closeOnError) {
250                 this.channel.close();
251             }
252         }
253     }
254 
255     /**
256      * Reads a file as unencrypted 7z archive
257      *
258      * @param filename the file to read
259      * @throws IOException if reading the archive fails
260      */
SevenZFile(final File filename)261     public SevenZFile(final File filename) throws IOException {
262         this(filename, (char[]) null);
263     }
264 
265     /**
266      * Closes the archive.
267      * @throws IOException if closing the file fails
268      */
269     @Override
close()270     public void close() throws IOException {
271         if (channel != null) {
272             try {
273                 channel.close();
274             } finally {
275                 channel = null;
276                 if (password != null) {
277                     Arrays.fill(password, (byte) 0);
278                 }
279                 password = null;
280             }
281         }
282     }
283 
284     /**
285      * Returns the next Archive Entry in this archive.
286      *
287      * @return the next entry,
288      *         or {@code null} if there are no more entries
289      * @throws IOException if the next entry could not be read
290      */
getNextEntry()291     public SevenZArchiveEntry getNextEntry() throws IOException {
292         if (currentEntryIndex >= archive.files.length - 1) {
293             return null;
294         }
295         ++currentEntryIndex;
296         final SevenZArchiveEntry entry = archive.files[currentEntryIndex];
297         buildDecodingStream();
298         uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0;
299         return entry;
300     }
301 
302     /**
303      * Returns meta-data of all archive entries.
304      *
305      * <p>This method only provides meta-data, the entries can not be
306      * used to read the contents, you still need to process all
307      * entries in order using {@link #getNextEntry} for that.</p>
308      *
309      * <p>The content methods are only available for entries that have
310      * already been reached via {@link #getNextEntry}.</p>
311      *
312      * @return meta-data of all archive entries.
313      * @since 1.11
314      */
getEntries()315     public Iterable<SevenZArchiveEntry> getEntries() {
316         return Arrays.asList(archive.files);
317     }
318 
readHeaders(final byte[] password)319     private Archive readHeaders(final byte[] password) throws IOException {
320         ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */)
321             .order(ByteOrder.LITTLE_ENDIAN);
322         readFully(buf);
323         final byte[] signature = new byte[6];
324         buf.get(signature);
325         if (!Arrays.equals(signature, sevenZSignature)) {
326             throw new IOException("Bad 7z signature");
327         }
328         // 7zFormat.txt has it wrong - it's first major then minor
329         final byte archiveVersionMajor = buf.get();
330         final byte archiveVersionMinor = buf.get();
331         if (archiveVersionMajor != 0) {
332             throw new IOException(String.format("Unsupported 7z version (%d,%d)",
333                     archiveVersionMajor, archiveVersionMinor));
334         }
335 
336         final long startHeaderCrc = 0xffffFFFFL & buf.getInt();
337         final StartHeader startHeader = readStartHeader(startHeaderCrc);
338 
339         final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize;
340         if (nextHeaderSizeInt != startHeader.nextHeaderSize) {
341             throw new IOException("cannot handle nextHeaderSize " + startHeader.nextHeaderSize);
342         }
343         channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
344         buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN);
345         readFully(buf);
346         final CRC32 crc = new CRC32();
347         crc.update(buf.array());
348         if (startHeader.nextHeaderCrc != crc.getValue()) {
349             throw new IOException("NextHeader CRC mismatch");
350         }
351 
352         Archive archive = new Archive();
353         int nid = getUnsignedByte(buf);
354         if (nid == NID.kEncodedHeader) {
355             buf = readEncodedHeader(buf, archive, password);
356             // Archive gets rebuilt with the new header
357             archive = new Archive();
358             nid = getUnsignedByte(buf);
359         }
360         if (nid == NID.kHeader) {
361             readHeader(buf, archive);
362         } else {
363             throw new IOException("Broken or unsupported archive: no Header");
364         }
365         return archive;
366     }
367 
readStartHeader(final long startHeaderCrc)368     private StartHeader readStartHeader(final long startHeaderCrc) throws IOException {
369         final StartHeader startHeader = new StartHeader();
370         // using Stream rather than ByteBuffer for the benefit of the
371         // built-in CRC check
372         try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream(
373                 new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) {
374              startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong());
375              startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong());
376              startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt());
377              return startHeader;
378         }
379     }
380 
readHeader(final ByteBuffer header, final Archive archive)381     private void readHeader(final ByteBuffer header, final Archive archive) throws IOException {
382         int nid = getUnsignedByte(header);
383 
384         if (nid == NID.kArchiveProperties) {
385             readArchiveProperties(header);
386             nid = getUnsignedByte(header);
387         }
388 
389         if (nid == NID.kAdditionalStreamsInfo) {
390             throw new IOException("Additional streams unsupported");
391             //nid = header.readUnsignedByte();
392         }
393 
394         if (nid == NID.kMainStreamsInfo) {
395             readStreamsInfo(header, archive);
396             nid = getUnsignedByte(header);
397         }
398 
399         if (nid == NID.kFilesInfo) {
400             readFilesInfo(header, archive);
401             nid = getUnsignedByte(header);
402         }
403 
404         if (nid != NID.kEnd) {
405             throw new IOException("Badly terminated header, found " + nid);
406         }
407     }
408 
readArchiveProperties(final ByteBuffer input)409     private void readArchiveProperties(final ByteBuffer input) throws IOException {
410         // FIXME: the reference implementation just throws them away?
411         int nid =  getUnsignedByte(input);
412         while (nid != NID.kEnd) {
413             final long propertySize = readUint64(input);
414             final byte[] property = new byte[(int)propertySize];
415             input.get(property);
416             nid = getUnsignedByte(input);
417         }
418     }
419 
readEncodedHeader(final ByteBuffer header, final Archive archive, final byte[] password)420     private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive,
421                                          final byte[] password) throws IOException {
422         readStreamsInfo(header, archive);
423 
424         // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage?
425         final Folder folder = archive.folders[0];
426         final int firstPackStreamIndex = 0;
427         final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
428                 0;
429 
430         channel.position(folderOffset);
431         InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel,
432                 archive.packSizes[firstPackStreamIndex]);
433         for (final Coder coder : folder.getOrderedCoders()) {
434             if (coder.numInStreams != 1 || coder.numOutStreams != 1) {
435                 throw new IOException("Multi input/output stream coders are not yet supported");
436             }
437             inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR
438                     folder.getUnpackSizeForCoder(coder), coder, password);
439         }
440         if (folder.hasCrc) {
441             inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack,
442                     folder.getUnpackSize(), folder.crc);
443         }
444         final byte[] nextHeader = new byte[(int)folder.getUnpackSize()];
445         try (DataInputStream nextHeaderInputStream = new DataInputStream(inputStreamStack)) {
446             nextHeaderInputStream.readFully(nextHeader);
447         }
448         return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN);
449     }
450 
readStreamsInfo(final ByteBuffer header, final Archive archive)451     private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException {
452         int nid = getUnsignedByte(header);
453 
454         if (nid == NID.kPackInfo) {
455             readPackInfo(header, archive);
456             nid = getUnsignedByte(header);
457         }
458 
459         if (nid == NID.kUnpackInfo) {
460             readUnpackInfo(header, archive);
461             nid = getUnsignedByte(header);
462         } else {
463             // archive without unpack/coders info
464             archive.folders = new Folder[0];
465         }
466 
467         if (nid == NID.kSubStreamsInfo) {
468             readSubStreamsInfo(header, archive);
469             nid = getUnsignedByte(header);
470         }
471 
472         if (nid != NID.kEnd) {
473             throw new IOException("Badly terminated StreamsInfo");
474         }
475     }
476 
readPackInfo(final ByteBuffer header, final Archive archive)477     private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException {
478         archive.packPos = readUint64(header);
479         final long numPackStreams = readUint64(header);
480         int nid = getUnsignedByte(header);
481         if (nid == NID.kSize) {
482             archive.packSizes = new long[(int)numPackStreams];
483             for (int i = 0; i < archive.packSizes.length; i++) {
484                 archive.packSizes[i] = readUint64(header);
485             }
486             nid = getUnsignedByte(header);
487         }
488 
489         if (nid == NID.kCRC) {
490             archive.packCrcsDefined = readAllOrBits(header, (int)numPackStreams);
491             archive.packCrcs = new long[(int)numPackStreams];
492             for (int i = 0; i < (int)numPackStreams; i++) {
493                 if (archive.packCrcsDefined.get(i)) {
494                     archive.packCrcs[i] = 0xffffFFFFL & header.getInt();
495                 }
496             }
497 
498             nid = getUnsignedByte(header);
499         }
500 
501         if (nid != NID.kEnd) {
502             throw new IOException("Badly terminated PackInfo (" + nid + ")");
503         }
504     }
505 
readUnpackInfo(final ByteBuffer header, final Archive archive)506     private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException {
507         int nid = getUnsignedByte(header);
508         if (nid != NID.kFolder) {
509             throw new IOException("Expected kFolder, got " + nid);
510         }
511         final long numFolders = readUint64(header);
512         final Folder[] folders = new Folder[(int)numFolders];
513         archive.folders = folders;
514         final int external = getUnsignedByte(header);
515         if (external != 0) {
516             throw new IOException("External unsupported");
517         }
518         for (int i = 0; i < (int)numFolders; i++) {
519             folders[i] = readFolder(header);
520         }
521 
522         nid = getUnsignedByte(header);
523         if (nid != NID.kCodersUnpackSize) {
524             throw new IOException("Expected kCodersUnpackSize, got " + nid);
525         }
526         for (final Folder folder : folders) {
527             folder.unpackSizes = new long[(int)folder.totalOutputStreams];
528             for (int i = 0; i < folder.totalOutputStreams; i++) {
529                 folder.unpackSizes[i] = readUint64(header);
530             }
531         }
532 
533         nid = getUnsignedByte(header);
534         if (nid == NID.kCRC) {
535             final BitSet crcsDefined = readAllOrBits(header, (int)numFolders);
536             for (int i = 0; i < (int)numFolders; i++) {
537                 if (crcsDefined.get(i)) {
538                     folders[i].hasCrc = true;
539                     folders[i].crc = 0xffffFFFFL & header.getInt();
540                 } else {
541                     folders[i].hasCrc = false;
542                 }
543             }
544 
545             nid = getUnsignedByte(header);
546         }
547 
548         if (nid != NID.kEnd) {
549             throw new IOException("Badly terminated UnpackInfo");
550         }
551     }
552 
readSubStreamsInfo(final ByteBuffer header, final Archive archive)553     private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException {
554         for (final Folder folder : archive.folders) {
555             folder.numUnpackSubStreams = 1;
556         }
557         int totalUnpackStreams = archive.folders.length;
558 
559         int nid = getUnsignedByte(header);
560         if (nid == NID.kNumUnpackStream) {
561             totalUnpackStreams = 0;
562             for (final Folder folder : archive.folders) {
563                 final long numStreams = readUint64(header);
564                 folder.numUnpackSubStreams = (int)numStreams;
565                 totalUnpackStreams += numStreams;
566             }
567             nid = getUnsignedByte(header);
568         }
569 
570         final SubStreamsInfo subStreamsInfo = new SubStreamsInfo();
571         subStreamsInfo.unpackSizes = new long[totalUnpackStreams];
572         subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams);
573         subStreamsInfo.crcs = new long[totalUnpackStreams];
574 
575         int nextUnpackStream = 0;
576         for (final Folder folder : archive.folders) {
577             if (folder.numUnpackSubStreams == 0) {
578                 continue;
579             }
580             long sum = 0;
581             if (nid == NID.kSize) {
582                 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) {
583                     final long size = readUint64(header);
584                     subStreamsInfo.unpackSizes[nextUnpackStream++] = size;
585                     sum += size;
586                 }
587             }
588             subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum;
589         }
590         if (nid == NID.kSize) {
591             nid = getUnsignedByte(header);
592         }
593 
594         int numDigests = 0;
595         for (final Folder folder : archive.folders) {
596             if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) {
597                 numDigests += folder.numUnpackSubStreams;
598             }
599         }
600 
601         if (nid == NID.kCRC) {
602             final BitSet hasMissingCrc = readAllOrBits(header, numDigests);
603             final long[] missingCrcs = new long[numDigests];
604             for (int i = 0; i < numDigests; i++) {
605                 if (hasMissingCrc.get(i)) {
606                     missingCrcs[i] = 0xffffFFFFL & header.getInt();
607                 }
608             }
609             int nextCrc = 0;
610             int nextMissingCrc = 0;
611             for (final Folder folder: archive.folders) {
612                 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) {
613                     subStreamsInfo.hasCrc.set(nextCrc, true);
614                     subStreamsInfo.crcs[nextCrc] = folder.crc;
615                     ++nextCrc;
616                 } else {
617                     for (int i = 0; i < folder.numUnpackSubStreams; i++) {
618                         subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc));
619                         subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc];
620                         ++nextCrc;
621                         ++nextMissingCrc;
622                     }
623                 }
624             }
625 
626             nid = getUnsignedByte(header);
627         }
628 
629         if (nid != NID.kEnd) {
630             throw new IOException("Badly terminated SubStreamsInfo");
631         }
632 
633         archive.subStreamsInfo = subStreamsInfo;
634     }
635 
readFolder(final ByteBuffer header)636     private Folder readFolder(final ByteBuffer header) throws IOException {
637         final Folder folder = new Folder();
638 
639         final long numCoders = readUint64(header);
640         final Coder[] coders = new Coder[(int)numCoders];
641         long totalInStreams = 0;
642         long totalOutStreams = 0;
643         for (int i = 0; i < coders.length; i++) {
644             coders[i] = new Coder();
645             final int bits = getUnsignedByte(header);
646             final int idSize = bits & 0xf;
647             final boolean isSimple = (bits & 0x10) == 0;
648             final boolean hasAttributes = (bits & 0x20) != 0;
649             final boolean moreAlternativeMethods = (bits & 0x80) != 0;
650 
651             coders[i].decompressionMethodId = new byte[idSize];
652             header.get(coders[i].decompressionMethodId);
653             if (isSimple) {
654                 coders[i].numInStreams = 1;
655                 coders[i].numOutStreams = 1;
656             } else {
657                 coders[i].numInStreams = readUint64(header);
658                 coders[i].numOutStreams = readUint64(header);
659             }
660             totalInStreams += coders[i].numInStreams;
661             totalOutStreams += coders[i].numOutStreams;
662             if (hasAttributes) {
663                 final long propertiesSize = readUint64(header);
664                 coders[i].properties = new byte[(int)propertiesSize];
665                 header.get(coders[i].properties);
666             }
667             // would need to keep looping as above:
668             while (moreAlternativeMethods) {
669                 throw new IOException("Alternative methods are unsupported, please report. " +
670                         "The reference implementation doesn't support them either.");
671             }
672         }
673         folder.coders = coders;
674         folder.totalInputStreams = totalInStreams;
675         folder.totalOutputStreams = totalOutStreams;
676 
677         if (totalOutStreams == 0) {
678             throw new IOException("Total output streams can't be 0");
679         }
680         final long numBindPairs = totalOutStreams - 1;
681         final BindPair[] bindPairs = new BindPair[(int)numBindPairs];
682         for (int i = 0; i < bindPairs.length; i++) {
683             bindPairs[i] = new BindPair();
684             bindPairs[i].inIndex = readUint64(header);
685             bindPairs[i].outIndex = readUint64(header);
686         }
687         folder.bindPairs = bindPairs;
688 
689         if (totalInStreams < numBindPairs) {
690             throw new IOException("Total input streams can't be less than the number of bind pairs");
691         }
692         final long numPackedStreams = totalInStreams - numBindPairs;
693         final long packedStreams[] = new long[(int)numPackedStreams];
694         if (numPackedStreams == 1) {
695             int i;
696             for (i = 0; i < (int)totalInStreams; i++) {
697                 if (folder.findBindPairForInStream(i) < 0) {
698                     break;
699                 }
700             }
701             if (i == (int)totalInStreams) {
702                 throw new IOException("Couldn't find stream's bind pair index");
703             }
704             packedStreams[0] = i;
705         } else {
706             for (int i = 0; i < (int)numPackedStreams; i++) {
707                 packedStreams[i] = readUint64(header);
708             }
709         }
710         folder.packedStreams = packedStreams;
711 
712         return folder;
713     }
714 
readAllOrBits(final ByteBuffer header, final int size)715     private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException {
716         final int areAllDefined = getUnsignedByte(header);
717         final BitSet bits;
718         if (areAllDefined != 0) {
719             bits = new BitSet(size);
720             for (int i = 0; i < size; i++) {
721                 bits.set(i, true);
722             }
723         } else {
724             bits = readBits(header, size);
725         }
726         return bits;
727     }
728 
readBits(final ByteBuffer header, final int size)729     private BitSet readBits(final ByteBuffer header, final int size) throws IOException {
730         final BitSet bits = new BitSet(size);
731         int mask = 0;
732         int cache = 0;
733         for (int i = 0; i < size; i++) {
734             if (mask == 0) {
735                 mask = 0x80;
736                 cache = getUnsignedByte(header);
737             }
738             bits.set(i, (cache & mask) != 0);
739             mask >>>= 1;
740         }
741         return bits;
742     }
743 
readFilesInfo(final ByteBuffer header, final Archive archive)744     private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException {
745         final long numFiles = readUint64(header);
746         final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int)numFiles];
747         for (int i = 0; i < files.length; i++) {
748             files[i] = new SevenZArchiveEntry();
749         }
750         BitSet isEmptyStream = null;
751         BitSet isEmptyFile = null;
752         BitSet isAnti = null;
753         while (true) {
754             final int propertyType = getUnsignedByte(header);
755             if (propertyType == 0) {
756                 break;
757             }
758             final long size = readUint64(header);
759             switch (propertyType) {
760                 case NID.kEmptyStream: {
761                     isEmptyStream = readBits(header, files.length);
762                     break;
763                 }
764                 case NID.kEmptyFile: {
765                     if (isEmptyStream == null) { // protect against NPE
766                         throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile");
767                     }
768                     isEmptyFile = readBits(header, isEmptyStream.cardinality());
769                     break;
770                 }
771                 case NID.kAnti: {
772                     if (isEmptyStream == null) { // protect against NPE
773                         throw new IOException("Header format error: kEmptyStream must appear before kAnti");
774                     }
775                     isAnti = readBits(header, isEmptyStream.cardinality());
776                     break;
777                 }
778                 case NID.kName: {
779                     final int external = getUnsignedByte(header);
780                     if (external != 0) {
781                         throw new IOException("Not implemented");
782                     }
783                     if (((size - 1) & 1) != 0) {
784                         throw new IOException("File names length invalid");
785                     }
786                     final byte[] names = new byte[(int)(size - 1)];
787                     header.get(names);
788                     int nextFile = 0;
789                     int nextName = 0;
790                     for (int i = 0; i < names.length; i += 2) {
791                         if (names[i] == 0 && names[i+1] == 0) {
792                             files[nextFile++].setName(new String(names, nextName, i-nextName, CharsetNames.UTF_16LE));
793                             nextName = i + 2;
794                         }
795                     }
796                     if (nextName != names.length || nextFile != files.length) {
797                         throw new IOException("Error parsing file names");
798                     }
799                     break;
800                 }
801                 case NID.kCTime: {
802                     final BitSet timesDefined = readAllOrBits(header, files.length);
803                     final int external = getUnsignedByte(header);
804                     if (external != 0) {
805                         throw new IOException("Unimplemented");
806                     }
807                     for (int i = 0; i < files.length; i++) {
808                         files[i].setHasCreationDate(timesDefined.get(i));
809                         if (files[i].getHasCreationDate()) {
810                             files[i].setCreationDate(header.getLong());
811                         }
812                     }
813                     break;
814                 }
815                 case NID.kATime: {
816                     final BitSet timesDefined = readAllOrBits(header, files.length);
817                     final int external = getUnsignedByte(header);
818                     if (external != 0) {
819                         throw new IOException("Unimplemented");
820                     }
821                     for (int i = 0; i < files.length; i++) {
822                         files[i].setHasAccessDate(timesDefined.get(i));
823                         if (files[i].getHasAccessDate()) {
824                             files[i].setAccessDate(header.getLong());
825                         }
826                     }
827                     break;
828                 }
829                 case NID.kMTime: {
830                     final BitSet timesDefined = readAllOrBits(header, files.length);
831                     final int external = getUnsignedByte(header);
832                     if (external != 0) {
833                         throw new IOException("Unimplemented");
834                     }
835                     for (int i = 0; i < files.length; i++) {
836                         files[i].setHasLastModifiedDate(timesDefined.get(i));
837                         if (files[i].getHasLastModifiedDate()) {
838                             files[i].setLastModifiedDate(header.getLong());
839                         }
840                     }
841                     break;
842                 }
843                 case NID.kWinAttributes: {
844                     final BitSet attributesDefined = readAllOrBits(header, files.length);
845                     final int external = getUnsignedByte(header);
846                     if (external != 0) {
847                         throw new IOException("Unimplemented");
848                     }
849                     for (int i = 0; i < files.length; i++) {
850                         files[i].setHasWindowsAttributes(attributesDefined.get(i));
851                         if (files[i].getHasWindowsAttributes()) {
852                             files[i].setWindowsAttributes(header.getInt());
853                         }
854                     }
855                     break;
856                 }
857                 case NID.kStartPos: {
858                     throw new IOException("kStartPos is unsupported, please report");
859                 }
860                 case NID.kDummy: {
861                     // 7z 9.20 asserts the content is all zeros and ignores the property
862                     // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287
863 
864                     if (skipBytesFully(header, size) < size) {
865                         throw new IOException("Incomplete kDummy property");
866                     }
867                     break;
868                 }
869 
870                 default: {
871                     // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287
872                     if (skipBytesFully(header, size) < size) {
873                         throw new IOException("Incomplete property of type " + propertyType);
874                     }
875                     break;
876                 }
877             }
878         }
879         int nonEmptyFileCounter = 0;
880         int emptyFileCounter = 0;
881         for (int i = 0; i < files.length; i++) {
882             files[i].setHasStream(isEmptyStream == null || !isEmptyStream.get(i));
883             if (files[i].hasStream()) {
884                 files[i].setDirectory(false);
885                 files[i].setAntiItem(false);
886                 files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter));
887                 files[i].setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]);
888                 files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]);
889                 ++nonEmptyFileCounter;
890             } else {
891                 files[i].setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter));
892                 files[i].setAntiItem(isAnti != null && isAnti.get(emptyFileCounter));
893                 files[i].setHasCrc(false);
894                 files[i].setSize(0);
895                 ++emptyFileCounter;
896             }
897         }
898         archive.files = files;
899         calculateStreamMap(archive);
900     }
901 
calculateStreamMap(final Archive archive)902     private void calculateStreamMap(final Archive archive) throws IOException {
903         final StreamMap streamMap = new StreamMap();
904 
905         int nextFolderPackStreamIndex = 0;
906         final int numFolders = archive.folders != null ? archive.folders.length : 0;
907         streamMap.folderFirstPackStreamIndex = new int[numFolders];
908         for (int i = 0; i < numFolders; i++) {
909             streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex;
910             nextFolderPackStreamIndex += archive.folders[i].packedStreams.length;
911         }
912 
913         long nextPackStreamOffset = 0;
914         final int numPackSizes = archive.packSizes != null ? archive.packSizes.length : 0;
915         streamMap.packStreamOffsets = new long[numPackSizes];
916         for (int i = 0; i < numPackSizes; i++) {
917             streamMap.packStreamOffsets[i] = nextPackStreamOffset;
918             nextPackStreamOffset += archive.packSizes[i];
919         }
920 
921         streamMap.folderFirstFileIndex = new int[numFolders];
922         streamMap.fileFolderIndex = new int[archive.files.length];
923         int nextFolderIndex = 0;
924         int nextFolderUnpackStreamIndex = 0;
925         for (int i = 0; i < archive.files.length; i++) {
926             if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) {
927                 streamMap.fileFolderIndex[i] = -1;
928                 continue;
929             }
930             if (nextFolderUnpackStreamIndex == 0) {
931                 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) {
932                     streamMap.folderFirstFileIndex[nextFolderIndex] = i;
933                     if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) {
934                         break;
935                     }
936                 }
937                 if (nextFolderIndex >= archive.folders.length) {
938                     throw new IOException("Too few folders in archive");
939                 }
940             }
941             streamMap.fileFolderIndex[i] = nextFolderIndex;
942             if (!archive.files[i].hasStream()) {
943                 continue;
944             }
945             ++nextFolderUnpackStreamIndex;
946             if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) {
947                 ++nextFolderIndex;
948                 nextFolderUnpackStreamIndex = 0;
949             }
950         }
951 
952         archive.streamMap = streamMap;
953     }
954 
buildDecodingStream()955     private void buildDecodingStream() throws IOException {
956         final int folderIndex = archive.streamMap.fileFolderIndex[currentEntryIndex];
957         if (folderIndex < 0) {
958             deferredBlockStreams.clear();
959             // TODO: previously it'd return an empty stream?
960             // new BoundedInputStream(new ByteArrayInputStream(new byte[0]), 0);
961             return;
962         }
963         final SevenZArchiveEntry file = archive.files[currentEntryIndex];
964         if (currentFolderIndex == folderIndex) {
965             // (COMPRESS-320).
966             // The current entry is within the same (potentially opened) folder. The
967             // previous stream has to be fully decoded before we can start reading
968             // but don't do it eagerly -- if the user skips over the entire folder nothing
969             // is effectively decompressed.
970 
971             file.setContentMethods(archive.files[currentEntryIndex - 1].getContentMethods());
972         } else {
973             // We're opening a new folder. Discard any queued streams/ folder stream.
974             currentFolderIndex = folderIndex;
975             deferredBlockStreams.clear();
976             if (currentFolderInputStream != null) {
977                 currentFolderInputStream.close();
978                 currentFolderInputStream = null;
979             }
980 
981             final Folder folder = archive.folders[folderIndex];
982             final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex];
983             final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
984                     archive.streamMap.packStreamOffsets[firstPackStreamIndex];
985             currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file);
986         }
987 
988         InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize());
989         if (file.getHasCrc()) {
990             fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue());
991         }
992 
993         deferredBlockStreams.add(fileStream);
994     }
995 
buildDecoderStack(final Folder folder, final long folderOffset, final int firstPackStreamIndex, final SevenZArchiveEntry entry)996     private InputStream buildDecoderStack(final Folder folder, final long folderOffset,
997                 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException {
998         channel.position(folderOffset);
999         InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream(
1000               new BoundedSeekableByteChannelInputStream(channel,
1001                   archive.packSizes[firstPackStreamIndex]))) {
1002             @Override
1003             public int read() throws IOException {
1004                 final int r = in.read();
1005                 if (r >= 0) {
1006                     count(1);
1007                 }
1008                 return r;
1009             }
1010             @Override
1011             public int read(final byte[] b) throws IOException {
1012                 return read(b, 0, b.length);
1013             }
1014             @Override
1015             public int read(final byte[] b, final int off, final int len) throws IOException {
1016                 final int r = in.read(b, off, len);
1017                 if (r >= 0) {
1018                     count(r);
1019                 }
1020                 return r;
1021             }
1022             private void count(int c) {
1023                 compressedBytesReadFromCurrentEntry += c;
1024             }
1025         };
1026         final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>();
1027         for (final Coder coder : folder.getOrderedCoders()) {
1028             if (coder.numInStreams != 1 || coder.numOutStreams != 1) {
1029                 throw new IOException("Multi input/output stream coders are not yet supported");
1030             }
1031             final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId);
1032             inputStreamStack = Coders.addDecoder(fileName, inputStreamStack,
1033                     folder.getUnpackSizeForCoder(coder), coder, password);
1034             methods.addFirst(new SevenZMethodConfiguration(method,
1035                      Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack)));
1036         }
1037         entry.setContentMethods(methods);
1038         if (folder.hasCrc) {
1039             return new CRC32VerifyingInputStream(inputStreamStack,
1040                     folder.getUnpackSize(), folder.crc);
1041         }
1042         return inputStreamStack;
1043     }
1044 
1045     /**
1046      * Reads a byte of data.
1047      *
1048      * @return the byte read, or -1 if end of input is reached
1049      * @throws IOException
1050      *             if an I/O error has occurred
1051      */
read()1052     public int read() throws IOException {
1053         int b = getCurrentStream().read();
1054         if (b >= 0) {
1055             uncompressedBytesReadFromCurrentEntry++;
1056         }
1057         return b;
1058     }
1059 
getCurrentStream()1060     private InputStream getCurrentStream() throws IOException {
1061         if (archive.files[currentEntryIndex].getSize() == 0) {
1062             return new ByteArrayInputStream(new byte[0]);
1063         }
1064         if (deferredBlockStreams.isEmpty()) {
1065             throw new IllegalStateException("No current 7z entry (call getNextEntry() first).");
1066         }
1067 
1068         while (deferredBlockStreams.size() > 1) {
1069             // In solid compression mode we need to decompress all leading folder'
1070             // streams to get access to an entry. We defer this until really needed
1071             // so that entire blocks can be skipped without wasting time for decompression.
1072             try (final InputStream stream = deferredBlockStreams.remove(0)) {
1073                 IOUtils.skip(stream, Long.MAX_VALUE);
1074             }
1075             compressedBytesReadFromCurrentEntry = 0;
1076         }
1077 
1078         return deferredBlockStreams.get(0);
1079     }
1080 
1081     /**
1082      * Reads data into an array of bytes.
1083      *
1084      * @param b the array to write data to
1085      * @return the number of bytes read, or -1 if end of input is reached
1086      * @throws IOException
1087      *             if an I/O error has occurred
1088      */
read(final byte[] b)1089     public int read(final byte[] b) throws IOException {
1090         return read(b, 0, b.length);
1091     }
1092 
1093     /**
1094      * Reads data into an array of bytes.
1095      *
1096      * @param b the array to write data to
1097      * @param off offset into the buffer to start filling at
1098      * @param len of bytes to read
1099      * @return the number of bytes read, or -1 if end of input is reached
1100      * @throws IOException
1101      *             if an I/O error has occurred
1102      */
read(final byte[] b, final int off, final int len)1103     public int read(final byte[] b, final int off, final int len) throws IOException {
1104         int cnt = getCurrentStream().read(b, off, len);
1105         if (cnt > 0) {
1106             uncompressedBytesReadFromCurrentEntry += cnt;
1107         }
1108         return cnt;
1109     }
1110 
1111     /**
1112      * Provides statistics for bytes read from the current entry.
1113      *
1114      * @return statistics for bytes read from the current entry
1115      * @since 1.17
1116      */
getStatisticsForCurrentEntry()1117     public InputStreamStatistics getStatisticsForCurrentEntry() {
1118         return new InputStreamStatistics() {
1119             @Override
1120             public long getCompressedCount() {
1121                 return compressedBytesReadFromCurrentEntry;
1122             }
1123             @Override
1124             public long getUncompressedCount() {
1125                 return uncompressedBytesReadFromCurrentEntry;
1126             }
1127         };
1128     }
1129 
1130     private static long readUint64(final ByteBuffer in) throws IOException {
1131         // long rather than int as it might get shifted beyond the range of an int
1132         final long firstByte = getUnsignedByte(in);
1133         int mask = 0x80;
1134         long value = 0;
1135         for (int i = 0; i < 8; i++) {
1136             if ((firstByte & mask) == 0) {
1137                 return value | ((firstByte & (mask - 1)) << (8 * i));
1138             }
1139             final long nextByte = getUnsignedByte(in);
1140             value |= nextByte << (8 * i);
1141             mask >>>= 1;
1142         }
1143         return value;
1144     }
1145 
1146     private static int getUnsignedByte(ByteBuffer buf) {
1147         return buf.get() & 0xff;
1148     }
1149 
1150     /**
1151      * Checks if the signature matches what is expected for a 7z file.
1152      *
1153      * @param signature
1154      *            the bytes to check
1155      * @param length
1156      *            the number of bytes to check
1157      * @return true, if this is the signature of a 7z archive.
1158      * @since 1.8
1159      */
1160     public static boolean matches(final byte[] signature, final int length) {
1161         if (length < sevenZSignature.length) {
1162             return false;
1163         }
1164 
1165         for (int i = 0; i < sevenZSignature.length; i++) {
1166             if (signature[i] != sevenZSignature[i]) {
1167                 return false;
1168             }
1169         }
1170         return true;
1171     }
1172 
1173     private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) throws IOException {
1174         if (bytesToSkip < 1) {
1175             return 0;
1176         }
1177         int current = input.position();
1178         int maxSkip = input.remaining();
1179         if (maxSkip < bytesToSkip) {
1180             bytesToSkip = maxSkip;
1181         }
1182         input.position(current + (int) bytesToSkip);
1183         return bytesToSkip;
1184     }
1185 
    /**
     * Fills the given buffer from {@code channel} and leaves it ready to be
     * read from the start.
     *
     * @param buf buffer to fill; its previous position is discarded
     * @throws IOException if reading from the channel fails
     */
    private void readFully(ByteBuffer buf) throws IOException {
        // Start filling at position 0; rewind() leaves the limit untouched.
        buf.rewind();
        // NOTE(review): presumably fails if the channel ends before the buffer
        // is full -- confirm against IOUtils.readFully.
        IOUtils.readFully(channel, buf);
        // Switch from filling to draining: limit = bytes written, position = 0.
        buf.flip();
    }
1191 
    /**
     * Returns the string representation of the underlying {@code archive}
     * object.
     *
     * @return the result of {@code archive.toString()}
     */
    @Override
    public String toString() {
      return archive.toString();
    }
1196 
1197     private static final CharsetEncoder PASSWORD_ENCODER = StandardCharsets.UTF_16LE.newEncoder();
1198 
1199     private static byte[] utf16Decode(char[] chars) throws IOException {
1200         if (chars == null) {
1201             return null;
1202         }
1203         ByteBuffer encoded = PASSWORD_ENCODER.encode(CharBuffer.wrap(chars));
1204         if (encoded.hasArray()) {
1205             return encoded.array();
1206         }
1207         byte[] e = new byte[encoded.remaining()];
1208         encoded.get(e);
1209         return e;
1210     }
1211 }
1212