1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.commons.compress.archivers; 20 21 import java.io.ByteArrayInputStream; 22 import java.io.IOException; 23 import java.io.InputStream; 24 import java.io.OutputStream; 25 import java.security.AccessController; 26 import java.security.PrivilegedAction; 27 import java.util.ArrayList; 28 import java.util.Collections; 29 import java.util.Iterator; 30 import java.util.Locale; 31 import java.util.Set; 32 import java.util.SortedMap; 33 import java.util.TreeMap; 34 35 import org.apache.commons.compress.archivers.ar.ArArchiveInputStream; 36 import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; 37 import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream; 38 import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; 39 import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream; 40 import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream; 41 import org.apache.commons.compress.archivers.jar.JarArchiveInputStream; 42 import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream; 43 import org.apache.commons.compress.archivers.sevenz.SevenZFile; 44 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 45 import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 46 import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; 47 import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; 48 import org.apache.commons.compress.utils.IOUtils; 49 import org.apache.commons.compress.utils.Lists; 50 import org.apache.commons.compress.utils.ServiceLoaderIterator; 51 import org.apache.commons.compress.utils.Sets; 52 53 /** 54 * Factory to create Archive[In|Out]putStreams from names or the first bytes of 55 * the InputStream. In order to add other implementations, you should extend 56 * ArchiveStreamFactory and override the appropriate methods (and call their 57 * implementation from super of course). 58 * 59 * Compressing a ZIP-File: 60 * 61 * <pre> 62 * final OutputStream out = Files.newOutputStream(output.toPath()); 63 * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out); 64 * 65 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml")); 66 * IOUtils.copy(Files.newInputStream(file1.toPath()), os); 67 * os.closeArchiveEntry(); 68 * 69 * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml")); 70 * IOUtils.copy(Files.newInputStream(file2.toPath()), os); 71 * os.closeArchiveEntry(); 72 * os.close(); 73 * </pre> 74 * 75 * Decompressing a ZIP-File: 76 * 77 * <pre> 78 * final InputStream is = Files.newInputStream(input.toPath()); 79 * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is); 80 * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry(); 81 * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName())); 82 * IOUtils.copy(in, out); 83 * out.close(); 84 * in.close(); 85 * </pre> 86 * @Immutable provided that the deprecated method setEntryEncoding is not used. 87 * @ThreadSafe even if the deprecated method setEntryEncoding is used 88 */ 89 public class ArchiveStreamFactory implements ArchiveStreamProvider { 90 91 private static final int TAR_HEADER_SIZE = 512; 92 93 private static final int DUMP_SIGNATURE_SIZE = 32; 94 95 private static final int SIGNATURE_SIZE = 12; 96 97 private static final ArchiveStreamFactory SINGLETON = new ArchiveStreamFactory(); 98 99 /** 100 * Constant (value {@value}) used to identify the AR archive format. 101 * @since 1.1 102 */ 103 public static final String AR = "ar"; 104 105 /** 106 * Constant (value {@value}) used to identify the ARJ archive format. 107 * Not supported as an output stream type. 108 * @since 1.6 109 */ 110 public static final String ARJ = "arj"; 111 112 /** 113 * Constant (value {@value}) used to identify the CPIO archive format. 114 * @since 1.1 115 */ 116 public static final String CPIO = "cpio"; 117 118 /** 119 * Constant (value {@value}) used to identify the Unix DUMP archive format. 120 * Not supported as an output stream type. 121 * @since 1.3 122 */ 123 public static final String DUMP = "dump"; 124 125 /** 126 * Constant (value {@value}) used to identify the JAR archive format. 127 * @since 1.1 128 */ 129 public static final String JAR = "jar"; 130 131 /** 132 * Constant used to identify the TAR archive format. 133 * @since 1.1 134 */ 135 public static final String TAR = "tar"; 136 137 /** 138 * Constant (value {@value}) used to identify the ZIP archive format. 139 * @since 1.1 140 */ 141 public static final String ZIP = "zip"; 142 143 /** 144 * Constant (value {@value}) used to identify the 7z archive format. 145 * @since 1.8 146 */ 147 public static final String SEVEN_Z = "7z"; 148 149 /** 150 * Entry encoding, null for the platform default. 151 */ 152 private final String encoding; 153 154 /** 155 * Entry encoding, null for the default. 156 */ 157 private volatile String entryEncoding; 158 159 private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders; 160 161 private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders; 162 findArchiveStreamProviders()163 private static ArrayList<ArchiveStreamProvider> findArchiveStreamProviders() { 164 return Lists.newArrayList(serviceLoaderIterator()); 165 } 166 putAll(Set<String> names, ArchiveStreamProvider provider, TreeMap<String, ArchiveStreamProvider> map)167 static void putAll(Set<String> names, ArchiveStreamProvider provider, 168 TreeMap<String, ArchiveStreamProvider> map) { 169 for (String name : names) { 170 map.put(toKey(name), provider); 171 } 172 } 173 serviceLoaderIterator()174 private static Iterator<ArchiveStreamProvider> serviceLoaderIterator() { 175 return new ServiceLoaderIterator<>(ArchiveStreamProvider.class); 176 } 177 toKey(final String name)178 private static String toKey(final String name) { 179 return name.toUpperCase(Locale.ROOT); 180 } 181 182 /** 183 * Constructs a new sorted map from input stream provider names to provider 184 * objects. 185 * 186 * <p> 187 * The map returned by this method will have one entry for each provider for 188 * which support is available in the current Java virtual machine. If two or 189 * more supported provider have the same name then the resulting map will 190 * contain just one of them; which one it will contain is not specified. 191 * </p> 192 * 193 * <p> 194 * The invocation of this method, and the subsequent use of the resulting 195 * map, may cause time-consuming disk or network I/O operations to occur. 196 * This method is provided for applications that need to enumerate all of 197 * the available providers, for example to allow user provider selection. 198 * </p> 199 * 200 * <p> 201 * This method may return different results at different times if new 202 * providers are dynamically made available to the current Java virtual 203 * machine. 204 * </p> 205 * 206 * @return An immutable, map from names to provider objects 207 * @since 1.13 208 */ findAvailableArchiveInputStreamProviders()209 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() { 210 return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() { 211 @Override 212 public SortedMap<String, ArchiveStreamProvider> run() { 213 TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 214 putAll(SINGLETON.getInputStreamArchiveNames(), SINGLETON, map); 215 for (ArchiveStreamProvider provider : findArchiveStreamProviders()) { 216 putAll(provider.getInputStreamArchiveNames(), provider, map); 217 } 218 return map; 219 } 220 }); 221 } 222 223 /** 224 * Constructs a new sorted map from output stream provider names to provider 225 * objects. 226 * 227 * <p> 228 * The map returned by this method will have one entry for each provider for 229 * which support is available in the current Java virtual machine. If two or 230 * more supported provider have the same name then the resulting map will 231 * contain just one of them; which one it will contain is not specified. 232 * </p> 233 * 234 * <p> 235 * The invocation of this method, and the subsequent use of the resulting 236 * map, may cause time-consuming disk or network I/O operations to occur. 237 * This method is provided for applications that need to enumerate all of 238 * the available providers, for example to allow user provider selection. 239 * </p> 240 * 241 * <p> 242 * This method may return different results at different times if new 243 * providers are dynamically made available to the current Java virtual 244 * machine. 245 * </p> 246 * 247 * @return An immutable, map from names to provider objects 248 * @since 1.13 249 */ 250 public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() { 251 return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() { 252 @Override 253 public SortedMap<String, ArchiveStreamProvider> run() { 254 TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>(); 255 putAll(SINGLETON.getOutputStreamArchiveNames(), SINGLETON, map); 256 for (ArchiveStreamProvider provider : findArchiveStreamProviders()) { 257 putAll(provider.getOutputStreamArchiveNames(), provider, map); 258 } 259 return map; 260 } 261 }); 262 } 263 264 /** 265 * Create an instance using the platform default encoding. 266 */ 267 public ArchiveStreamFactory() { 268 this(null); 269 } 270 271 /** 272 * Create an instance using the specified encoding. 273 * 274 * @param encoding the encoding to be used. 275 * 276 * @since 1.10 277 */ 278 public ArchiveStreamFactory(final String encoding) { 279 super(); 280 this.encoding = encoding; 281 // Also set the original field so can continue to use it. 282 this.entryEncoding = encoding; 283 } 284 285 /** 286 * Returns the encoding to use for arj, jar, zip, dump, cpio and tar 287 * files, or null for the archiver default. 288 * 289 * @return entry encoding, or null for the archiver default 290 * @since 1.5 291 */ 292 public String getEntryEncoding() { 293 return entryEncoding; 294 } 295 296 /** 297 * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default. 298 * 299 * @param entryEncoding the entry encoding, null uses the archiver default. 300 * @since 1.5 301 * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding 302 * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)} 303 * was used to specify the factory encoding. 304 */ 305 @Deprecated 306 public void setEntryEncoding(final String entryEncoding) { 307 // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway 308 if (encoding != null) { 309 throw new IllegalStateException("Cannot overide encoding set by the constructor"); 310 } 311 this.entryEncoding = entryEncoding; 312 } 313 314 /** 315 * Creates an archive input stream from an archiver name and an input stream. 316 * 317 * @param archiverName the archive name, 318 * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z} 319 * @param in the input stream 320 * @return the archive input stream 321 * @throws ArchiveException if the archiver name is not known 322 * @throws StreamingNotSupportedException if the format cannot be 323 * read from a stream 324 * @throws IllegalArgumentException if the archiver name or stream is null 325 */ 326 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in) 327 throws ArchiveException { 328 return createArchiveInputStream(archiverName, in, entryEncoding); 329 } 330 331 @Override 332 public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in, 333 final String actualEncoding) throws ArchiveException { 334 335 if (archiverName == null) { 336 throw new IllegalArgumentException("Archivername must not be null."); 337 } 338 339 if (in == null) { 340 throw new IllegalArgumentException("InputStream must not be null."); 341 } 342 343 if (AR.equalsIgnoreCase(archiverName)) { 344 return new ArArchiveInputStream(in); 345 } 346 if (ARJ.equalsIgnoreCase(archiverName)) { 347 if (actualEncoding != null) { 348 return new ArjArchiveInputStream(in, actualEncoding); 349 } 350 return new ArjArchiveInputStream(in); 351 } 352 if (ZIP.equalsIgnoreCase(archiverName)) { 353 if (actualEncoding != null) { 354 return new ZipArchiveInputStream(in, actualEncoding); 355 } 356 return new ZipArchiveInputStream(in); 357 } 358 if (TAR.equalsIgnoreCase(archiverName)) { 359 if (actualEncoding != null) { 360 return new TarArchiveInputStream(in, actualEncoding); 361 } 362 return new TarArchiveInputStream(in); 363 } 364 if (JAR.equalsIgnoreCase(archiverName)) { 365 if (actualEncoding != null) { 366 return new JarArchiveInputStream(in, actualEncoding); 367 } 368 return new JarArchiveInputStream(in); 369 } 370 if (CPIO.equalsIgnoreCase(archiverName)) { 371 if (actualEncoding != null) { 372 return new CpioArchiveInputStream(in, actualEncoding); 373 } 374 return new CpioArchiveInputStream(in); 375 } 376 if (DUMP.equalsIgnoreCase(archiverName)) { 377 if (actualEncoding != null) { 378 return new DumpArchiveInputStream(in, actualEncoding); 379 } 380 return new DumpArchiveInputStream(in); 381 } 382 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 383 throw new StreamingNotSupportedException(SEVEN_Z); 384 } 385 386 final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName)); 387 if (archiveStreamProvider != null) { 388 return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding); 389 } 390 391 throw new ArchiveException("Archiver: " + archiverName + " not found."); 392 } 393 394 /** 395 * Creates an archive output stream from an archiver name and an output stream. 396 * 397 * @param archiverName the archive name, 398 * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO} 399 * @param out the output stream 400 * @return the archive output stream 401 * @throws ArchiveException if the archiver name is not known 402 * @throws StreamingNotSupportedException if the format cannot be 403 * written to a stream 404 * @throws IllegalArgumentException if the archiver name or stream is null 405 */ 406 public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out) 407 throws ArchiveException { 408 return createArchiveOutputStream(archiverName, out, entryEncoding); 409 } 410 411 @Override 412 public ArchiveOutputStream createArchiveOutputStream( 413 final String archiverName, final OutputStream out, final String actualEncoding) 414 throws ArchiveException { 415 if (archiverName == null) { 416 throw new IllegalArgumentException("Archivername must not be null."); 417 } 418 if (out == null) { 419 throw new IllegalArgumentException("OutputStream must not be null."); 420 } 421 422 if (AR.equalsIgnoreCase(archiverName)) { 423 return new ArArchiveOutputStream(out); 424 } 425 if (ZIP.equalsIgnoreCase(archiverName)) { 426 final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out); 427 if (actualEncoding != null) { 428 zip.setEncoding(actualEncoding); 429 } 430 return zip; 431 } 432 if (TAR.equalsIgnoreCase(archiverName)) { 433 if (actualEncoding != null) { 434 return new TarArchiveOutputStream(out, actualEncoding); 435 } 436 return new TarArchiveOutputStream(out); 437 } 438 if (JAR.equalsIgnoreCase(archiverName)) { 439 if (actualEncoding != null) { 440 return new JarArchiveOutputStream(out, actualEncoding); 441 } 442 return new JarArchiveOutputStream(out); 443 } 444 if (CPIO.equalsIgnoreCase(archiverName)) { 445 if (actualEncoding != null) { 446 return new CpioArchiveOutputStream(out, actualEncoding); 447 } 448 return new CpioArchiveOutputStream(out); 449 } 450 if (SEVEN_Z.equalsIgnoreCase(archiverName)) { 451 throw new StreamingNotSupportedException(SEVEN_Z); 452 } 453 454 final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName)); 455 if (archiveStreamProvider != null) { 456 return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding); 457 } 458 459 throw new ArchiveException("Archiver: " + archiverName + " not found."); 460 } 461 462 /** 463 * Create an archive input stream from an input stream, autodetecting 464 * the archive type from the first few bytes of the stream. The InputStream 465 * must support marks, like BufferedInputStream. 466 * 467 * @param in the input stream 468 * @return the archive input stream 469 * @throws ArchiveException if the archiver name is not known 470 * @throws StreamingNotSupportedException if the format cannot be 471 * read from a stream 472 * @throws IllegalArgumentException if the stream is null or does not support mark 473 */ 474 public ArchiveInputStream createArchiveInputStream(final InputStream in) 475 throws ArchiveException { 476 return createArchiveInputStream(detect(in), in); 477 } 478 479 /** 480 * Try to determine the type of Archiver 481 * @param in input stream 482 * @return type of archiver if found 483 * @throws ArchiveException if an archiver cannot be detected in the stream 484 * @since 1.14 485 */ 486 public static String detect(InputStream in) throws ArchiveException { 487 if (in == null) { 488 throw new IllegalArgumentException("Stream must not be null."); 489 } 490 491 if (!in.markSupported()) { 492 throw new IllegalArgumentException("Mark is not supported."); 493 } 494 495 final byte[] signature = new byte[SIGNATURE_SIZE]; 496 in.mark(signature.length); 497 int signatureLength = -1; 498 try { 499 signatureLength = IOUtils.readFully(in, signature); 500 in.reset(); 501 } catch (IOException e) { 502 throw new ArchiveException("IOException while reading signature.", e); 503 } 504 505 if (ZipArchiveInputStream.matches(signature, signatureLength)) { 506 return ZIP; 507 } else if (JarArchiveInputStream.matches(signature, signatureLength)) { 508 return JAR; 509 } else if (ArArchiveInputStream.matches(signature, signatureLength)) { 510 return AR; 511 } else if (CpioArchiveInputStream.matches(signature, signatureLength)) { 512 return CPIO; 513 } else if (ArjArchiveInputStream.matches(signature, signatureLength)) { 514 return ARJ; 515 } else if (SevenZFile.matches(signature, signatureLength)) { 516 return SEVEN_Z; 517 } 518 519 // Dump needs a bigger buffer to check the signature; 520 final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE]; 521 in.mark(dumpsig.length); 522 try { 523 signatureLength = IOUtils.readFully(in, dumpsig); 524 in.reset(); 525 } catch (IOException e) { 526 throw new ArchiveException("IOException while reading dump signature", e); 527 } 528 if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) { 529 return DUMP; 530 } 531 532 // Tar needs an even bigger buffer to check the signature; read the first block 533 final byte[] tarHeader = new byte[TAR_HEADER_SIZE]; 534 in.mark(tarHeader.length); 535 try { 536 signatureLength = IOUtils.readFully(in, tarHeader); 537 in.reset(); 538 } catch (IOException e) { 539 throw new ArchiveException("IOException while reading tar signature", e); 540 } 541 if (TarArchiveInputStream.matches(tarHeader, signatureLength)) { 542 return TAR; 543 } 544 545 // COMPRESS-117 - improve auto-recognition 546 if (signatureLength >= TAR_HEADER_SIZE) { 547 TarArchiveInputStream tais = null; 548 try { 549 tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader)); 550 // COMPRESS-191 - verify the header checksum 551 if (tais.getNextTarEntry().isCheckSumOK()) { 552 return TAR; 553 } 554 } catch (final Exception e) { // NOPMD // NOSONAR 555 // can generate IllegalArgumentException as well 556 // as IOException 557 // autodetection, simply not a TAR 558 // ignored 559 } finally { 560 IOUtils.closeQuietly(tais); 561 } 562 } 563 throw new ArchiveException("No Archiver found for the stream signature"); 564 } 565 566 public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() { 567 if (archiveInputStreamProviders == null) { 568 archiveInputStreamProviders = Collections 569 .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders()); 570 } 571 return archiveInputStreamProviders; 572 } 573 574 public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() { 575 if (archiveOutputStreamProviders == null) { 576 archiveOutputStreamProviders = Collections 577 .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders()); 578 } 579 return archiveOutputStreamProviders; 580 } 581 582 @Override 583 public Set<String> getInputStreamArchiveNames() { 584 return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z); 585 } 586 587 @Override 588 public Set<String> getOutputStreamArchiveNames() { 589 return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z); 590 } 591 592 } 593