/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.bzip2;

import java.util.BitSet;

/**
 * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link
 * BZip2CompressorOutputStream}.
 *
 * <p>This class is based on a Java port of Julian Seward's
 * blocksort.c in his libbzip2.</p>
 *
 * <p>The Burrows-Wheeler transform is a reversible transform of the
 * original data that is supposed to group similar bytes close to
 * each other. The idea is to sort all permutations of the input and
 * only keep the last byte of each permutation. E.g. for "Commons
 * Compress" you'd get:</p>
 *
 * <pre>
 *  CompressCommons
 * Commons Compress
 * CompressCommons
 * essCommons Compr
 * mmons CompressCo
 * mons CompressCom
 * mpressCommons Co
 * ns CompressCommo
 * ommons CompressC
 * ompressCommons C
 * ons CompressComm
 * pressCommons Com
 * ressCommons Comp
 * s CompressCommon
 * sCommons Compres
 * ssCommons Compre
 * </pre>
 *
 * <p>Which results in a new text "ss romooCCmmpnse"; in addition the
 * index of the first line that contained the original text is kept -
 * in this case it is 1. The idea is that in a long English text all
 * permutations that start with "he" are likely suffixes of a "the" and
 * thus they end in "t", leading to a larger block of "t"s that can
 * better be compressed by the subsequent Move-to-Front, run-length
 * and Huffman encoding steps.</p>
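 *
 * <p>As a rough illustration (not the optimized code used by this
 * class), a naive quadratic version of the forward transform could
 * look like the hypothetical {@code naiveBwt} below:</p>
 *
 * <pre>{@code
 * static String naiveBwt(String s) {
 *     int n = s.length();
 *     String[] rotations = new String[n];
 *     for (int i = 0; i < n; i++) {
 *         // rotation of s starting at position i
 *         rotations[i] = s.substring(i) + s.substring(0, i);
 *     }
 *     java.util.Arrays.sort(rotations);
 *     StringBuilder lastColumn = new StringBuilder(n);
 *     for (String rotation : rotations) {
 *         // keep only the last byte of each sorted permutation
 *         lastColumn.append(rotation.charAt(n - 1));
 *     }
 *     return lastColumn.toString(); // "ss romooCCmmpnse" for "Commons Compress"
 * }
 * }</pre>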
 *
 * <p>For more information see for example:</p>
 *
 * <p>This is the version using unrolled loops. Normally I never use such ones
 * in Java code. The unrolling has shown a noticeable performance improvement
 * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the
 * JIT compiler of the VM.
*/ private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow, final int lo, final int hi, final int d, final int lastShadow) { final int bigN = hi - lo + 1; if (bigN < 2) { return this.firstAttempt && (this.workDone > this.workLimit); } int hp = 0; while (INCS[hp] < bigN) { hp++; } final int[] fmap = dataShadow.fmap; final char[] quadrant = this.quadrant; final byte[] block = dataShadow.block; final int lastPlus1 = lastShadow + 1; final boolean firstAttemptShadow = this.firstAttempt; final int workLimitShadow = this.workLimit; int workDoneShadow = this.workDone; // Following block contains unrolled code which could be shortened by // coding it in additional loops. HP: while (--hp >= 0) { final int h = INCS[hp]; final int mj = lo + h - 1; for (int i = lo + h; i <= hi;) { // copy for (int k = 3; (i <= hi) && (--k >= 0); i++) { final int v = fmap[i]; final int vd = v + d; int j = i; // for (int a; // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, // block, quadrant, lastShadow); // j -= h) { // fmap[j] = a; // } // // unrolled version: // start inline mainGTU boolean onceRunned = false; int a = 0; HAMMER: while (true) { if (onceRunned) { fmap[j] = a; if ((j -= h) <= mj) { break HAMMER; } } else { onceRunned = true; } a = fmap[j - h]; int i1 = a + d; int i2 = vd; // following could be done in a loop, but // unrolled it for performance: if (block[i1 + 1] == block[i2 + 1]) { if (block[i1 + 2] == block[i2 + 2]) { if (block[i1 + 3] == block[i2 + 3]) { if (block[i1 + 4] == block[i2 + 4]) { if (block[i1 + 5] == block[i2 + 5]) { if (block[(i1 += 6)] == block[(i2 += 6)]) { int x = lastShadow; X: while (x > 0) { x -= 4; if (block[i1 + 1] == block[i2 + 1]) { if (quadrant[i1] == quadrant[i2]) { if (block[i1 + 2] == block[i2 + 2]) { if (quadrant[i1 + 1] == quadrant[i2 + 1]) { if (block[i1 + 3] == block[i2 + 3]) { if (quadrant[i1 + 2] == quadrant[i2 + 2]) { if (block[i1 + 4] == block[i2 + 4]) { if (quadrant[i1 + 3] == quadrant[i2 + 3]) { if ((i1 += 4) >= lastPlus1) { i1 -= lastPlus1; } if ((i2 += 4) >= lastPlus1) { i2 -= lastPlus1; } workDoneShadow++; continue X; } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { continue HAMMER; } else { break HAMMER; } } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { continue HAMMER; } else { break HAMMER; } } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { continue HAMMER; } else { break HAMMER; } } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { continue HAMMER; } else { break HAMMER; } } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { continue HAMMER; } else { break HAMMER; } } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { continue HAMMER; } else { break HAMMER; } } else if ((quadrant[i1] > quadrant[i2])) { continue HAMMER; } else { break HAMMER; } } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { continue HAMMER; } else { break HAMMER; } } break HAMMER; } // while x > 0 else { if ((block[i1] & 0xff) > (block[i2] & 0xff)) { continue HAMMER; } else { break HAMMER; } } } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { continue HAMMER; } else { break HAMMER; } } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { continue HAMMER; } else { break HAMMER; } } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { continue HAMMER; } else { break HAMMER; } } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { continue HAMMER; } else { break HAMMER; } } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { continue HAMMER; } else { break HAMMER; } } // HAMMER 
// end inline mainGTU fmap[j] = v; } if (firstAttemptShadow && (i <= hi) && (workDoneShadow > workLimitShadow)) { break HP; } } } this.workDone = workDoneShadow; return firstAttemptShadow && (workDoneShadow > workLimitShadow); } /*-- LBZ2: The following is an implementation of an elegant 3-way quicksort for strings, described in a paper "Fast Algorithms for Sorting and Searching Strings", by Robert Sedgewick and Jon L. Bentley. --*/ private static void vswap(int[] fmap, int p1, int p2, int n) { n += p1; while (p1 < n) { int t = fmap[p1]; fmap[p1++] = fmap[p2]; fmap[p2++] = t; } } private static byte med3(byte a, byte b, byte c) { return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c : a); } private static final int SMALL_THRESH = 20; private static final int DEPTH_THRESH = 10; private static final int WORK_FACTOR = 30; /** * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 */ private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow, final int loSt, final int hiSt, final int dSt, final int last) { final int[] stack_ll = this.stack_ll; final int[] stack_hh = this.stack_hh; final int[] stack_dd = this.stack_dd; final int[] fmap = dataShadow.fmap; final byte[] block = dataShadow.block; stack_ll[0] = loSt; stack_hh[0] = hiSt; stack_dd[0] = dSt; for (int sp = 1; --sp >= 0;) { final int lo = stack_ll[sp]; final int hi = stack_hh[sp]; final int d = stack_dd[sp]; if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { if (mainSimpleSort(dataShadow, lo, hi, d, last)) { return; } } else { final int d1 = d + 1; final int med = med3(block[fmap[lo] + d1], block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; int unLo = lo; int unHi = hi; int ltLo = lo; int gtHi = hi; while (true) { while (unLo <= unHi) { final int n = (block[fmap[unLo] + d1] & 0xff) - med; if (n == 0) { final int temp = fmap[unLo]; fmap[unLo++] = fmap[ltLo]; fmap[ltLo++] = temp; } else if (n < 0) { unLo++; } else { break; } } while (unLo <= unHi) { final int n = (block[fmap[unHi] + d1] & 0xff) - med; if (n == 0) { final int temp = fmap[unHi]; fmap[unHi--] = fmap[gtHi]; fmap[gtHi--] = temp; } else if (n > 0) { unHi--; } else { break; } } if (unLo <= unHi) { final int temp = fmap[unLo]; fmap[unLo++] = fmap[unHi]; fmap[unHi--] = temp; } else { break; } } if (gtHi < ltLo) { stack_ll[sp] = lo; stack_hh[sp] = hi; stack_dd[sp] = d1; sp++; } else { int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo); vswap(fmap, lo, unLo - n, n); int m = ((hi - gtHi) < (gtHi - unHi)) ? 
(hi - gtHi) : (gtHi - unHi); vswap(fmap, unLo, hi - m + 1, m); n = lo + unLo - ltLo - 1; m = hi - (gtHi - unHi) + 1; stack_ll[sp] = lo; stack_hh[sp] = n; stack_dd[sp] = d; sp++; stack_ll[sp] = n + 1; stack_hh[sp] = m - 1; stack_dd[sp] = d1; sp++; stack_ll[sp] = m; stack_hh[sp] = hi; stack_dd[sp] = d; sp++; } } } } private static final int SETMASK = (1 << 21); private static final int CLEARMASK = (~SETMASK); final void mainSort(final BZip2CompressorOutputStream.Data dataShadow, final int lastShadow) { final int[] runningOrder = this.mainSort_runningOrder; final int[] copy = this.mainSort_copy; final boolean[] bigDone = this.mainSort_bigDone; final int[] ftab = this.ftab; final byte[] block = dataShadow.block; final int[] fmap = dataShadow.fmap; final char[] quadrant = this.quadrant; final int workLimitShadow = this.workLimit; final boolean firstAttemptShadow = this.firstAttempt; // LBZ2: Set up the 2-byte frequency table for (int i = 65537; --i >= 0;) { ftab[i] = 0; } /* * In the various block-sized structures, live data runs from 0 to * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area * for block. */ for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) { block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; } for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES +1; --i >= 0;) { quadrant[i] = 0; } block[0] = block[lastShadow + 1]; // LBZ2: Complete the initial radix sort: int c1 = block[0] & 0xff; for (int i = 0; i <= lastShadow; i++) { final int c2 = block[i + 1] & 0xff; ftab[(c1 << 8) + c2]++; c1 = c2; } for (int i = 1; i <= 65536; i++) { ftab[i] += ftab[i - 1]; } c1 = block[1] & 0xff; for (int i = 0; i < lastShadow; i++) { final int c2 = block[i + 2] & 0xff; fmap[--ftab[(c1 << 8) + c2]] = i; c1 = c2; } fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; /* * LBZ2: Now ftab contains the first loc of every small bucket. Calculate the * running order, from smallest to largest big bucket. */ for (int i = 256; --i >= 0;) { bigDone[i] = false; runningOrder[i] = i; } for (int h = 364; h != 1;) { h /= 3; for (int i = h; i <= 255; i++) { final int vv = runningOrder[i]; final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; final int b = h - 1; int j = i; for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j - h]) { runningOrder[j] = ro; j -= h; if (j <= b) { break; } } runningOrder[j] = vv; } } /* * LBZ2: The main sorting loop. */ for (int i = 0; i <= 255; i++) { /* * LBZ2: Process big buckets, starting with the least full. */ final int ss = runningOrder[i]; // Step 1: /* * LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small * buckets [ss, j]. Hopefully previous pointer-scanning phases have * already completed many of the small buckets [ss, j], so we don't * have to sort them at all. */ for (int j = 0; j <= 255; j++) { final int sb = (ss << 8) + j; final int ftab_sb = ftab[sb]; if ((ftab_sb & SETMASK) != SETMASK) { final int lo = ftab_sb & CLEARMASK; final int hi = (ftab[sb + 1] & CLEARMASK) - 1; if (hi > lo) { mainQSort3(dataShadow, lo, hi, 2, lastShadow); if (firstAttemptShadow && (this.workDone > workLimitShadow)) { return; } } ftab[sb] = ftab_sb | SETMASK; } } // Step 2: // LBZ2: Now scan this big bucket so as to synthesise the // sorted order for small buckets [t, ss] for all t != ss. 
for (int j = 0; j <= 255; j++) { copy[j] = ftab[(j << 8) + ss] & CLEARMASK; } for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { final int fmap_j = fmap[j]; c1 = block[fmap_j] & 0xff; if (!bigDone[c1]) { fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); copy[c1]++; } } for (int j = 256; --j >= 0;) { ftab[(j << 8) + ss] |= SETMASK; } // Step 3: /* * LBZ2: The ss big bucket is now done. Record this fact, and update the * quadrant descriptors. Remember to update quadrants in the * overshoot area too, if necessary. The "if (i < 255)" test merely * skips this updating for the last bucket processed, since updating * for the last bucket is pointless. */ bigDone[ss] = true; if (i < 255) { final int bbStart = ftab[ss << 8] & CLEARMASK; final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; int shifts = 0; while ((bbSize >> shifts) > 65534) { shifts++; } for (int j = 0; j < bbSize; j++) { final int a2update = fmap[bbStart + j]; final char qVal = (char) (j >> shifts); quadrant[a2update] = qVal; if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) { quadrant[a2update + lastShadow + 1] = qVal; } } } } } } /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ /* * This package is based on the work done by Keiron Liddle, Aftex Software ** The compression requires large amounts of memory. Thus you should call the * {@link #close() close()} method as soon as possible, to force * {@code BZip2CompressorOutputStream} to release the allocated memory. *
 *
 * <p>You can shrink the amount of allocated memory and maybe raise
 * the compression speed by choosing a lower blocksize, which in turn
 * may cause a lower compression ratio. You can avoid unnecessary
 * memory allocation by avoiding using a blocksize which is bigger
 * than the size of the input.</p>
 *
 * <p>You can compute the memory usage for compressing by the
 * following formula:</p>
 *
 * <pre>
 * 400k + (9 * blocksize)
 * </pre>
 *
 * <p>To get the memory required for decompression by {@link
 * BZip2CompressorInputStream} use</p>
 *
 * <pre>
 * 65k + (5 * blocksize)
 * </pre>
 *
 * <table border="1">
 * <caption>Memory usage by blocksize</caption>
 * <tr><th>Blocksize</th><th>Compression memory usage</th><th>Decompression memory usage</th></tr>
 * <tr><td>100k</td><td>1300k</td><td>565k</td></tr>
 * <tr><td>200k</td><td>2200k</td><td>1065k</td></tr>
 * <tr><td>300k</td><td>3100k</td><td>1565k</td></tr>
 * <tr><td>400k</td><td>4000k</td><td>2065k</td></tr>
 * <tr><td>500k</td><td>4900k</td><td>2565k</td></tr>
 * <tr><td>600k</td><td>5800k</td><td>3065k</td></tr>
 * <tr><td>700k</td><td>6700k</td><td>3565k</td></tr>
 * <tr><td>800k</td><td>7600k</td><td>4065k</td></tr>
 * <tr><td>900k</td><td>8500k</td><td>4565k</td></tr>
 * </table>
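 *
 * <p>For example, with the default 900k blocksize the formulas above give
 * 400k + (9 * 900k) = 8500k for compression and 65k + (5 * 900k) = 4565k
 * for decompression, matching the last row of the table.</p>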
 *
 * <p>For decompression {@code BZip2CompressorInputStream} allocates less memory if the
 * bzipped input is smaller than one block.</p>
 *
 * <p>Instances of this class are not threadsafe.</p>
 *
 * <p>TODO: Update to BZip2 1.0.1</p>
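 *
 * <p>A minimal usage sketch (the file names and the use of
 * {@code java.nio.file.Files} are illustrative assumptions, not part of
 * this API):</p>
 *
 * <pre>{@code
 * java.io.File input = new java.io.File("data.txt");
 * try (java.io.OutputStream fos = new java.io.FileOutputStream("data.txt.bz2");
 *      BZip2CompressorOutputStream bzOut = new BZip2CompressorOutputStream(
 *              fos, BZip2CompressorOutputStream.chooseBlockSize(input.length()))) {
 *     java.nio.file.Files.copy(input.toPath(), bzOut);
 * }
 * }</pre>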
* @NotThreadSafe */ public class BZip2CompressorOutputStream extends CompressorOutputStream implements BZip2Constants { /** * The minimum supported blocksize {@code == 1}. */ public static final int MIN_BLOCKSIZE = 1; /** * The maximum supported blocksize {@code == 9}. */ public static final int MAX_BLOCKSIZE = 9; private static final int GREATER_ICOST = 15; private static final int LESSER_ICOST = 0; private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) { /* * Nodes and heap entries run from 1. Entry 0 for both the heap and * nodes is a sentinel. */ final int[] heap = dat.heap; final int[] weight = dat.weight; final int[] parent = dat.parent; for (int i = alphaSize; --i >= 0;) { weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; } for (boolean tooLong = true; tooLong;) { tooLong = false; int nNodes = alphaSize; int nHeap = 0; heap[0] = 0; weight[0] = 0; parent[0] = -2; for (int i = 1; i <= alphaSize; i++) { parent[i] = -1; nHeap++; heap[nHeap] = i; int zz = nHeap; int tmp = heap[zz]; while (weight[tmp] < weight[heap[zz >> 1]]) { heap[zz] = heap[zz >> 1]; zz >>= 1; } heap[zz] = tmp; } while (nHeap > 1) { int n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; int yy = 0; int zz = 1; int tmp = heap[1]; while (true) { yy = zz << 1; if (yy > nHeap) { break; } if ((yy < nHeap) && (weight[heap[yy + 1]] < weight[heap[yy]])) { yy++; } if (weight[tmp] < weight[heap[yy]]) { break; } heap[zz] = heap[yy]; zz = yy; } heap[zz] = tmp; int n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; yy = 0; zz = 1; tmp = heap[1]; while (true) { yy = zz << 1; if (yy > nHeap) { break; } if ((yy < nHeap) && (weight[heap[yy + 1]] < weight[heap[yy]])) { yy++; } if (weight[tmp] < weight[heap[yy]]) { break; } heap[zz] = heap[yy]; zz = yy; } heap[zz] = tmp; nNodes++; parent[n1] = parent[n2] = nNodes; final int weight_n1 = weight[n1]; final int weight_n2 = weight[n2]; weight[nNodes] = ((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff) : (weight_n2 & 0x000000ff))); parent[nNodes] = -1; nHeap++; heap[nHeap] = nNodes; tmp = 0; zz = nHeap; tmp = heap[zz]; final int weight_tmp = weight[tmp]; while (weight_tmp < weight[heap[zz >> 1]]) { heap[zz] = heap[zz >> 1]; zz >>= 1; } heap[zz] = tmp; } for (int i = 1; i <= alphaSize; i++) { int j = 0; int k = i; for (int parent_k; (parent_k = parent[k]) >= 0;) { k = parent_k; j++; } len[i - 1] = (byte) j; if (j > maxLen) { tooLong = true; } } if (tooLong) { for (int i = 1; i < alphaSize; i++) { int j = weight[i] >> 8; j = 1 + (j >> 1); weight[i] = j << 8; } } } } /** * Index of the last char in the block, so the block size == last + 1. */ private int last; /** * Always: in the range 0 .. 9. The current block size is 100000 * this * number. */ private final int blockSize100k; private int bsBuff; private int bsLive; private final CRC crc = new CRC(); private int nInUse; private int nMTF; private int currentChar = -1; private int runLength = 0; private int blockCRC; private int combinedCRC; private final int allowableBlockSize; /** * All memory intensive stuff. */ private Data data; private BlockSort blockSorter; private OutputStream out; /** * Chooses a blocksize based on the given length of the data to compress. * * @return The blocksize, between {@link #MIN_BLOCKSIZE} and * {@link #MAX_BLOCKSIZE} both inclusive. For a negative * {@code inputLength} this method returns {@code MAX_BLOCKSIZE} * always. 
*
* @param inputLength
*            The length of the data which will be compressed by
*            {@code BZip2CompressorOutputStream}.
*/
public static int chooseBlockSize(long inputLength) {
    return (inputLength > 0) ? (int) Math.min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
}
/**
* Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k.
*
* @param out
*            the destination stream.
*
* @throws IOException
*             if an I/O error occurs in the specified stream.
* @throws NullPointerException
*             if {@code out == null}.
*/
public BZip2CompressorOutputStream(final OutputStream out)
throws IOException {
this(out, MAX_BLOCKSIZE);
}
/**
* Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize.
*
* @param out
* the destination stream.
* @param blockSize
* the blockSize in 100k units.
*
* @throws IOException
* if an I/O error occurs in the specified stream.
* @throws IllegalArgumentException
*             if {@code (blockSize < 1) || (blockSize > 9)}.
* @throws NullPointerException
*             if {@code out == null}.
*
* @see #MIN_BLOCKSIZE
* @see #MAX_BLOCKSIZE
*/
public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
if (blockSize < 1) {
throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
}
if (blockSize > 9) {
throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
}
this.blockSize100k = blockSize;
this.out = out;
/* 20 is just a paranoia constant */
this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
init();
}
@Override
public void write(final int b) throws IOException {
if (this.out != null) {
write0(b);
} else {
throw new IOException("closed");
}
}
/**
* Writes the current byte to the buffer, run-length encoding it
* if it has been repeated at least four times (the first step
* RLEs sequences of four identical bytes).
*
* <p>Flushes the current block before writing data if it is
* full.</p>
*
* <p>"write to the buffer" means adding to data.buffer starting
* two steps "after" this.last - initially starting at index 1
* (not 0) - and updating this.last to point to the last index
* written minus 1.</p>
*/ private void writeRun() throws IOException { final int lastShadow = this.last; if (lastShadow < this.allowableBlockSize) { final int currentCharShadow = this.currentChar; final Data dataShadow = this.data; dataShadow.inUse[currentCharShadow] = true; final byte ch = (byte) currentCharShadow; int runLengthShadow = this.runLength; this.crc.updateCRC(currentCharShadow, runLengthShadow); switch (runLengthShadow) { case 1: dataShadow.block[lastShadow + 2] = ch; this.last = lastShadow + 1; break; case 2: dataShadow.block[lastShadow + 2] = ch; dataShadow.block[lastShadow + 3] = ch; this.last = lastShadow + 2; break; case 3: { final byte[] block = dataShadow.block; block[lastShadow + 2] = ch; block[lastShadow + 3] = ch; block[lastShadow + 4] = ch; this.last = lastShadow + 3; } break; default: { runLengthShadow -= 4; dataShadow.inUse[runLengthShadow] = true; final byte[] block = dataShadow.block; block[lastShadow + 2] = ch; block[lastShadow + 3] = ch; block[lastShadow + 4] = ch; block[lastShadow + 5] = ch; block[lastShadow + 6] = (byte) runLengthShadow; this.last = lastShadow + 5; } break; } } else { endBlock(); initBlock(); writeRun(); } } /** * Overriden to close the stream. */ @Override protected void finalize() throws Throwable { finish(); super.finalize(); } public void finish() throws IOException { if (out != null) { try { if (this.runLength > 0) { writeRun(); } this.currentChar = -1; endBlock(); endCompression(); } finally { this.out = null; this.data = null; this.blockSorter = null; } } } @Override public void close() throws IOException { if (out != null) { OutputStream outShadow = this.out; finish(); outShadow.close(); } } @Override public void flush() throws IOException { OutputStream outShadow = this.out; if (outShadow != null) { outShadow.flush(); } } /** * Writes magic bytes like BZ on the first position of the stream * and bytes indiciating the file-format, which is * huffmanised, followed by a digit indicating blockSize100k. * @throws IOException if the magic bytes could not been written */ private void init() throws IOException { bsPutUByte('B'); bsPutUByte('Z'); this.data = new Data(this.blockSize100k); this.blockSorter = new BlockSort(this.data); // huffmanised magic bytes bsPutUByte('h'); bsPutUByte('0' + this.blockSize100k); this.combinedCRC = 0; initBlock(); } private void initBlock() { // blockNo++; this.crc.initialiseCRC(); this.last = -1; // ch = 0; boolean[] inUse = this.data.inUse; for (int i = 256; --i >= 0;) { inUse[i] = false; } } private void endBlock() throws IOException { this.blockCRC = this.crc.getFinalCRC(); this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); this.combinedCRC ^= this.blockCRC; // empty block at end of file if (this.last == -1) { return; } /* sort the block and establish posn of original string */ blockSort(); /* * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 * :-). A 32 bit value does not really give a strong enough guarantee * that the value will not appear by chance in the compressed * datastream. Worst-case probability of this event, for a 900k block, * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 * bits. For a compressed file of size 100Gb -- about 100000 blocks -- * only a 48-bit marker will do. NB: normal compression/ decompression * donot rely on these statistical properties. They are only important * when trying to recover blocks from damaged files. 
*/ bsPutUByte(0x31); bsPutUByte(0x41); bsPutUByte(0x59); bsPutUByte(0x26); bsPutUByte(0x53); bsPutUByte(0x59); /* Now the block's CRC, so it is in a known place. */ bsPutInt(this.blockCRC); /* Now a single bit indicating no randomisation. */ bsW(1, 0); /* Finally, block's contents proper. */ moveToFrontCodeAndSend(); } private void endCompression() throws IOException { /* * Now another magic 48-bit number, 0x177245385090, to indicate the end * of the last block. (sqrt(pi), if you want to know. I did want to use * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me * to feel statistically comfortable. Call me paranoid.) */ bsPutUByte(0x17); bsPutUByte(0x72); bsPutUByte(0x45); bsPutUByte(0x38); bsPutUByte(0x50); bsPutUByte(0x90); bsPutInt(this.combinedCRC); bsFinishedWithStream(); } /** * Returns the blocksize parameter specified at construction time. */ public final int getBlockSize() { return this.blockSize100k; } @Override public void write(final byte[] buf, int offs, final int len) throws IOException { if (offs < 0) { throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); } if (len < 0) { throw new IndexOutOfBoundsException("len(" + len + ") < 0."); } if (offs + len > buf.length) { throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ")."); } if (this.out == null) { throw new IOException("stream closed"); } for (int hi = offs + len; offs < hi;) { write0(buf[offs++]); } } /** * Keeps track of the last bytes written and implicitly performs * run-length encoding as the first step of the bzip2 algorithm. */ private void write0(int b) throws IOException { if (this.currentChar != -1) { b &= 0xff; if (this.currentChar == b) { if (++this.runLength > 254) { writeRun(); this.currentChar = -1; this.runLength = 0; } // else nothing to do } else { writeRun(); this.runLength = 1; this.currentChar = b; } } else { this.currentChar = b & 0xff; this.runLength++; } } private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) { int vec = 0; for (int n = minLen; n <= maxLen; n++) { for (int i = 0; i < alphaSize; i++) { if ((length[i] & 0xff) == n) { code[i] = vec; vec++; } } vec <<= 1; } } private void bsFinishedWithStream() throws IOException { while (this.bsLive > 0) { int ch = this.bsBuff >> 24; this.out.write(ch); // write 8-bit this.bsBuff <<= 8; this.bsLive -= 8; } } private void bsW(final int n, final int v) throws IOException { final OutputStream outShadow = this.out; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); this.bsLive = bsLiveShadow + n; } private void bsPutUByte(final int c) throws IOException { bsW(8, c); } private void bsPutInt(final int u) throws IOException { bsW(8, (u >> 24) & 0xff); bsW(8, (u >> 16) & 0xff); bsW(8, (u >> 8) & 0xff); bsW(8, u & 0xff); } private void sendMTFValues() throws IOException { final byte[][] len = this.data.sendMTFValues_len; final int alphaSize = this.nInUse + 2; for (int t = N_GROUPS; --t >= 0;) { byte[] len_t = len[t]; for (int v = alphaSize; --v >= 0;) { len_t[v] = GREATER_ICOST; } } /* Decide how many coding tables to use */ // assert (this.nMTF > 0) : this.nMTF; final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 
5 : 6; /* Generate an initial set of coding tables */ sendMTFValues0(nGroups, alphaSize); /* * Iterate up to N_ITERS times to improve the tables. */ final int nSelectors = sendMTFValues1(nGroups, alphaSize); /* Compute MTF values for the selectors. */ sendMTFValues2(nGroups, nSelectors); /* Assign actual codes for the tables. */ sendMTFValues3(nGroups, alphaSize); /* Transmit the mapping table. */ sendMTFValues4(); /* Now the selectors. */ sendMTFValues5(nGroups, nSelectors); /* Now the coding tables. */ sendMTFValues6(nGroups, alphaSize); /* And finally, the block data proper */ sendMTFValues7(); } private void sendMTFValues0(final int nGroups, final int alphaSize) { final byte[][] len = this.data.sendMTFValues_len; final int[] mtfFreq = this.data.mtfFreq; int remF = this.nMTF; int gs = 0; for (int nPart = nGroups; nPart > 0; nPart--) { final int tFreq = remF / nPart; int ge = gs - 1; int aFreq = 0; for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { aFreq += mtfFreq[++ge]; } if ((ge > gs) && (nPart != nGroups) && (nPart != 1) && (((nGroups - nPart) & 1) != 0)) { aFreq -= mtfFreq[ge--]; } final byte[] len_np = len[nPart - 1]; for (int v = alphaSize; --v >= 0;) { if ((v >= gs) && (v <= ge)) { len_np[v] = LESSER_ICOST; } else { len_np[v] = GREATER_ICOST; } } gs = ge + 1; remF -= aFreq; } } private int sendMTFValues1(final int nGroups, final int alphaSize) { final Data dataShadow = this.data; final int[][] rfreq = dataShadow.sendMTFValues_rfreq; final int[] fave = dataShadow.sendMTFValues_fave; final short[] cost = dataShadow.sendMTFValues_cost; final char[] sfmap = dataShadow.sfmap; final byte[] selector = dataShadow.selector; final byte[][] len = dataShadow.sendMTFValues_len; final byte[] len_0 = len[0]; final byte[] len_1 = len[1]; final byte[] len_2 = len[2]; final byte[] len_3 = len[3]; final byte[] len_4 = len[4]; final byte[] len_5 = len[5]; final int nMTFShadow = this.nMTF; int nSelectors = 0; for (int iter = 0; iter < N_ITERS; iter++) { for (int t = nGroups; --t >= 0;) { fave[t] = 0; int[] rfreqt = rfreq[t]; for (int i = alphaSize; --i >= 0;) { rfreqt[i] = 0; } } nSelectors = 0; for (int gs = 0; gs < this.nMTF;) { /* Set group start & end marks. */ /* * Calculate the cost of this group as coded by each of the * coding tables. */ final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); if (nGroups == N_GROUPS) { // unrolled version of the else-block short cost0 = 0; short cost1 = 0; short cost2 = 0; short cost3 = 0; short cost4 = 0; short cost5 = 0; for (int i = gs; i <= ge; i++) { final int icv = sfmap[i]; cost0 += len_0[icv] & 0xff; cost1 += len_1[icv] & 0xff; cost2 += len_2[icv] & 0xff; cost3 += len_3[icv] & 0xff; cost4 += len_4[icv] & 0xff; cost5 += len_5[icv] & 0xff; } cost[0] = cost0; cost[1] = cost1; cost[2] = cost2; cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; } else { for (int t = nGroups; --t >= 0;) { cost[t] = 0; } for (int i = gs; i <= ge; i++) { final int icv = sfmap[i]; for (int t = nGroups; --t >= 0;) { cost[t] += len[t][icv] & 0xff; } } } /* * Find the coding table which is best for this group, and * record its identity in the selector table. */ int bt = -1; for (int t = nGroups, bc = 999999999; --t >= 0;) { final int cost_t = cost[t]; if (cost_t < bc) { bc = cost_t; bt = t; } } fave[bt]++; selector[nSelectors] = (byte) bt; nSelectors++; /* * Increment the symbol frequencies for the selected table. 
*/ final int[] rfreq_bt = rfreq[bt]; for (int i = gs; i <= ge; i++) { rfreq_bt[sfmap[i]]++; } gs = ge + 1; } /* * Recompute the tables based on the accumulated frequencies. */ for (int t = 0; t < nGroups; t++) { hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); } } return nSelectors; } private void sendMTFValues2(final int nGroups, final int nSelectors) { // assert (nGroups < 8) : nGroups; final Data dataShadow = this.data; byte[] pos = dataShadow.sendMTFValues2_pos; for (int i = nGroups; --i >= 0;) { pos[i] = (byte) i; } for (int i = 0; i < nSelectors; i++) { final byte ll_i = dataShadow.selector[i]; byte tmp = pos[0]; int j = 0; while (ll_i != tmp) { j++; byte tmp2 = tmp; tmp = pos[j]; pos[j] = tmp2; } pos[0] = tmp; dataShadow.selectorMtf[i] = (byte) j; } } private void sendMTFValues3(final int nGroups, final int alphaSize) { int[][] code = this.data.sendMTFValues_code; byte[][] len = this.data.sendMTFValues_len; for (int t = 0; t < nGroups; t++) { int minLen = 32; int maxLen = 0; final byte[] len_t = len[t]; for (int i = alphaSize; --i >= 0;) { final int l = len_t[i] & 0xff; if (l > maxLen) { maxLen = l; } if (l < minLen) { minLen = l; } } // assert (maxLen <= 20) : maxLen; // assert (minLen >= 1) : minLen; hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); } } private void sendMTFValues4() throws IOException { final boolean[] inUse = this.data.inUse; final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; for (int i = 16; --i >= 0;) { inUse16[i] = false; final int i16 = i * 16; for (int j = 16; --j >= 0;) { if (inUse[i16 + j]) { inUse16[i] = true; } } } for (int i = 0; i < 16; i++) { bsW(1, inUse16[i] ? 1 : 0); } final OutputStream outShadow = this.out; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; for (int i = 0; i < 16; i++) { if (inUse16[i]) { final int i16 = i * 16; for (int j = 0; j < 16; j++) { // inlined: bsW(1, inUse[i16 + j] ? 
1 : 0); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } if (inUse[i16 + j]) { bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); } bsLiveShadow++; } } } this.bsBuff = bsBuffShadow; this.bsLive = bsLiveShadow; } private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException { bsW(3, nGroups); bsW(15, nSelectors); final OutputStream outShadow = this.out; final byte[] selectorMtf = this.data.selectorMtf; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; for (int i = 0; i < nSelectors; i++) { for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { // inlined: bsW(1, 1); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); bsBuffShadow <<= 8; bsLiveShadow -= 8; } bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); bsLiveShadow++; } // inlined: bsW(1, 0); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); bsBuffShadow <<= 8; bsLiveShadow -= 8; } // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); bsLiveShadow++; } this.bsBuff = bsBuffShadow; this.bsLive = bsLiveShadow; } private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException { final byte[][] len = this.data.sendMTFValues_len; final OutputStream outShadow = this.out; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; for (int t = 0; t < nGroups; t++) { byte[] len_t = len[t]; int curr = len_t[0] & 0xff; // inlined: bsW(5, curr); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } bsBuffShadow |= curr << (32 - bsLiveShadow - 5); bsLiveShadow += 5; for (int i = 0; i < alphaSize; i++) { int lti = len_t[i] & 0xff; while (curr < lti) { // inlined: bsW(2, 2); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); bsLiveShadow += 2; curr++; /* 10 */ } while (curr > lti) { // inlined: bsW(2, 3); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); bsLiveShadow += 2; curr--; /* 11 */ } // inlined: bsW(1, 0); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); bsLiveShadow++; } } this.bsBuff = bsBuffShadow; this.bsLive = bsLiveShadow; } private void sendMTFValues7() throws IOException { final Data dataShadow = this.data; final byte[][] len = dataShadow.sendMTFValues_len; final int[][] code = dataShadow.sendMTFValues_code; final OutputStream outShadow = this.out; final byte[] selector = dataShadow.selector; final char[] sfmap = dataShadow.sfmap; final int nMTFShadow = this.nMTF; int selCtr = 0; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; for (int gs = 0; gs < nMTFShadow;) { final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); final int selector_selCtr = selector[selCtr] & 0xff; final int[] code_selCtr = code[selector_selCtr]; final byte[] len_selCtr = len[selector_selCtr]; while (gs <= ge) { final int sfmap_i = sfmap[gs]; // // inlined: bsW(len_selCtr[sfmap_i] & 0xff, // code_selCtr[sfmap_i]); // while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); bsBuffShadow <<= 8; bsLiveShadow -= 8; } final int n = len_selCtr[sfmap_i] & 0xFF; bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); bsLiveShadow += n; gs++; } gs = ge + 1; selCtr++; } 
this.bsBuff = bsBuffShadow; this.bsLive = bsLiveShadow; } private void moveToFrontCodeAndSend() throws IOException { bsW(24, this.data.origPtr); generateMTFValues(); sendMTFValues(); } private void blockSort() { blockSorter.blockSort(data, last); } /* * Performs Move-To-Front on the Burrows-Wheeler transformed * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB * run-length-encoded form. * *Keeps track of byte frequencies in data.mtfFreq at the same time.
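 *
 * A naive sketch of plain Move-To-Front, without the RUNA/RUNB
 * run-length encoding applied by this method (the helper below is
 * illustrative, not part of this class):
 *
 *   int moveToFront(byte[] alphabet, byte symbol) {
 *       int j = 0;
 *       while (alphabet[j] != symbol) {
 *           j++;
 *       }
 *       // shift the preceding symbols back and put this one in front
 *       System.arraycopy(alphabet, 0, alphabet, 1, j);
 *       alphabet[0] = symbol;
 *       return j; // the emitted MTF value
 *   }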
*/ private void generateMTFValues() { final int lastShadow = this.last; final Data dataShadow = this.data; final boolean[] inUse = dataShadow.inUse; final byte[] block = dataShadow.block; final int[] fmap = dataShadow.fmap; final char[] sfmap = dataShadow.sfmap; final int[] mtfFreq = dataShadow.mtfFreq; final byte[] unseqToSeq = dataShadow.unseqToSeq; final byte[] yy = dataShadow.generateMTFValues_yy; // make maps int nInUseShadow = 0; for (int i = 0; i < 256; i++) { if (inUse[i]) { unseqToSeq[i] = (byte) nInUseShadow; nInUseShadow++; } } this.nInUse = nInUseShadow; final int eob = nInUseShadow + 1; for (int i = eob; i >= 0; i--) { mtfFreq[i] = 0; } for (int i = nInUseShadow; --i >= 0;) { yy[i] = (byte) i; } int wr = 0; int zPend = 0; for (int i = 0; i <= lastShadow; i++) { final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; byte tmp = yy[0]; int j = 0; while (ll_i != tmp) { j++; byte tmp2 = tmp; tmp = yy[j]; yy[j] = tmp2; } yy[0] = tmp; if (j == 0) { zPend++; } else { if (zPend > 0) { zPend--; while (true) { if ((zPend & 1) == 0) { sfmap[wr] = RUNA; wr++; mtfFreq[RUNA]++; } else { sfmap[wr] = RUNB; wr++; mtfFreq[RUNB]++; } if (zPend >= 2) { zPend = (zPend - 2) >> 1; } else { break; } } zPend = 0; } sfmap[wr] = (char) (j + 1); wr++; mtfFreq[j + 1]++; } } if (zPend > 0) { zPend--; while (true) { if ((zPend & 1) == 0) { sfmap[wr] = RUNA; wr++; mtfFreq[RUNA]++; } else { sfmap[wr] = RUNB; wr++; mtfFreq[RUNB]++; } if (zPend >= 2) { zPend = (zPend - 2) >> 1; } else { break; } } } sfmap[wr] = (char) eob; mtfFreq[eob]++; this.nMTF = wr + 1; } static final class Data { // with blockSize 900k /* maps unsigned byte => "does it occur in block" */ final boolean[] inUse = new boolean[256]; // 256 byte final byte[] unseqToSeq = new byte[256]; // 256 byte final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte final byte[] generateMTFValues_yy = new byte[256]; // 256 byte final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 // byte final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 // byte final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 // byte final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte // ------------ // 333408 byte /* holds the RLEd block of original data starting at index 1. * After sorting the last byte added to the buffer is at index * 0. */ final byte[] block; // 900021 byte /* maps index in Burrows-Wheeler transformed block => index of * byte in original block */ final int[] fmap; // 3600000 byte final char[] sfmap; // 3600000 byte // ------------ // 8433529 byte // ============ /** * Index of original line in Burrows-Wheeler table. * *This is the index in fmap that points to the last byte * of the original data.
*/ int origPtr; Data(int blockSize100k) { final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; this.fmap = new int[n]; this.sfmap = new char[2 * n]; } } } /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.commons.compress.compressors.bzip2; /** * Constants for both the compress and decompress BZip2 classes. */ interface BZip2Constants { int BASEBLOCKSIZE = 100000; int MAX_ALPHA_SIZE = 258; int MAX_CODE_LEN = 23; int RUNA = 0; int RUNB = 1; int N_GROUPS = 6; int G_SIZE = 50; int N_ITERS = 4; int MAX_SELECTORS = (2 + (900000 / G_SIZE)); int NUM_OVERSHOOT_BYTES = 20; }/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.commons.compress.compressors.bzip2; import java.util.LinkedHashMap; import java.util.Map; import org.apache.commons.compress.compressors.FileNameUtil; /** * Utility code for the BZip2 compression format. * @ThreadSafe * @since 1.1 */ public abstract class BZip2Utils { private static final FileNameUtil fileNameUtil; static { MapFactory to create Compressor[In|Out]putStreams from names. To add other * implementations you should extend CompressorStreamFactory and override the * appropriate methods (and call their implementation from super of course).
*
* <p>Example (Compressing a file):</p>
* <pre>{@code
* final OutputStream out = new FileOutputStream(output);
* CompressorOutputStream cos =
*     new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.BZIP2, out);
* IOUtils.copy(new FileInputStream(input), cos);
* cos.close();
* }</pre>
*
* <p>Example (Decompressing a file):</p>
* final InputStream is = new FileInputStream(input); * CompressorInputStream in = * new CompressorStreamFactory().createCompressorInputStream(CompressorStreamFactory.BZIP2, is); * IOUtils.copy(in, new FileOutputStream(output)); * in.close(); ** @Immutable provided that the deprecated method setDecompressConcatenated is not used. * @ThreadSafe even if the deprecated method setDecompressConcatenated is used */ public class CompressorStreamFactory { /** * Constant (value {@value}) used to identify the BZIP2 compression algorithm. * @since 1.1 */ public static final String BZIP2 = "bzip2"; /** * Constant (value {@value}) used to identify the GZIP compression algorithm. * Not supported as an output stream type. * @since 1.1 */ public static final String GZIP = "gz"; /** * Constant (value {@value}) used to identify the PACK200 compression algorithm. * @since 1.3 */ public static final String PACK200 = "pack200"; /** * Constant (value {@value}) used to identify the XZ compression method. * @since 1.4 */ public static final String XZ = "xz"; /** * Constant (value {@value}) used to identify the LZMA compression method. * Not supported as an output stream type. * @since 1.6 */ public static final String LZMA = "lzma"; /** * Constant (value {@value}) used to identify the "framed" Snappy compression method. * Not supported as an output stream type. * @since 1.7 */ public static final String SNAPPY_FRAMED = "snappy-framed"; /** * Constant (value {@value}) used to identify the "raw" Snappy compression method. * Not supported as an output stream type. * @since 1.7 */ public static final String SNAPPY_RAW = "snappy-raw"; /** * Constant (value {@value}) used to identify the traditional Unix compress method. * Not supported as an output stream type. * @since 1.7 */ public static final String Z = "z"; /** * Constant (value {@value}) used to identify the Deflate compress method. * @since 1.9 */ public static final String DEFLATE = "deflate"; /** * If true, decompress until the end of the input. * If false, stop after the first stream and leave the * input position to point to the next byte after the stream */ private final Boolean decompressUntilEOF; // This is Boolean so setDecompressConcatenated can determine whether it has been set by the ctor // once the setDecompressConcatenated method has been removed, it can revert to boolean /** * If true, decompress until the end of the input. * If false, stop after the first stream and leave the * input position to point to the next byte after the stream */ private volatile boolean decompressConcatenated = false; /** * Create an instance with the decompress Concatenated option set to false. */ public CompressorStreamFactory() { this.decompressUntilEOF = null; } /** * Create an instance with the provided decompress Concatenated option. * @param decompressUntilEOF * if true, decompress until the end of the * input; if false, stop after the first * stream and leave the input position to point * to the next byte after the stream. * This setting applies to the gzip, bzip2 and xz formats only. * @since 1.10 */ public CompressorStreamFactory(boolean decompressUntilEOF) { this.decompressUntilEOF = Boolean.valueOf(decompressUntilEOF); // Also copy to existing variable so can continue to use that as the current value this.decompressConcatenated = decompressUntilEOF; } /** * Whether to decompress the full input or only the first stream * in formats supporting multiple concatenated input streams. * *
This setting applies to the gzip, bzip2 and xz formats only.
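     * <p>Since this setter is deprecated, the preferred way to choose the
     * behavior is the constructor (a short sketch):</p>
     * <pre>{@code
     * // decompress until the end of the input rather than stopping
     * // after the first stream
     * CompressorStreamFactory factory = new CompressorStreamFactory(true);
     * }</pre>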
* * @param decompressConcatenated * if true, decompress until the end of the * input; if false, stop after the first * stream and leave the input position to point * to the next byte after the stream * @since 1.5 * @deprecated 1.10 use the {@link #CompressorStreamFactory(boolean)} constructor instead * @throws IllegalStateException if the constructor {@link #CompressorStreamFactory(boolean)} * was used to create the factory */ @Deprecated public void setDecompressConcatenated(boolean decompressConcatenated) { if (this.decompressUntilEOF != null) { throw new IllegalStateException("Cannot override the setting defined by the constructor"); } this.decompressConcatenated = decompressConcatenated; } /** * Create an compressor input stream from an input stream, autodetecting * the compressor type from the first few bytes of the stream. The InputStream * must support marks, like BufferedInputStream. * * @param in the input stream * @return the compressor input stream * @throws CompressorException if the compressor name is not known * @throws IllegalArgumentException if the stream is null or does not support mark * @since 1.1 */ public CompressorInputStream createCompressorInputStream(final InputStream in) throws CompressorException { if (in == null) { throw new IllegalArgumentException("Stream must not be null."); } if (!in.markSupported()) { throw new IllegalArgumentException("Mark is not supported."); } final byte[] signature = new byte[12]; in.mark(signature.length); try { int signatureLength = IOUtils.readFully(in, signature); in.reset(); if (BZip2CompressorInputStream.matches(signature, signatureLength)) { return new BZip2CompressorInputStream(in, decompressConcatenated); } if (GzipCompressorInputStream.matches(signature, signatureLength)) { return new GzipCompressorInputStream(in, decompressConcatenated); } if (Pack200CompressorInputStream.matches(signature, signatureLength)) { return new Pack200CompressorInputStream(in); } if (FramedSnappyCompressorInputStream.matches(signature, signatureLength)) { return new FramedSnappyCompressorInputStream(in); } if (ZCompressorInputStream.matches(signature, signatureLength)) { return new ZCompressorInputStream(in); } if (DeflateCompressorInputStream.matches(signature, signatureLength)) { return new DeflateCompressorInputStream(in); } if (XZUtils.matches(signature, signatureLength) && XZUtils.isXZCompressionAvailable()) { return new XZCompressorInputStream(in, decompressConcatenated); } if (LZMAUtils.matches(signature, signatureLength) && LZMAUtils.isLZMACompressionAvailable()) { return new LZMACompressorInputStream(in); } } catch (IOException e) { throw new CompressorException("Failed to detect Compressor from InputStream.", e); } throw new CompressorException("No Compressor found for the stream signature."); } /** * Create a compressor input stream from a compressor name and an input stream. * * @param name of the compressor, * i.e. 
{@value #GZIP}, {@value #BZIP2}, {@value #XZ}, {@value #LZMA}, * {@value #PACK200}, {@value #SNAPPY_RAW}, {@value #SNAPPY_FRAMED}, * {@value #Z} or {@value #DEFLATE} * @param in the input stream * @return compressor input stream * @throws CompressorException if the compressor name is not known * @throws IllegalArgumentException if the name or input stream is null */ public CompressorInputStream createCompressorInputStream(final String name, final InputStream in) throws CompressorException { if (name == null || in == null) { throw new IllegalArgumentException( "Compressor name and stream must not be null."); } try { if (GZIP.equalsIgnoreCase(name)) { return new GzipCompressorInputStream(in, decompressConcatenated); } if (BZIP2.equalsIgnoreCase(name)) { return new BZip2CompressorInputStream(in, decompressConcatenated); } if (XZ.equalsIgnoreCase(name)) { return new XZCompressorInputStream(in, decompressConcatenated); } if (LZMA.equalsIgnoreCase(name)) { return new LZMACompressorInputStream(in); } if (PACK200.equalsIgnoreCase(name)) { return new Pack200CompressorInputStream(in); } if (SNAPPY_RAW.equalsIgnoreCase(name)) { return new SnappyCompressorInputStream(in); } if (SNAPPY_FRAMED.equalsIgnoreCase(name)) { return new FramedSnappyCompressorInputStream(in); } if (Z.equalsIgnoreCase(name)) { return new ZCompressorInputStream(in); } if (DEFLATE.equalsIgnoreCase(name)) { return new DeflateCompressorInputStream(in); } } catch (IOException e) { throw new CompressorException( "Could not create CompressorInputStream.", e); } throw new CompressorException("Compressor: " + name + " not found."); } /** * Create an compressor output stream from an compressor name and an output stream. * * @param name the compressor name, * i.e. {@value #GZIP}, {@value #BZIP2}, {@value #XZ}, * {@value #PACK200} or {@value #DEFLATE} * @param out the output stream * @return the compressor output stream * @throws CompressorException if the archiver name is not known * @throws IllegalArgumentException if the archiver name or stream is null */ public CompressorOutputStream createCompressorOutputStream( final String name, final OutputStream out) throws CompressorException { if (name == null || out == null) { throw new IllegalArgumentException( "Compressor name and stream must not be null."); } try { if (GZIP.equalsIgnoreCase(name)) { return new GzipCompressorOutputStream(out); } if (BZIP2.equalsIgnoreCase(name)) { return new BZip2CompressorOutputStream(out); } if (XZ.equalsIgnoreCase(name)) { return new XZCompressorOutputStream(out); } if (PACK200.equalsIgnoreCase(name)) { return new Pack200CompressorOutputStream(out); } if (DEFLATE.equalsIgnoreCase(name)) { return new DeflateCompressorOutputStream(out); } } catch (IOException e) { throw new CompressorException( "Could not create CompressorOutputStream", e); } throw new CompressorException("Compressor: " + name + " not found."); } // For Unit tests boolean getDecompressConcatenated() { return decompressConcatenated; } } /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.commons.compress.compressors.bzip2; /** * A simple class the hold and calculate the CRC for sanity checking of the * data. * @NotThreadSafe */ class CRC { private static final int crc32Table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 
0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; CRC() { initialiseCRC(); } void initialiseCRC() { globalCrc = 0xffffffff; } int getFinalCRC() { return ~globalCrc; } int getGlobalCRC() { return globalCrc; } void setGlobalCRC(int newCrc) { globalCrc = newCrc; } void updateCRC(int inCh) { int temp = (globalCrc >> 24) ^ inCh; if (temp < 0) { temp = 256 + temp; } globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp]; } void updateCRC(int inCh, int repeat) { int globalCrcShadow = this.globalCrc; while (repeat-- > 0) { int temp = (globalCrcShadow >> 24) ^ inCh; globalCrcShadow = (globalCrcShadow << 8) ^ crc32Table[(temp >= 0) ? temp : (temp + 256)]; } this.globalCrc = globalCrcShadow; } private int globalCrc; }/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.commons.compress.compressors.deflate; import java.io.IOException; import java.io.InputStream; import java.util.zip.Inflater; import java.util.zip.InflaterInputStream; import org.apache.commons.compress.compressors.CompressorInputStream; /** * Deflate decompressor. * @since 1.9 */ public class DeflateCompressorInputStream extends CompressorInputStream { private static final int MAGIC_1 = 0x78; private static final int MAGIC_2a = 0x01; private static final int MAGIC_2b = 0x5e; private static final int MAGIC_2c = 0x9c; private static final int MAGIC_2d = 0xda; private final InputStream in; /** * Creates a new input stream that decompresses Deflate-compressed data * from the specified input stream. * * @param inputStream where to read the compressed data * */ public DeflateCompressorInputStream(InputStream inputStream) { this(inputStream, new DeflateParameters()); } /** * Creates a new input stream that decompresses Deflate-compressed data * from the specified input stream. * * @param inputStream where to read the compressed data * @param parameters parameters */ public DeflateCompressorInputStream(InputStream inputStream, DeflateParameters parameters) { in = new InflaterInputStream(inputStream, new Inflater(!parameters.withZlibHeader())); } /** {@inheritDoc} */ @Override public int read() throws IOException { int ret = in.read(); count(ret == -1 ? 0 : 1); return ret; } /** {@inheritDoc} */ @Override public int read(byte[] buf, int off, int len) throws IOException { int ret = in.read(buf, off, len); count(ret); return ret; } /** {@inheritDoc} */ @Override public long skip(long n) throws IOException { return in.skip(n); } /** {@inheritDoc} */ @Override public int available() throws IOException { return in.available(); } /** {@inheritDoc} */ @Override public void close() throws IOException { in.close(); } /** * Checks if the signature matches what is expected for a zlib / deflated file * with the zlib header. 
* * @param signature * the bytes to check * @param length * the number of bytes to check * @return true, if this stream is zlib / deflate compressed with a header * stream, false otherwise * * @since 1.10 */ public static boolean matches(byte[] signature, int length) { return length > 3 && signature[0] == MAGIC_1 && ( signature[1] == (byte) MAGIC_2a || signature[1] == (byte) MAGIC_2b || signature[1] == (byte) MAGIC_2c || signature[1] == (byte) MAGIC_2d); } } /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.commons.compress.compressors.deflate; import java.io.IOException; import java.io.OutputStream; import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; import org.apache.commons.compress.compressors.CompressorOutputStream; /** * Deflate compressor. * @since 1.9 */ public class DeflateCompressorOutputStream extends CompressorOutputStream { private final DeflaterOutputStream out; /** * Creates a Deflate compressed output stream with the default parameters. * @param outputStream the stream to wrap * @throws IOException on error */ public DeflateCompressorOutputStream(OutputStream outputStream) throws IOException { this(outputStream, new DeflateParameters()); } /** * Creates a Deflate compressed output stream with the specified parameters. * @param outputStream the stream to wrap * @param parameters the deflate parameters to apply * @throws IOException on error */ public DeflateCompressorOutputStream(OutputStream outputStream, DeflateParameters parameters) throws IOException { this.out = new DeflaterOutputStream(outputStream, new Deflater(parameters.getCompressionLevel(), !parameters.withZlibHeader())); } @Override public void write(int b) throws IOException { out.write(b); } @Override public void write(byte[] buf, int off, int len) throws IOException { out.write(buf, off, len); } /** * Flushes the encoder and callsoutputStream.flush()
.
* All buffered pending data will then be decompressible from
* the output stream. Calling this function very often may increase
* the compressed file size a lot.
*/
@Override
public void flush() throws IOException {
out.flush();
}
/**
* Finishes compression without closing the underlying stream.
* No more data can be written to this stream after finishing.
*/
public void finish() throws IOException {
out.finish();
}
@Override
public void close() throws IOException {
out.close();
}
}
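/*
 * A minimal round-trip sketch (not part of the library source) tying the
 * two Deflate streams above together. It assumes both sides agree on the
 * same DeflateParameters, since a raw deflate stream without the zlib
 * header carries nothing that would let the reader detect the settings.
 */
class DeflateRoundTripExample {
    public static void main(String[] args) throws java.io.IOException {
        DeflateParameters params = new DeflateParameters();
        params.setWithZlibHeader(false); // raw deflate, no zlib header
        params.setCompressionLevel(java.util.zip.Deflater.BEST_COMPRESSION);

        // Compress into an in-memory buffer.
        java.io.ByteArrayOutputStream sink = new java.io.ByteArrayOutputStream();
        DeflateCompressorOutputStream deflateOut =
            new DeflateCompressorOutputStream(sink, params);
        deflateOut.write("Commons Compress".getBytes("ISO-8859-1"));
        deflateOut.close(); // finishes the deflater and closes the stream

        // Decompress with the matching parameters object.
        DeflateCompressorInputStream deflateIn = new DeflateCompressorInputStream(
            new java.io.ByteArrayInputStream(sink.toByteArray()), params);
        byte[] buf = new byte[64];
        int n, total = 0;
        while (total < buf.length
                && (n = deflateIn.read(buf, total, buf.length - total)) != -1) {
            total += n;
        }
        deflateIn.close();
        System.out.println(new String(buf, 0, total, "ISO-8859-1"));
    }
}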
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.deflate;
import java.util.zip.Deflater;
/**
* Parameters for the Deflate compressor.
* @since 1.9
*/
public class DeflateParameters {
private boolean zlibHeader = true;
private int compressionLevel = Deflater.DEFAULT_COMPRESSION;
/**
* Whether or not the zlib header shall be written (when
* compressing) or expected (when decompressing).
*/
public boolean withZlibHeader() {
return zlibHeader;
}
/**
* Sets the zlib header presence parameter.
*
* This affects whether or not the zlib header will be written
* (when compressing) or expected (when decompressing).
* * @param zlibHeader */ public void setWithZlibHeader(boolean zlibHeader) { this.zlibHeader = zlibHeader; } /** * The compression level. * @see #setCompressionLevel */ public int getCompressionLevel() { return compressionLevel; } /** * Sets the compression level. * * @param compressionLevel the compression level (between 0 and 9) * @see Deflater#NO_COMPRESSION * @see Deflater#BEST_SPEED * @see Deflater#DEFAULT_COMPRESSION * @see Deflater#BEST_COMPRESSION */ public void setCompressionLevel(int compressionLevel) { if (compressionLevel < -1 || compressionLevel > 9) { throw new IllegalArgumentException("Invalid Deflate compression level: " + compressionLevel); } this.compressionLevel = compressionLevel; } } /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.commons.compress.compressors; import java.util.Collections; import java.util.HashMap; import java.util.Locale; import java.util.Map; /** * File name mapping code for the compression formats. * @ThreadSafe * @since 1.4 */ public class FileNameUtil { /** * Map from common filename suffixes to the suffixes that identify compressed * versions of those file types. For example: from ".tar" to ".tgz". */ private final Map
* This map also contains format-specific suffixes like ".gz" and "-z".
* These suffixes are mapped to the empty string, as they should simply
* be removed from the filename when the file is uncompressed.
*/
    private final Map<String, String> uncompressSuffix;

Notes from other compressor classes in this package tree:

- FramedSnappyCompressorInputStream: based on the "spec" in the version
  "Last revised: 2013-10-25", .sz files start with a chunk with tag 0xff
  and content sNaPpY.
- SnappyCompressorInputStream: this file is a copy of the implementation
  at the Apache Hadoop project. The implementation uses an internal buffer
  in order to handle the back-references that are at the heart of the LZ77
  algorithm; the size of the buffer must be at least as big as the biggest
  offset used in the compressed stream. The current version of the Snappy
  algorithm as defined by Google works on 32k blocks and doesn't contain
  offsets bigger than 32k, which is the default block size used by this
  class. A helper moves all bytes of the buffer after the first block down
  to the beginning of the buffer.
- Pack200 normalization: as stated in Pack200.Packer's javadocs, applying
  a Pack200 compression to a JAR archive will in general make its
  signatures invalid. In order to prepare a JAR for signing it should be
  "normalized" by packing and unpacking it, which is what the normalize
  methods do. They do not replace the existing archive but create a new
  one, and they implicitly set the segment length to -1.
- The cache-availability setters default to {@code false} in an OSGi
  environment and {@code true} otherwise.
- Several methods are only protected for technical reasons and are not
  part of Commons Compress' published API; they may change or disappear
  without warning.
- When reading from a file the File-arg constructor may provide better
  performance.

Package summaries:

- bzip2: Provides stream classes for compressing and decompressing
  streams using the BZip2 algorithm.
- deflate: Provides stream classes that allow (de)compressing streams
  using the DEFLATE algorithm.
- gzip: Provides stream classes for compressing and decompressing
  streams using the GZip algorithm. The classes in this package are
  wrappers around java.util.zip.GZIPInputStream and
  java.util.zip.GZIPOutputStream.
- lzma: Provides a stream class decompressing streams using the
  "stand-alone" LZMA algorithm. The class in this package is a wrapper
  around org.tukaani.xz.LZMAInputStream provided by the public domain
  XZ for Java library. In general you should prefer the more modern and
  robust XZ format over stand-alone LZMA compression.
- lzw: Generic LZW implementation. It is used internally for the Z
  decompressor and the Unshrinking Zip file compression method, but may
  be useful for third-party projects in implementing their own LZW
  variations.
- compressors: Provides a unified API and factories for dealing with
  compressed streams.
- pack200: Provides stream classes for compressing and decompressing
  streams using the Pack200 algorithm used to compress Java archives.
  The streams of this package only work on JAR archives, i.e. a
  Pack200CompressorOutputStream expects to be wrapped around a stream
  that a valid JAR archive will be written to, and a
  Pack200CompressorInputStream provides a stream to read from a JAR
  archive. JAR archives compressed with Pack200 will in general be
  different from the original archive when decompressed again; for
  details see the API documentation of Pack200. The streams of this
  package work on non-deflated streams, i.e. archives like those created
  with the --no-gzip option of the JDK's pack200 command line tool. If
  you want to work on deflated streams you must use an additional stream
  layer - for example by using Apache Commons Compress' gzip package.
  The Pack200 API provided by the Java class library doesn't lend itself
  to real stream processing: Pack200CompressorInputStream will
  uncompress its input immediately and then provide an InputStream to a
  cached result, and likewise Pack200CompressorOutputStream will not
  write anything to the given OutputStream until finish or close is
  called - at which point the cached output written so far gets
  compressed. Two different caching modes are available - "in memory",
  which is the default, and "temporary file". By default data is cached
  in memory but you should switch to the temporary file option if your
  archives are really big. Given there always is an intermediate result,
  the getBytesRead and getCount methods of Pack200CompressorInputStream
  are meaningless (read from the real stream or from the intermediate
  result?) and always return 0. During development of the initial
  version several attempts have been made to use a real streaming API
  based for example on Piped(In|Out)putStream or explicit stream pumping
  like Commons Exec's InputStreamPumper, but they have all failed
  because they rely on the output end to be consumed completely or else
  the (un)pack will block forever. Especially for Pack200InputStream it
  is very likely that it will be wrapped in a ZipArchiveInputStream
  which will never read the archive completely, as it is not interested
  in the ZIP central directory data at the end of the JAR archive.
- snappy: Provides stream classes for decompressing streams using the
  Snappy algorithm. The raw Snappy format which only contains the
  compressed data is supported by the SnappyCompressorInputStream class,
  while the so called "framing format" is implemented by
  FramedSnappyCompressorInputStream. Note there have been different
  versions of the framing format specification; the implementation in
  Commons Compress is based on the specification "Last revised:
  2013-10-25". Only the "framing format" can be auto-detected, which
  means you have to specify the format explicitly if you want to read a
  "raw" Snappy stream via CompressorStreamFactory.
- xz: Provides stream classes for compressing and decompressing streams
  using the XZ algorithm. The classes in this package are wrappers
  around org.tukaani.xz.XZInputStream and org.tukaani.xz.XZOutputStream
  provided by the public domain XZ for Java library.
- z: Provides stream classes for decompressing streams using the
  "compress" algorithm used to write .Z files.

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.gzip;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.utils.CharsetNames;
/**
 * Gzip decompressor.
 *
 * {@link java.util.zip.GZIPInputStream} doesn't decompress concatenated .gz
 * files: it stops after the first member and silently ignores the rest.
 * It doesn't leave the read position to point to the beginning of the next
 * member, which makes it difficult to work around the lack of concatenation
 * support.
 *
 * Instead of using GZIPInputStream, this class has its own .gz
 * container format decoder. The actual decompression is done with
 * {@link java.util.zip.Inflater}.
 */
public class GzipCompressorInputStream extends CompressorInputStream {
// Header flags
// private static final int FTEXT = 0x01; // Uninteresting for us
private static final int FHCRC = 0x02;
private static final int FEXTRA = 0x04;
private static final int FNAME = 0x08;
private static final int FCOMMENT = 0x10;
private static final int FRESERVED = 0xE0;
// Compressed input stream, possibly wrapped in a BufferedInputStream
private final InputStream in;
// True if decompressing multimember streams.
private final boolean decompressConcatenated;
// Buffer to hold the input data
private final byte[] buf = new byte[8192];
// Amount of data in buf.
private int bufUsed = 0;
// Decompressor
private Inflater inf = new Inflater(true);
// CRC32 from uncompressed data
private final CRC32 crc = new CRC32();
// True once everything has been decompressed
private boolean endReached = false;
// used in no-arg read method
private final byte[] oneByte = new byte[1];
private final GzipParameters parameters = new GzipParameters();
/**
* Constructs a new input stream that decompresses gzip-compressed data
* from the specified input stream.
*
* This is equivalent to GzipCompressorInputStream(inputStream, false)
* and thus will not decompress concatenated .gz files.
*
* @param inputStream the InputStream from which this object should
* be created
*
* @throws IOException if the stream could not be created
*/
public GzipCompressorInputStream(InputStream inputStream)
throws IOException {
this(inputStream, false);
}
/**
* Constructs a new input stream that decompresses gzip-compressed data
* from the specified input stream.
*
* If decompressConcatenated is {@code false}:
* This decompressor might read more input than it will actually use.
* If inputStream supports mark and reset, then the input position
* will be adjusted so that it is right after the last byte of the
* compressed stream. If mark isn't supported, the input position
* will be undefined.
*
* @param inputStream the InputStream from which this object should
* be created
* @param decompressConcatenated
* if true, decompress until the end of the input;
* if false, stop after the first .gz member
*
* @throws IOException if the stream could not be created
*/
public GzipCompressorInputStream(InputStream inputStream,
boolean decompressConcatenated)
throws IOException {
// Mark support is strictly needed for concatenated files only,
// but it's simpler if it is always available.
if (inputStream.markSupported()) {
in = inputStream;
} else {
in = new BufferedInputStream(inputStream);
}
this.decompressConcatenated = decompressConcatenated;
init(true);
}
/**
* Provides the stream's meta data - may change with each stream
* when decompressing concatenated streams.
* @return the stream's meta data
* @since 1.8
*/
public GzipParameters getMetaData() {
return parameters;
}
private boolean init(boolean isFirstMember) throws IOException {
assert isFirstMember || decompressConcatenated;
// Check the magic bytes without a possibility of EOFException.
int magic0 = in.read();
int magic1 = in.read();
// If end of input was reached after decompressing at least
// one .gz member, we have reached the end of the file successfully.
if (magic0 == -1 && !isFirstMember) {
return false;
}
if (magic0 != 31 || magic1 != 139) {
throw new IOException(isFirstMember
? "Input is not in the .gz format"
: "Garbage after a valid .gz stream");
}
// Parsing the rest of the header may throw EOFException.
DataInputStream inData = new DataInputStream(in);
int method = inData.readUnsignedByte();
if (method != Deflater.DEFLATED) {
throw new IOException("Unsupported compression method "
+ method + " in the .gz header");
}
int flg = inData.readUnsignedByte();
if ((flg & FRESERVED) != 0) {
throw new IOException(
"Reserved flags are set in the .gz header");
}
parameters.setModificationTime(readLittleEndianInt(inData) * 1000);
switch (inData.readUnsignedByte()) { // extra flags
case 2:
parameters.setCompressionLevel(Deflater.BEST_COMPRESSION);
break;
case 4:
parameters.setCompressionLevel(Deflater.BEST_SPEED);
break;
default:
// ignored for now
break;
}
parameters.setOperatingSystem(inData.readUnsignedByte());
// Extra field, ignored
if ((flg & FEXTRA) != 0) {
int xlen = inData.readUnsignedByte();
xlen |= inData.readUnsignedByte() << 8;
// This isn't as efficient as calling in.skip would be,
// but it's lazier to handle unexpected end of input this way.
// Most files don't have an extra field anyway.
while (xlen-- > 0) {
inData.readUnsignedByte();
}
}
// Original file name
if ((flg & FNAME) != 0) {
parameters.setFilename(new String(readToNull(inData),
CharsetNames.ISO_8859_1));
}
// Comment
if ((flg & FCOMMENT) != 0) {
parameters.setComment(new String(readToNull(inData),
CharsetNames.ISO_8859_1));
}
// Header "CRC16" which is actually a truncated CRC32 (which isn't
// as good as real CRC16). I don't know if any encoder implementation
// sets this, so it's not worth trying to verify it. GNU gzip 1.4
// doesn't support this field, but zlib seems to be able to at least
// skip over it.
if ((flg & FHCRC) != 0) {
inData.readShort();
}
// Reset
inf.reset();
crc.reset();
return true;
}
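    // For reference, the fixed part of the member header parsed above is
    // ten bytes (RFC 1952), all multi-byte fields little-endian:
    //
    //   offset 0-1  magic bytes: 0x1f 0x8b
    //   offset 2    compression method: 8 = deflate
    //   offset 3    FLG: FTEXT/FHCRC/FEXTRA/FNAME/FCOMMENT bits
    //   offset 4-7  MTIME: modification time in seconds since the epoch
    //   offset 8    XFL: 2 = best compression, 4 = fastest
    //   offset 9    OS code
    //
    // Optional extra field, file name, comment and header CRC follow,
    // depending on the FLG bits.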
private byte[] readToNull(DataInputStream inData) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
int b = 0;
while ((b = inData.readUnsignedByte()) != 0x00) { // NOPMD
bos.write(b);
}
return bos.toByteArray();
}
private long readLittleEndianInt(DataInputStream inData) throws IOException {
return inData.readUnsignedByte()
| (inData.readUnsignedByte() << 8)
| (inData.readUnsignedByte() << 16)
| (((long) inData.readUnsignedByte()) << 24);
}
@Override
public int read() throws IOException {
return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
}
/**
* {@inheritDoc}
*
* @since 1.1
*/
@Override
public int read(byte[] b, int off, int len) throws IOException {
if (endReached) {
return -1;
}
int size = 0;
while (len > 0) {
if (inf.needsInput()) {
// Remember the current position because we may need to
// rewind after reading too much input.
in.mark(buf.length);
bufUsed = in.read(buf);
if (bufUsed == -1) {
throw new EOFException();
}
inf.setInput(buf, 0, bufUsed);
}
int ret;
try {
ret = inf.inflate(b, off, len);
} catch (DataFormatException e) {
throw new IOException("Gzip-compressed data is corrupt");
}
crc.update(b, off, ret);
off += ret;
len -= ret;
size += ret;
count(ret);
if (inf.finished()) {
// We may have read too many bytes. Rewind the read
// position to match the actual amount used.
//
// NOTE: The "if" is there just in case. Since we used
// in.mark earlier, it should always skip enough.
in.reset();
int skipAmount = bufUsed - inf.getRemaining();
if (in.skip(skipAmount) != skipAmount) {
throw new IOException();
}
bufUsed = 0;
DataInputStream inData = new DataInputStream(in);
// CRC32
long crcStored = readLittleEndianInt(inData);
if (crcStored != crc.getValue()) {
throw new IOException("Gzip-compressed data is corrupt "
+ "(CRC32 error)");
}
// Uncompressed size modulo 2^32 (ISIZE in the spec)
long isize = readLittleEndianInt(inData);
if (isize != (inf.getBytesWritten() & 0xFFFFFFFFL)) {
throw new IOException("Gzip-compressed data is corrupt "
+ "(uncompressed size mismatch)");
}
// See if this is the end of the file.
if (!decompressConcatenated || !init(false)) {
inf.end();
inf = null;
endReached = true;
return size == 0 ? -1 : size;
}
}
}
return size;
}
/**
* Checks if the signature matches what is expected for a .gz file.
*
* @param signature the bytes to check
* @param length the number of bytes to check
* @return true if this is a .gz stream, false otherwise
*
* @since 1.1
*/
public static boolean matches(byte[] signature, int length) {
if (length < 2) {
return false;
}
if (signature[0] != 31) {
return false;
}
if (signature[1] != -117) {
return false;
}
return true;
}
/**
* Closes the input stream (unless it is System.in).
*
* @since 1.2
*/
@Override
public void close() throws IOException {
if (inf != null) {
inf.end();
inf = null;
}
if (this.in != System.in) {
this.in.close();
}
}
}
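/*
 * A usage sketch (not part of the library source): reading a file that
 * may consist of several concatenated .gz members. The path used here
 * is made up for illustration.
 */
class GzipReadExample {
    public static void main(String[] args) throws java.io.IOException {
        java.io.InputStream fileIn =
            new java.io.FileInputStream("/tmp/logs.gz"); // hypothetical input
        GzipCompressorInputStream gzIn =
            new GzipCompressorInputStream(fileIn, true); // decompress all members
        // Meta data reflects the member currently being read.
        System.out.println("first member file name: "
            + gzIn.getMetaData().getFilename());
        byte[] buf = new byte[8192];
        int n;
        while ((n = gzIn.read(buf, 0, buf.length)) != -1) {
            System.out.write(buf, 0, n);
        }
        gzIn.close();
    }
}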
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.gzip;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.utils.CharsetNames;
/**
* Compressed output stream using the gzip format. This implementation improves
* over the standard {@link GZIPOutputStream} class by allowing
* the configuration of the compression level and the header metadata (filename,
* comment, modification time, operating system and extra flags).
*
* @see GZIP File Format Specification
*/
public class GzipCompressorOutputStream extends CompressorOutputStream {
/** Header flag indicating a file name follows the header */
private static final int FNAME = 1 << 3;
/** Header flag indicating a comment follows the header */
private static final int FCOMMENT = 1 << 4;
/** The underlying stream */
private final OutputStream out;
/** Deflater used to compress the data */
private final Deflater deflater;
/** The buffer receiving the compressed data from the deflater */
private final byte[] deflateBuffer = new byte[512];
/** Indicates if the stream has been closed */
private boolean closed;
/** The checksum of the uncompressed data */
private final CRC32 crc = new CRC32();
/**
* Creates a gzip compressed output stream with the default parameters.
* @param out the stream to compress to
* @throws IOException if writing the header fails
*/
public GzipCompressorOutputStream(OutputStream out) throws IOException {
this(out, new GzipParameters());
}
/**
* Creates a gzip compressed output stream with the specified parameters.
*
* @param out the stream to compress to
* @param parameters the Gzip parameters to apply
* @throws IOException if writing the header fails
* @since 1.7
*/
public GzipCompressorOutputStream(OutputStream out, GzipParameters parameters) throws IOException {
this.out = out;
this.deflater = new Deflater(parameters.getCompressionLevel(), true);
writeHeader(parameters);
}
private void writeHeader(GzipParameters parameters) throws IOException {
String filename = parameters.getFilename();
String comment = parameters.getComment();
ByteBuffer buffer = ByteBuffer.allocate(10);
buffer.order(ByteOrder.LITTLE_ENDIAN);
buffer.putShort((short) GZIPInputStream.GZIP_MAGIC);
buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate)
buffer.put((byte) ((filename != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
buffer.putInt((int) (parameters.getModificationTime() / 1000));
// extra flags
int compressionLevel = parameters.getCompressionLevel();
if (compressionLevel == Deflater.BEST_COMPRESSION) {
buffer.put((byte) 2);
} else if (compressionLevel == Deflater.BEST_SPEED) {
buffer.put((byte) 4);
} else {
buffer.put((byte) 0);
}
buffer.put((byte) parameters.getOperatingSystem());
out.write(buffer.array());
if (filename != null) {
out.write(filename.getBytes(CharsetNames.ISO_8859_1));
out.write(0);
}
if (comment != null) {
out.write(comment.getBytes(CharsetNames.ISO_8859_1));
out.write(0);
}
}
private void writeTrailer() throws IOException {
ByteBuffer buffer = ByteBuffer.allocate(8);
buffer.order(ByteOrder.LITTLE_ENDIAN);
buffer.putInt((int) crc.getValue());
buffer.putInt(deflater.getTotalIn());
out.write(buffer.array());
}
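    // The trailer written by writeTrailer above is the eight-byte gzip
    // member trailer (RFC 1952): the CRC32 of the uncompressed data
    // followed by ISIZE, the uncompressed size modulo 2^32, both
    // little-endian. Deflater#getTotalIn returns an int, and its
    // wrap-around on overflow matches the required modulo arithmetic.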
@Override
public void write(int b) throws IOException {
write(new byte[]{(byte) (b & 0xff)}, 0, 1);
}
/**
* {@inheritDoc}
*
* @since 1.1
*/
@Override
public void write(byte[] buffer) throws IOException {
write(buffer, 0, buffer.length);
}
/**
* {@inheritDoc}
*
* @since 1.1
*/
@Override
public void write(byte[] buffer, int offset, int length) throws IOException {
if (deflater.finished()) {
throw new IOException("Cannot write more data, the end of the compressed data stream has been reached");
} else if (length > 0) {
deflater.setInput(buffer, offset, length);
while (!deflater.needsInput()) {
deflate();
}
crc.update(buffer, offset, length);
}
}
private void deflate() throws IOException {
int length = deflater.deflate(deflateBuffer, 0, deflateBuffer.length);
if (length > 0) {
out.write(deflateBuffer, 0, length);
}
}
/**
* Finishes writing compressed data to the underlying stream without closing it.
*
* @since 1.7
*/
public void finish() throws IOException {
if (!deflater.finished()) {
deflater.finish();
while (!deflater.finished()) {
deflate();
}
writeTrailer();
}
}
/**
* {@inheritDoc}
*
* @since 1.7
*/
@Override
public void flush() throws IOException {
out.flush();
}
@Override
public void close() throws IOException {
if (!closed) {
finish();
deflater.end();
out.close();
closed = true;
}
}
}
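/*
 * A usage sketch (not part of the library source): compressing with
 * explicit header metadata. The file names are made up for illustration.
 */
class GzipWriteExample {
    public static void main(String[] args) throws java.io.IOException {
        GzipParameters params = new GzipParameters();
        params.setCompressionLevel(java.util.zip.Deflater.BEST_SPEED);
        params.setFilename("report.txt");            // stored in the FNAME field
        params.setComment("nightly export");         // stored in the FCOMMENT field
        params.setModificationTime(System.currentTimeMillis());

        java.io.OutputStream fileOut =
            new java.io.FileOutputStream("/tmp/report.txt.gz"); // hypothetical
        GzipCompressorOutputStream gzOut =
            new GzipCompressorOutputStream(fileOut, params);
        gzOut.write("line 1\nline 2\n".getBytes("ISO-8859-1"));
        gzOut.close(); // finish() writes the CRC32/ISIZE trailer
    }
}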
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.gzip;
import java.util.zip.Deflater;
/**
* Parameters for the GZIP compressor.
*
* @since 1.7
*/
public class GzipParameters {
private int compressionLevel = Deflater.DEFAULT_COMPRESSION;
private long modificationTime;
private String filename;
private String comment;
private int operatingSystem = 255; // Unknown OS by default
public int getCompressionLevel() {
return compressionLevel;
}
/**
* Sets the compression level.
*
* @param compressionLevel the compression level (between 0 and 9)
* @see Deflater#NO_COMPRESSION
* @see Deflater#BEST_SPEED
* @see Deflater#DEFAULT_COMPRESSION
* @see Deflater#BEST_COMPRESSION
*/
public void setCompressionLevel(int compressionLevel) {
if (compressionLevel < -1 || compressionLevel > 9) {
throw new IllegalArgumentException("Invalid gzip compression level: " + compressionLevel);
}
this.compressionLevel = compressionLevel;
}
public long getModificationTime() {
return modificationTime;
}
/**
* Sets the modification time of the compressed file.
*
* @param modificationTime the modification time, in milliseconds
*/
public void setModificationTime(long modificationTime) {
this.modificationTime = modificationTime;
}
public String getFilename() {
return filename;
}
/**
* Sets the name of the compressed file.
*
* @param filename the name of the file without the directory path
*/
public void setFilename(String filename) {
this.filename = filename;
}
public String getComment() {
return comment;
}
public void setComment(String comment) {
this.comment = comment;
}
public int getOperatingSystem() {
return operatingSystem;
}
/**
* Sets the operating system on which the compression took place.
* The defined values (taken from RFC 1952) are:
*   0: FAT filesystem (MS-DOS, OS/2, NT/Win32)
*   1: Amiga
*   2: VMS (or OpenVMS)
*   3: Unix
*   4: VM/CMS
*   5: Atari TOS
*   6: HPFS filesystem (OS/2, NT)
*   7: Macintosh
*   8: Z-System
*   9: CP/M
*   10: TOPS-20
*   11: NTFS filesystem (NT)
*   12: QDOS
*   13: Acorn RISCOS
*   255: Unknown
*
* @param operatingSystem the code of the operating system
*/
public void setOperatingSystem(int operatingSystem) {
this.operatingSystem = operatingSystem;
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.gzip;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.commons.compress.compressors.FileNameUtil;
/**
* Utility code for the gzip compression format.
* @ThreadSafe
*/
public class GzipUtils {
private static final FileNameUtil fileNameUtil;
static {
// using LinkedHashMap so .tgz is preferred over .taz as
// compressed extension of .tar as FileNameUtil will use the
// first one found
MapinputStream
throws an exception
*/
public LZMACompressorInputStream(InputStream inputStream)
throws IOException {
in = new LZMAInputStream(inputStream);
}
/** {@inheritDoc} */
@Override
public int read() throws IOException {
int ret = in.read();
count(ret == -1 ? 0 : 1);
return ret;
}
/** {@inheritDoc} */
@Override
public int read(byte[] buf, int off, int len) throws IOException {
int ret = in.read(buf, off, len);
count(ret);
return ret;
}
/** {@inheritDoc} */
@Override
public long skip(long n) throws IOException {
return in.skip(n);
}
/** {@inheritDoc} */
@Override
public int available() throws IOException {
return in.available();
}
/** {@inheritDoc} */
@Override
public void close() throws IOException {
in.close();
}
/**
* Checks if the signature matches what is expected for an lzma file.
*
* @param signature
* the bytes to check
* @param length
* the number of bytes to check
* @return true, if this stream is an lzma compressed stream, false otherwise
*
* @since 1.10
*/
public static boolean matches(byte[] signature, int length) {
if (signature == null || length < 3) {
return false;
}
if (signature[0] != 0x5d) {
return false;
}
if (signature[1] != 0) {
return false;
}
if (signature[2] != 0) {
return false;
}
return true;
}
}
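/*
 * A sniffing sketch (not part of the library source): peek at the first
 * bytes of a stream and only hand it to the LZMA decompressor when the
 * 0x5D 0x00 0x00 magic matches.
 */
class LzmaSniffExample {
    static java.io.InputStream maybeWrap(java.io.InputStream in)
            throws java.io.IOException {
        java.io.BufferedInputStream buffered = new java.io.BufferedInputStream(in);
        buffered.mark(3);
        byte[] head = new byte[3];
        int n = buffered.read(head, 0, 3);
        buffered.reset(); // rewind so no data is lost
        if (LZMACompressorInputStream.matches(head, n)) {
            return new LZMACompressorInputStream(buffered);
        }
        return buffered; // not LZMA, pass through unchanged
    }
}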
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.lzma;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.compressors.FileNameUtil;
/**
* Utility code for the lzma compression format.
* @ThreadSafe
* @since 1.10
*/
public class LZMAUtils {
private static final FileNameUtil fileNameUtil;
/**
* LZMA Header Magic Bytes begin an LZMA file.
*/
private static final byte[] HEADER_MAGIC = {
(byte) 0x5D, 0, 0
};
static enum CachedAvailability {
DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE
}
private static volatile CachedAvailability cachedLZMAAvailability;
static {
Mapint
in the range 0 to 255.
* If no byte is available because the end of the
* stream has been reached, an IOException is thrown.
*
* @return The next byte of data
* @throws IOException
* if EOF is reached or an error occurs while reading the stream
*/
private int readOneByte() throws IOException {
int b = in.read();
if (b == -1) {
throw new IOException("Premature end of stream");
}
count(1);
return b & 0xFF;
}
/**
* The stream starts with the uncompressed length (up to a maximum of 2^32 -
* 1), stored as a little-endian varint. Varints consist of a series of
* bytes, where the lower 7 bits are data and the upper bit is set iff there
* are more bytes to be read. In other words, an uncompressed length of 64
* would be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE)
* would be stored as 0xFE 0xFF 0x7F.
*
* @return The size of the uncompressed data
*
* @throws IOException
* Could not read a byte
*/
private long readSize() throws IOException {
int index = 0;
long sz = 0;
int b = 0;
do {
b = readOneByte();
sz |= ((long) (b & 0x7f)) << (index++ * 7); // widen before shifting so long sizes don't overflow
} while (0 != (b & 0x80));
return sz;
}
/**
* Get the uncompressed size of the stream
*
* @return the uncompressed size
*/
public int getSize() {
return size;
}
}
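/*
 * A standalone sketch (not part of the library source) of the varint rule
 * readSize implements, checked against the examples from its javadoc: the
 * byte 0x40 decodes to 64 and the bytes 0xFE 0xFF 0x7F decode to 2097150
 * (0x1FFFFE).
 */
class VarintExample {
    static long decode(int[] bytes) {
        long size = 0;
        int index = 0;
        for (int b : bytes) {
            size |= ((long) (b & 0x7f)) << (index++ * 7); // low 7 bits are data
            if ((b & 0x80) == 0) {                        // high bit clear: done
                break;
            }
        }
        return size;
    }
    public static void main(String[] args) {
        System.out.println(decode(new int[] { 0x40 }));             // 64
        System.out.println(decode(new int[] { 0xFE, 0xFF, 0x7F })); // 2097150
    }
}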
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.pack200;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
* Provides an InputStream to read all data written to this
* OutputStream.
*
* @ThreadSafe
* @since 1.3
*/
abstract class StreamBridge extends FilterOutputStream {
private InputStream input;
private final Object INPUT_LOCK = new Object();
protected StreamBridge(OutputStream out) {
super(out);
}
protected StreamBridge() {
this(null);
}
/**
* Provides the input view.
*/
InputStream getInput() throws IOException {
synchronized (INPUT_LOCK) {
if (input == null) {
input = getInputView();
}
}
return input;
}
/**
* Creates the input view.
*/
abstract InputStream getInputView() throws IOException;
/**
* Closes input and output and releases all associated resources.
*/
void stop() throws IOException {
close();
synchronized (INPUT_LOCK) {
if (input != null) {
input.close();
input = null;
}
}
}
}
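/*
 * An illustrative counterpart (not the library's own class) to the
 * temporary-file bridge that follows: the simplest possible StreamBridge
 * caches everything in memory and serves the input view from a byte
 * array. This mirrors the "in memory" caching mode described in the
 * pack200 package notes earlier.
 */
class ByteArrayCachingStreamBridge extends StreamBridge {
    ByteArrayCachingStreamBridge() {
        super(new java.io.ByteArrayOutputStream());
    }
    @Override
    java.io.InputStream getInputView() throws java.io.IOException {
        // "out" is the FilterOutputStream field holding the buffer passed above
        return new java.io.ByteArrayInputStream(
            ((java.io.ByteArrayOutputStream) out).toByteArray());
    }
}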
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.pack200;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
/**
* StreamSwitcher that caches all data written to the output side in
* a temporary file.
* @since 1.3
*/
class TempFileCachingStreamBridge extends StreamBridge {
private final File f;
TempFileCachingStreamBridge() throws IOException {
f = File.createTempFile("commons-compress", "packtemp");
f.deleteOnExit();
out = new FileOutputStream(f);
}
@Override
InputStream getInputView() throws IOException {
out.close();
return new FileInputStream(f) {
@Override
public void close() throws IOException {
super.close();
f.delete();
}
};
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.xz;
import java.io.IOException;
import java.io.InputStream;
import org.tukaani.xz.XZ;
import org.tukaani.xz.SingleXZInputStream;
import org.tukaani.xz.XZInputStream;
import org.apache.commons.compress.compressors.CompressorInputStream;
/**
* XZ decompressor.
* @since 1.4
*/
public class XZCompressorInputStream extends CompressorInputStream {
private final InputStream in;
/**
* Checks if the signature matches what is expected for a .xz file.
*
* @param signature the bytes to check
* @param length the number of bytes to check
* @return true if signature matches the .xz magic bytes, false otherwise
*/
public static boolean matches(byte[] signature, int length) {
if (length < XZ.HEADER_MAGIC.length) {
return false;
}
for (int i = 0; i < XZ.HEADER_MAGIC.length; ++i) {
if (signature[i] != XZ.HEADER_MAGIC[i]) {
return false;
}
}
return true;
}
/**
* Creates a new input stream that decompresses XZ-compressed data
* from the specified input stream. This doesn't support
* concatenated .xz files.
*
* @param inputStream where to read the compressed data
*
* @throws IOException if the input is not in the .xz format,
* the input is corrupt or truncated, the .xz
* headers specify options that are not supported
* by this implementation, or the underlying
* inputStream throws an exception
*/
public XZCompressorInputStream(InputStream inputStream)
throws IOException {
this(inputStream, false);
}
/**
* Creates a new input stream that decompresses XZ-compressed data
* from the specified input stream.
*
* @param inputStream where to read the compressed data
* @param decompressConcatenated
* if true, decompress until the end of the
* input; if false, stop after the first .xz
* stream and leave the input position to point
* to the next byte after the .xz stream
*
* @throws IOException if the input is not in the .xz format,
* the input is corrupt or truncated, the .xz
* headers specify options that are not supported
* by this implementation, or the underlying
* inputStream throws an exception
*/
public XZCompressorInputStream(InputStream inputStream,
boolean decompressConcatenated)
throws IOException {
if (decompressConcatenated) {
in = new XZInputStream(inputStream);
} else {
in = new SingleXZInputStream(inputStream);
}
}
@Override
public int read() throws IOException {
int ret = in.read();
count(ret == -1 ? -1 : 1);
return ret;
}
@Override
public int read(byte[] buf, int off, int len) throws IOException {
int ret = in.read(buf, off, len);
count(ret);
return ret;
}
@Override
public long skip(long n) throws IOException {
return in.skip(n);
}
@Override
public int available() throws IOException {
return in.available();
}
@Override
public void close() throws IOException {
in.close();
}
}
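/*
 * A usage sketch (not part of the library source): decompressing a .xz
 * file after verifying its magic bytes. The path is made up for
 * illustration.
 */
class XZReadExample {
    public static void main(String[] args) throws java.io.IOException {
        java.io.InputStream fileIn = new java.io.BufferedInputStream(
            new java.io.FileInputStream("/tmp/data.xz")); // hypothetical input
        byte[] head = new byte[6];
        fileIn.mark(head.length);
        int n = fileIn.read(head, 0, head.length);
        fileIn.reset(); // rewind so the decompressor sees the full stream
        if (!XZCompressorInputStream.matches(head, n)) {
            fileIn.close();
            throw new java.io.IOException("not an .xz file");
        }
        XZCompressorInputStream xzIn =
            new XZCompressorInputStream(fileIn, true); // handle concatenated streams
        byte[] buf = new byte[8192];
        int r;
        while ((r = xzIn.read(buf, 0, buf.length)) != -1) {
            System.out.write(buf, 0, r);
        }
        xzIn.close();
    }
}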
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.xz;
import java.io.IOException;
import java.io.OutputStream;
import org.tukaani.xz.LZMA2Options;
import org.tukaani.xz.XZOutputStream;
import org.apache.commons.compress.compressors.CompressorOutputStream;
/**
* XZ compressor.
* @since 1.4
*/
public class XZCompressorOutputStream extends CompressorOutputStream {
private final XZOutputStream out;
/**
* Creates a new XZ compressor using the default LZMA2 options.
* This is equivalent to XZCompressorOutputStream(outputStream, 6).
* @param outputStream the stream to wrap
* @throws IOException on error
*/
public XZCompressorOutputStream(OutputStream outputStream)
throws IOException {
out = new XZOutputStream(outputStream, new LZMA2Options());
}
/**
     * Creates a new XZ compressor using the specified LZMA2 preset level.
     *
     * The presets 0-3 are fast presets with medium compression.
     * The presets 4-6 are fairly slow presets with high compression.
     * The default preset is 6.
     *
     * The presets 7-9 are like the preset 6 but use bigger dictionaries
     * and have higher compressor and decompressor memory requirements.
     * Unless the uncompressed size of the file exceeds 8 MiB,
     * 16 MiB, or 32 MiB, it is a waste of memory to use the
     * presets 7, 8, or 9, respectively.
     *
     * @param outputStream the stream to wrap
     * @param preset the preset
     * @throws IOException on error
     */
    public XZCompressorOutputStream(OutputStream outputStream, int preset)
            throws IOException {
        out = new XZOutputStream(outputStream, new LZMA2Options(preset));
    }
    @Override
    public void write(int b) throws IOException {
        out.write(b);
    }
    @Override
    public void write(byte[] buf, int off, int len) throws IOException {
        out.write(buf, off, len);
    }
    /**
     * Flushes the encoder and calls outputStream.flush().
     * All buffered pending data will then be decompressible from
     * the output stream. Calling this function very often may increase
     * the compressed file size a lot.
     */
@Override
public void flush() throws IOException {
out.flush();
}
/**
* Finishes compression without closing the underlying stream.
* No more data can be written to this stream after finishing.
*/
public void finish() throws IOException {
out.finish();
}
@Override
public void close() throws IOException {
out.close();
}
}
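/*
 * A sketch (not part of the library source) of the preset guidance in the
 * constructor javadoc above: only pay for the big-dictionary presets 7-9
 * when the input is actually large enough to benefit.
 */
class XZPresetExample {
    static int choosePreset(long uncompressedSize) {
        if (uncompressedSize > 32L * 1024 * 1024) {
            return 9;
        }
        if (uncompressedSize > 16L * 1024 * 1024) {
            return 8;
        }
        if (uncompressedSize > 8L * 1024 * 1024) {
            return 7;
        }
        return 6; // the default preset
    }
    public static void main(String[] args) throws java.io.IOException {
        java.io.ByteArrayOutputStream sink = new java.io.ByteArrayOutputStream();
        XZCompressorOutputStream xzOut =
            new XZCompressorOutputStream(sink, choosePreset(1024));
        xzOut.write("Commons Compress".getBytes("ISO-8859-1"));
        xzOut.close();
    }
}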
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.xz;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.compressors.FileNameUtil;
/**
* Utility code for the xz compression format.
* @ThreadSafe
* @since 1.4
*/
public class XZUtils {
private static final FileNameUtil fileNameUtil;
/**
     * XZ Header Magic Bytes begin an XZ file.
     *
     * This is a copy of {@code org.tukaani.xz.XZ.HEADER_MAGIC} in
     * XZ for Java version 1.5.
     */
    private static final byte[] HEADER_MAGIC = {
        (byte) 0xFD, '7', 'z', 'X', 'Z', '\0'
    };