/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19 package org.apache.commons.compress.archivers.tar;
20 
21 import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
22 import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
23 
24 import java.io.IOException;
25 import java.math.BigInteger;
26 import java.nio.ByteBuffer;
27 import org.apache.commons.compress.archivers.zip.ZipEncoding;
28 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
29 
30 /**
31  * This class provides static utility methods to work with byte streams.
32  *
33  * @Immutable
34  */
35 // CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
36 public class TarUtils {
37 
38     private static final int BYTE_MASK = 255;
39 
40     static final ZipEncoding DEFAULT_ENCODING =
41         ZipEncodingHelper.getZipEncoding(null);
42 
43     /**
44      * Encapsulates the algorithms used up to Commons Compress 1.3 as
45      * ZipEncoding.
46      */
47     static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
48             @Override
49             public boolean canEncode(final String name) { return true; }
50 
51             @Override
52             public ByteBuffer encode(final String name) {
53                 final int length = name.length();
54                 final byte[] buf = new byte[length];
55 
56                 // copy until end of input or output is reached.
57                 for (int i = 0; i < length; ++i) {
58                     buf[i] = (byte) name.charAt(i);
59                 }
60                 return ByteBuffer.wrap(buf);
61             }
62 
63             @Override
64             public String decode(final byte[] buffer) {
65                 final int length = buffer.length;
66                 final StringBuilder result = new StringBuilder(length);
67 
68                 for (final byte b : buffer) {
69                     if (b == 0) { // Trailing null
70                         break;
71                     }
72                     result.append((char) (b & 0xFF)); // Allow for sign-extension
73                 }
74 
75                 return result.toString();
76             }
77         };
78 
79     /** Private constructor to prevent instantiation of this utility class. */
TarUtils()80     private TarUtils(){
81     }
82 
83     /**
84      * Parse an octal string from a buffer.
85      *
86      * <p>Leading spaces are ignored.
87      * The buffer must contain a trailing space or NUL,
88      * and may contain an additional trailing space or NUL.</p>
89      *
90      * <p>The input buffer is allowed to contain all NULs,
91      * in which case the method returns 0L
92      * (this allows for missing fields).</p>
93      *
94      * <p>To work-around some tar implementations that insert a
95      * leading NUL this method returns 0 if it detects a leading NUL
96      * since Commons Compress 1.4.</p>
97      *
98      * @param buffer The buffer from which to parse.
99      * @param offset The offset into the buffer from which to parse.
100      * @param length The maximum number of bytes to parse - must be at least 2 bytes.
101      * @return The long value of the octal string.
102      * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
103      */
parseOctal(final byte[] buffer, final int offset, final int length)104     public static long parseOctal(final byte[] buffer, final int offset, final int length) {
105         long    result = 0;
106         int     end = offset + length;
107         int     start = offset;
108 
109         if (length < 2){
110             throw new IllegalArgumentException("Length "+length+" must be at least 2");
111         }
112 
113         if (buffer[start] == 0) {
114             return 0L;
115         }
116 
117         // Skip leading spaces
118         while (start < end){
119             if (buffer[start] == ' '){
120                 start++;
121             } else {
122                 break;
123             }
124         }
125 
126         // Trim all trailing NULs and spaces.
127         // The ustar and POSIX tar specs require a trailing NUL or
128         // space but some implementations use the extra digit for big
129         // sizes/uids/gids ...
130         byte trailer = buffer[end - 1];
131         while (start < end && (trailer == 0 || trailer == ' ')) {
132             end--;
133             trailer = buffer[end - 1];
134         }
135 
136         for ( ;start < end; start++) {
137             final byte currentByte = buffer[start];
138             // CheckStyle:MagicNumber OFF
139             if (currentByte < '0' || currentByte > '7'){
140                 throw new IllegalArgumentException(
141                         exceptionMessage(buffer, offset, length, start, currentByte));
142             }
143             result = (result << 3) + (currentByte - '0'); // convert from ASCII
144             // CheckStyle:MagicNumber ON
145         }
146 
147         return result;
148     }
149 
150     /**
151      * Compute the value contained in a byte buffer.  If the most
152      * significant bit of the first byte in the buffer is set, this
153      * bit is ignored and the rest of the buffer is interpreted as a
154      * binary number.  Otherwise, the buffer is interpreted as an
155      * octal number as per the parseOctal function above.
156      *
157      * @param buffer The buffer from which to parse.
158      * @param offset The offset into the buffer from which to parse.
159      * @param length The maximum number of bytes to parse.
160      * @return The long value of the octal or binary string.
161      * @throws IllegalArgumentException if the trailing space/NUL is
162      * missing or an invalid byte is detected in an octal number, or
163      * if a binary number would exceed the size of a signed long
164      * 64-bit integer.
165      * @since 1.4
166      */
parseOctalOrBinary(final byte[] buffer, final int offset, final int length)167     public static long parseOctalOrBinary(final byte[] buffer, final int offset,
168                                           final int length) {
169 
170         if ((buffer[offset] & 0x80) == 0) {
171             return parseOctal(buffer, offset, length);
172         }
173         final boolean negative = buffer[offset] == (byte) 0xff;
174         if (length < 9) {
175             return parseBinaryLong(buffer, offset, length, negative);
176         }
177         return parseBinaryBigInteger(buffer, offset, length, negative);
178     }
179 
parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative)180     private static long parseBinaryLong(final byte[] buffer, final int offset,
181                                         final int length,
182                                         final boolean negative) {
183         if (length >= 9) {
184             throw new IllegalArgumentException("At offset " + offset + ", "
185                                                + length + " byte binary number"
186                                                + " exceeds maximum signed long"
187                                                + " value");
188         }
189         long val = 0;
190         for (int i = 1; i < length; i++) {
191             val = (val << 8) + (buffer[offset + i] & 0xff);
192         }
193         if (negative) {
194             // 2's complement
195             val--;
196             val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
197         }
198         return negative ? -val : val;
199     }
200 
parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative)201     private static long parseBinaryBigInteger(final byte[] buffer,
202                                               final int offset,
203                                               final int length,
204                                               final boolean negative) {
205         final byte[] remainder = new byte[length - 1];
206         System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
207         BigInteger val = new BigInteger(remainder);
208         if (negative) {
209             // 2's complement
210             val = val.add(BigInteger.valueOf(-1)).not();
211         }
212         if (val.bitLength() > 63) {
213             throw new IllegalArgumentException("At offset " + offset + ", "
214                                                + length + " byte binary number"
215                                                + " exceeds maximum signed long"
216                                                + " value");
217         }
218         return negative ? -val.longValue() : val.longValue();
219     }
220 
221     /**
222      * Parse a boolean byte from a buffer.
223      * Leading spaces and NUL are ignored.
224      * The buffer may contain trailing spaces or NULs.
225      *
226      * @param buffer The buffer from which to parse.
227      * @param offset The offset into the buffer from which to parse.
228      * @return The boolean value of the bytes.
229      * @throws IllegalArgumentException if an invalid byte is detected.
230      */
parseBoolean(final byte[] buffer, final int offset)231     public static boolean parseBoolean(final byte[] buffer, final int offset) {
232         return buffer[offset] == 1;
233     }
234 
235     // Helper method to generate the exception message
exceptionMessage(final byte[] buffer, final int offset, final int length, final int current, final byte currentByte)236     private static String exceptionMessage(final byte[] buffer, final int offset,
237             final int length, final int current, final byte currentByte) {
238         // default charset is good enough for an exception message,
239         //
240         // the alternative was to modify parseOctal and
241         // parseOctalOrBinary to receive the ZipEncoding of the
242         // archive (deprecating the existing public methods, of
243         // course) and dealing with the fact that ZipEncoding#decode
244         // can throw an IOException which parseOctal* doesn't declare
245         String string = new String(buffer, offset, length);
246 
247         string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
248         return "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
249     }
250 
251     /**
252      * Parse an entry name from a buffer.
253      * Parsing stops when a NUL is found
254      * or the buffer length is reached.
255      *
256      * @param buffer The buffer from which to parse.
257      * @param offset The offset into the buffer from which to parse.
258      * @param length The maximum number of bytes to parse.
259      * @return The entry name.
260      */
parseName(final byte[] buffer, final int offset, final int length)261     public static String parseName(final byte[] buffer, final int offset, final int length) {
262         try {
263             return parseName(buffer, offset, length, DEFAULT_ENCODING);
264         } catch (final IOException ex) {
265             try {
266                 return parseName(buffer, offset, length, FALLBACK_ENCODING);
267             } catch (final IOException ex2) {
268                 // impossible
269                 throw new RuntimeException(ex2); //NOSONAR
270             }
271         }
272     }
273 
274     /**
275      * Parse an entry name from a buffer.
276      * Parsing stops when a NUL is found
277      * or the buffer length is reached.
278      *
279      * @param buffer The buffer from which to parse.
280      * @param offset The offset into the buffer from which to parse.
281      * @param length The maximum number of bytes to parse.
282      * @param encoding name of the encoding to use for file names
283      * @since 1.4
284      * @return The entry name.
285      * @throws IOException on error
286      */
parseName(final byte[] buffer, final int offset, final int length, final ZipEncoding encoding)287     public static String parseName(final byte[] buffer, final int offset,
288                                    final int length,
289                                    final ZipEncoding encoding)
290         throws IOException {
291 
292         int len = 0;
293         for (int i = offset; len < length && buffer[i] != 0; i++) {
294             len++;
295         }
296         if (len > 0) {
297             final byte[] b = new byte[len];
298             System.arraycopy(buffer, offset, b, 0, len);
299             return encoding.decode(b);
300         }
301         return "";
302     }
303 
304     /**
305      * Copy a name into a buffer.
306      * Copies characters from the name into the buffer
307      * starting at the specified offset.
308      * If the buffer is longer than the name, the buffer
309      * is filled with trailing NULs.
310      * If the name is longer than the buffer,
311      * the output is truncated.
312      *
313      * @param name The header name from which to copy the characters.
314      * @param buf The buffer where the name is to be stored.
315      * @param offset The starting offset into the buffer
316      * @param length The maximum number of header bytes to copy.
317      * @return The updated offset, i.e. offset + length
318      */
formatNameBytes(final String name, final byte[] buf, final int offset, final int length)319     public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
320         try {
321             return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
322         } catch (final IOException ex) {
323             try {
324                 return formatNameBytes(name, buf, offset, length,
325                                        FALLBACK_ENCODING);
326             } catch (final IOException ex2) {
327                 // impossible
328                 throw new RuntimeException(ex2); //NOSONAR
329             }
330         }
331     }
332 
333     /**
334      * Copy a name into a buffer.
335      * Copies characters from the name into the buffer
336      * starting at the specified offset.
337      * If the buffer is longer than the name, the buffer
338      * is filled with trailing NULs.
339      * If the name is longer than the buffer,
340      * the output is truncated.
341      *
342      * @param name The header name from which to copy the characters.
343      * @param buf The buffer where the name is to be stored.
344      * @param offset The starting offset into the buffer
345      * @param length The maximum number of header bytes to copy.
346      * @param encoding name of the encoding to use for file names
347      * @since 1.4
348      * @return The updated offset, i.e. offset + length
349      * @throws IOException on error
350      */
formatNameBytes(final String name, final byte[] buf, final int offset, final int length, final ZipEncoding encoding)351     public static int formatNameBytes(final String name, final byte[] buf, final int offset,
352                                       final int length,
353                                       final ZipEncoding encoding)
354         throws IOException {
355         int len = name.length();
356         ByteBuffer b = encoding.encode(name);
357         while (b.limit() > length && len > 0) {
358             b = encoding.encode(name.substring(0, --len));
359         }
360         final int limit = b.limit() - b.position();
361         System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
362 
363         // Pad any remaining output bytes with NUL
364         for (int i = limit; i < length; ++i) {
365             buf[offset + i] = 0;
366         }
367 
368         return offset + length;
369     }
370 
371     /**
372      * Fill buffer with unsigned octal number, padded with leading zeroes.
373      *
374      * @param value number to convert to octal - treated as unsigned
375      * @param buffer destination buffer
376      * @param offset starting offset in buffer
377      * @param length length of buffer to fill
378      * @throws IllegalArgumentException if the value will not fit in the buffer
379      */
formatUnsignedOctalString(final long value, final byte[] buffer, final int offset, final int length)380     public static void formatUnsignedOctalString(final long value, final byte[] buffer,
381             final int offset, final int length) {
382         int remaining = length;
383         remaining--;
384         if (value == 0) {
385             buffer[offset + remaining--] = (byte) '0';
386         } else {
387             long val = value;
388             for (; remaining >= 0 && val != 0; --remaining) {
389                 // CheckStyle:MagicNumber OFF
390                 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
391                 val = val >>> 3;
392                 // CheckStyle:MagicNumber ON
393             }
394             if (val != 0){
395                 throw new IllegalArgumentException
396                 (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
397             }
398         }
399 
400         for (; remaining >= 0; --remaining) { // leading zeros
401             buffer[offset + remaining] = (byte) '0';
402         }
403     }
404 
405     /**
406      * Write an octal integer into a buffer.
407      *
408      * Uses {@link #formatUnsignedOctalString} to format
409      * the value as an octal string with leading zeros.
410      * The converted number is followed by space and NUL
411      *
412      * @param value The value to write
413      * @param buf The buffer to receive the output
414      * @param offset The starting offset into the buffer
415      * @param length The size of the output buffer
416      * @return The updated offset, i.e offset+length
417      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
418      */
formatOctalBytes(final long value, final byte[] buf, final int offset, final int length)419     public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
420 
421         int idx=length-2; // For space and trailing null
422         formatUnsignedOctalString(value, buf, offset, idx);
423 
424         buf[offset + idx++] = (byte) ' '; // Trailing space
425         buf[offset + idx]   = 0; // Trailing null
426 
427         return offset + length;
428     }
429 
430     /**
431      * Write an octal long integer into a buffer.
432      *
433      * Uses {@link #formatUnsignedOctalString} to format
434      * the value as an octal string with leading zeros.
435      * The converted number is followed by a space.
436      *
437      * @param value The value to write as octal
438      * @param buf The destinationbuffer.
439      * @param offset The starting offset into the buffer.
440      * @param length The length of the buffer
441      * @return The updated offset
442      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
443      */
formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length)444     public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
445 
446         final int idx=length-1; // For space
447 
448         formatUnsignedOctalString(value, buf, offset, idx);
449         buf[offset + idx] = (byte) ' '; // Trailing space
450 
451         return offset + length;
452     }
453 
454     /**
455      * Write an long integer into a buffer as an octal string if this
456      * will fit, or as a binary number otherwise.
457      *
458      * Uses {@link #formatUnsignedOctalString} to format
459      * the value as an octal string with leading zeros.
460      * The converted number is followed by a space.
461      *
462      * @param value The value to write into the buffer.
463      * @param buf The destination buffer.
464      * @param offset The starting offset into the buffer.
465      * @param length The length of the buffer.
466      * @return The updated offset.
467      * @throws IllegalArgumentException if the value (and trailer)
468      * will not fit in the buffer.
469      * @since 1.4
470      */
formatLongOctalOrBinaryBytes( final long value, final byte[] buf, final int offset, final int length)471     public static int formatLongOctalOrBinaryBytes(
472         final long value, final byte[] buf, final int offset, final int length) {
473 
474         // Check whether we are dealing with UID/GID or SIZE field
475         final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
476 
477         final boolean negative = value < 0;
478         if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
479             return formatLongOctalBytes(value, buf, offset, length);
480         }
481 
482         if (length < 9) {
483             formatLongBinary(value, buf, offset, length, negative);
484         } else {
485             formatBigIntegerBinary(value, buf, offset, length, negative);
486         }
487 
488         buf[offset] = (byte) (negative ? 0xff : 0x80);
489         return offset + length;
490     }
491 
492     private static void formatLongBinary(final long value, final byte[] buf,
493                                          final int offset, final int length,
494                                          final boolean negative) {
495         final int bits = (length - 1) * 8;
496         final long max = 1L << bits;
497         long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
498         if (val < 0 || val >= max) {
499             throw new IllegalArgumentException("Value " + value +
500                 " is too large for " + length + " byte field.");
501         }
502         if (negative) {
503             val ^= max - 1;
504             val++;
505             val |= 0xffL << bits;
506         }
507         for (int i = offset + length - 1; i >= offset; i--) {
508             buf[i] = (byte) val;
509             val >>= 8;
510         }
511     }
512 
513     private static void formatBigIntegerBinary(final long value, final byte[] buf,
514                                                final int offset,
515                                                final int length,
516                                                final boolean negative) {
517         final BigInteger val = BigInteger.valueOf(value);
518         final byte[] b = val.toByteArray();
519         final int len = b.length;
520         if (len > length - 1) {
521             throw new IllegalArgumentException("Value " + value +
522                 " is too large for " + length + " byte field.");
523         }
524         final int off = offset + length - len;
525         System.arraycopy(b, 0, buf, off, len);
526         final byte fill = (byte) (negative ? 0xff : 0);
527         for (int i = offset + 1; i < off; i++) {
528             buf[i] = fill;
529         }
530     }
531 
532     /**
533      * Writes an octal value into a buffer.
534      *
535      * Uses {@link #formatUnsignedOctalString} to format
536      * the value as an octal string with leading zeros.
537      * The converted number is followed by NUL and then space.
538      *
539      * @param value The value to convert
540      * @param buf The destination buffer
541      * @param offset The starting offset into the buffer.
542      * @param length The size of the buffer.
543      * @return The updated value of offset, i.e. offset+length
544      * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
545      */
546     public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
547 
548         int idx=length-2; // for NUL and space
549         formatUnsignedOctalString(value, buf, offset, idx);
550 
551         buf[offset + idx++]   = 0; // Trailing null
552         buf[offset + idx]     = (byte) ' '; // Trailing space
553 
554         return offset + length;
555     }
556 
557     /**
558      * Compute the checksum of a tar entry header.
559      *
560      * @param buf The tar entry's header buffer.
561      * @return The computed checksum.
562      */
563     public static long computeCheckSum(final byte[] buf) {
564         long sum = 0;
565 
566         for (final byte element : buf) {
567             sum += BYTE_MASK & element;
568         }
569 
570         return sum;
571     }
572 
573     /**
574      * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
575      * <blockquote>
576      * The checksum is calculated by taking the sum of the unsigned byte values
577      * of the header block with the eight checksum bytes taken to be ascii
578      * spaces (decimal value 32). It is stored as a six digit octal number with
579      * leading zeroes followed by a NUL and then a space. Various
580      * implementations do not adhere to this format. For better compatibility,
581      * ignore leading and trailing whitespace, and get the first six digits. In
582      * addition, some historic tar implementations treated bytes as signed.
583      * Implementations typically calculate the checksum both ways, and treat it
584      * as good if either the signed or unsigned sum matches the included
585      * checksum.
586      * </blockquote>
587      * <p>
588      * The return value of this method should be treated as a best-effort
589      * heuristic rather than an absolute and final truth. The checksum
590      * verification logic may well evolve over time as more special cases
591      * are encountered.
592      *
593      * @param header tar header
594      * @return whether the checksum is reasonably good
595      * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
596      * @since 1.5
597      */
598     public static boolean verifyCheckSum(final byte[] header) {
599         final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
600         long unsignedSum = 0;
601         long signedSum = 0;
602 
603         for (int i = 0; i < header.length; i++) {
604             byte b = header[i];
605             if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
606                 b = ' ';
607             }
608             unsignedSum += 0xff & b;
609             signedSum += b;
610         }
611         return storedSum == unsignedSum || storedSum == signedSum;
612     }
613 
614 }
615