1 /* tar.c - create/extract archives
2  *
3  * Copyright 2014 Ashwini Kumar <ak.ashwini81@gmail.com>
4  *
5  * For the command, see
6  *   http://pubs.opengroup.org/onlinepubs/007908799/xcu/tar.html
7  * For the modern file format, see
8  *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
9  *   https://en.wikipedia.org/wiki/Tar_(computing)#File_format
10  *   https://www.gnu.org/software/tar/manual/html_node/Tar-Internals.html
11  *
12  * For writing to external program
13  * http://www.gnu.org/software/tar/manual/html_node/Writing-to-an-External-Program.html
14  *
15  * Toybox will never implement the "pax" command as a matter of policy.
16  *
17  * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
18  *
19 
20 USE_TAR(NEWTOY(tar, "&(strip-components)#(selinux)(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
21 
22 config TAR
23   bool "tar"
24   default y
25   help
26     usage: tar [-cxt] [-fvohmjkOS] [-XTCf NAME] [--selinux] [FILE...]
27 
28     Create, extract, or list files in a .tar (or compressed t?z) file.
29 
30     Options:
31     c  Create                x  Extract               t  Test (list)
32     f  tar FILE (default -)  C  Change to DIR first   v  Verbose display
33     o  Ignore owner          h  Follow symlinks       m  Ignore mtime
34     J  xz compression        j  bzip2 compression     z  gzip compression
35     O  Extract to stdout     X  exclude names in FILE T  include names in FILE
36 
37     --exclude        FILENAME to exclude  --full-time         Show seconds with -tv
38     --mode MODE      Adjust permissions   --owner NAME[:UID]  Set file ownership
39     --mtime TIME     Override timestamps  --group NAME[:GID]  Set file group
40     --sparse         Record sparse files  --selinux           Save/restore labels
41     --restrict       All under one dir    --no-recursion      Skip dir contents
42     --numeric-owner  Use numeric uid/gid, not user/group names
43     --strip-components NUM  Ignore first NUM directory components when extracting
44     -I PROG          Filter through PROG to compress or PROG -d to decompress
45 */
46 
47 #define FOR_tar
48 #include "toys.h"
49 
50 GLOBALS(
51   char *f, *C;
52   struct arg_list *T, *X;
53   char *I, *to_command, *owner, *group, *mtime, *mode;
54   struct arg_list *exclude;
55   long strip_components;
56 
57   struct double_list *incl, *excl, *seen;
58   struct string_list *dirs;
59   char *cwd;
60   int fd, ouid, ggid, hlc, warn, adev, aino, sparselen, pid;
61   long long *sparse;
62   time_t mtt;
63 
64   // hardlinks seen so far (hlc many)
65   struct {
66     char *arg;
67     ino_t ino;
68     dev_t dev;
69   } *hlx;
70 
71   // Parsed information about a tar header.
72   struct tar_header {
73     char *name, *link_target, *uname, *gname;
74     long long size, ssize;
75     uid_t uid;
76     gid_t gid;
77     mode_t mode;
78     time_t mtime;
79     dev_t device;
80   } hdr;
81 )
82 
// On-disk layout of one 512 byte tar header block. Every member is a char
// array (or a single char), so the struct has no padding and member offsets
// equal the byte offsets within the block.
struct tar_hdr {
  char name[100];   // offset 0
  char mode[8];     // offset 100
  char uid[8];      // offset 108
  char gid[8];      // offset 116
  char size[12];    // offset 124
  char mtime[12];   // offset 136
  char chksum[8];   // offset 148
  char type;        // offset 156
  char link[100];   // offset 157
  char magic[8];    // offset 257
  char uname[32];   // offset 265
  char gname[32];   // offset 297
  char major[8];    // offset 329
  char minor[8];    // offset 337
  char prefix[155]; // offset 345
  char padd[12];    // offset 500
};
88 
// Tar uses ASCII octal when it fits, base-256 otherwise.
//
// Returns nonzero when val can be written as len-1 octal digits (the last
// byte of a len byte field is left for the terminator).
static int ascii_fits(unsigned long long val, int len)
{
  // No room for even a terminator
  if (len < 1) return 0;
  // 22 or more octal digits cover 66 bits, more than val can hold. Checking
  // here also keeps the shift count below the width of val.
  if (len > 22) return 1;

  return !(val>>(3*(len-1)));
}
94 
// convert from int to octal (or base-256)
//
// Fills the len byte field at str: zero padded octal digits plus a NUL when
// ascii_fits() says so, otherwise the value in big endian binary with the
// first byte of the field set to 128.
static void itoo(char *str, int len, unsigned long long val)
{
  char *out;

  if (ascii_fits(val, len)) {
    sprintf(str, "%0*llo", len-1, val);

    return;
  }

  // Store the low byte last, walking back toward the start of the field
  out = str+len;
  while (len--) {
    *--out = val;
    val >>= 8;
  }
  // The marker overwrites whatever was stored in the first byte
  *out = 128;
}
#define ITOO(x, y) itoo(x, sizeof(x), y)
105 
// convert octal (or base-256) to int
//
// str is a header field of len bytes. A first byte with the high bit set
// selects base-256: the remaining len-1 bytes are a big endian binary value.
// Otherwise the field is optional leading spaces followed by octal digits,
// ended by NUL, space, or the end of the field. A value that does not fit,
// or a stray character after the digits, is fatal ("bad header").
static unsigned long long otoi(char *str, unsigned len)
{
  // Read unsigned bytes: plain char may be signed, and a sign extended
  // payload byte >= 0x80 would corrupt the accumulated value.
  unsigned char *s = (void *)str;
  unsigned long long val = 0;

  if (!len) return 0;

  // When tar value too big for octal, use binary encoding with high bit set
  if (128&*s) while (--len) {
    // If the top byte is already in use, shifting again would drop bits
    if (val>>(8*sizeof(val)-8)) error_exit("bad header");
    val = (val<<8)+*++s;
  } else {
    // Leading spaces count against the field width too, otherwise the scan
    // could run on into the following field.
    while (len && *s == ' ') s++, len--;
    while (len && *s>='0' && *s<='7') val = val*8+*s++-'0', len--;
    if (len && *s && *s != ' ') error_exit("bad header");
  }

  return val;
}
#define OTOI(x) otoi(x, sizeof(x))
124 
write_prefix_block(char * data,int len,char type)125 static void write_prefix_block(char *data, int len, char type)
126 {
127   struct tar_hdr tmp;
128 
129   memset(&tmp, 0, sizeof(tmp));
130   sprintf(tmp.name, "././@%s", type=='x' ? "PaxHeaders" : "LongLink");
131   ITOO(tmp.uid, 0);
132   ITOO(tmp.gid, 0);
133   ITOO(tmp.size, len);
134   ITOO(tmp.mtime, 0);
135   tmp.type = type;
136   strcpy(tmp.magic, "ustar  ");
137 
138   // Historical nonsense to match other implementations. Never used.
139   ITOO(tmp.mode, 0644);
140   strcpy(tmp.uname, "root");
141   strcpy(tmp.gname, "root");
142 
143   // Calculate checksum. Since 512*255 = 0377000 in octal, this can never
144   // use more than 6 digits. The last byte is ' ' for historical reasons.
145   itoo(tmp.chksum, sizeof(tmp.chksum)-1, tar_cksum(&tmp));
146   tmp.chksum[7] = ' ';
147 
148   // write header and name, padded with NUL to block size
149   xwrite(TT.fd, &tmp, 512);
150   xwrite(TT.fd, data, len);
151   if (len%512) xwrite(TT.fd, toybuf, 512-(len%512));
152 }
153 
// Emit a prefix block of the given type carrying data (including its NUL
// terminator), but only when data is longer than check characters.
static void maybe_prefix_block(char *data, int check, int type)
{
  int len = strlen(data);

  if (len<=check) return;

  write_prefix_block(data, len+1, type);
}
160 
filter(struct double_list * lst,char * name)161 static struct double_list *filter(struct double_list *lst, char *name)
162 {
163   struct double_list *end = lst;
164 
165   if (lst)
166     // constant is FNM_LEADING_DIR
167     do if (!fnmatch(lst->data, name, 1<<3)) return lst;
168     while (end != (lst = lst->next));
169 
170   return 0;
171 }
172 
skippy(long long len)173 static void skippy(long long len)
174 {
175   if (lskip(TT.fd, len)) perror_exit("EOF");
176 }
177 
178 // allocate and read data from TT.fd
alloread(void * buf,int len)179 static void alloread(void *buf, int len)
180 {
181   // actually void **, but automatic typecasting doesn't work with void ** :(
182   char **b = buf;
183 
184   free(*b);
185   *b = xmalloc(len+1);
186   xreadall(TT.fd, *b, len);
187   (*b)[len] = 0;
188 }
189 
190 // callback from dirtree to create archive
add_to_tar(struct dirtree * node)191 static int add_to_tar(struct dirtree *node)
192 {
193   struct stat *st = &(node->st);
194   struct tar_hdr hdr;
195   struct passwd *pw = pw;
196   struct group *gr = gr;
197   int i, fd = -1, norecurse = FLAG(no_recursion);
198   char *name, *lnk, *hname;
199 
200   if (!dirtree_notdotdot(node)) return 0;
201   if (TT.adev == st->st_dev && TT.aino == st->st_ino) {
202     error_msg("'%s' file is the archive; not dumped", node->name);
203     return 0;
204   }
205 
206   i = 1;
207   name = hname = dirtree_path(node, &i);
208 
209   // exclusion defaults to --no-anchored and --wildcards-match-slash
210   for (lnk = name; *lnk;) {
211     if (filter(TT.excl, lnk)) {
212       norecurse++;
213 
214       goto done;
215     }
216     while (*lnk && *lnk!='/') lnk++;
217     while (*lnk=='/') lnk++;
218   }
219 
220   // Consume the 1 extra byte alocated in dirtree_path()
221   if (S_ISDIR(st->st_mode) && name[i-1] != '/') strcat(name, "/");
222 
223   // remove leading / and any .. entries from saved name
224   if (!FLAG(P)) while (*hname == '/') hname++;
225   for (lnk = hname;;) {
226     if (!(lnk = strstr(lnk, ".."))) break;
227     if (lnk == hname || lnk[-1] == '/') {
228       if (!lnk[2]) goto done;
229       if (lnk[2]=='/') {
230         lnk = hname = lnk+3;
231         continue;
232       }
233     }
234     lnk += 2;
235   }
236   if (!*hname) goto done;
237 
238   if (TT.warn && hname != name) {
239     dprintf(2, "removing leading '%.*s' from member names\n",
240            (int)(hname-name), name);
241     TT.warn = 0;
242   }
243 
244   if (TT.owner) st->st_uid = TT.ouid;
245   if (TT.group) st->st_gid = TT.ggid;
246   if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
247   if (TT.mtime) st->st_mtime = TT.mtt;
248 
249   memset(&hdr, 0, sizeof(hdr));
250   strncpy(hdr.name, hname, sizeof(hdr.name));
251   ITOO(hdr.mode, st->st_mode &07777);
252   ITOO(hdr.uid, st->st_uid);
253   ITOO(hdr.gid, st->st_gid);
254   ITOO(hdr.size, 0); //set size later
255   ITOO(hdr.mtime, st->st_mtime);
256   strcpy(hdr.magic, "ustar  ");
257 
258   // Hard link or symlink? i=0 neither, i=1 hardlink, i=2 symlink
259 
260   // Are there hardlinks to a non-directory entry?
261   if (st->st_nlink>1 && !S_ISDIR(st->st_mode)) {
262     // Have we seen this dev&ino before?
263     for (i = 0; i<TT.hlc; i++) {
264       if (st->st_ino == TT.hlx[i].ino && st->st_dev == TT.hlx[i].dev)
265         break;
266     }
267     if (i != TT.hlc) {
268       lnk = TT.hlx[i].arg;
269       i = 1;
270     } else {
271       // first time we've seen it. Store as normal file, but remember it.
272       if (!(TT.hlc&255))
273         TT.hlx = xrealloc(TT.hlx, sizeof(*TT.hlx)*(TT.hlc+256));
274       TT.hlx[TT.hlc].arg = xstrdup(hname);
275       TT.hlx[TT.hlc].ino = st->st_ino;
276       TT.hlx[TT.hlc].dev = st->st_dev;
277       TT.hlc++;
278       i = 0;
279     }
280   } else i = 0;
281 
282   // Handle file types
283   if (i || S_ISLNK(st->st_mode)) {
284     hdr.type = '1'+!i;
285     if (!i && !(lnk = xreadlink(name))) {
286       perror_msg("readlink");
287       goto done;
288     }
289     maybe_prefix_block(lnk, sizeof(hdr.link), 'K');
290     strncpy(hdr.link, lnk, sizeof(hdr.link));
291     if (!i) free(lnk);
292   } else if (S_ISREG(st->st_mode)) {
293     hdr.type = '0';
294     ITOO(hdr.size, st->st_size);
295   } else if (S_ISDIR(st->st_mode)) hdr.type = '5';
296   else if (S_ISFIFO(st->st_mode)) hdr.type = '6';
297   else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) {
298     hdr.type = (S_ISCHR(st->st_mode))?'3':'4';
299     ITOO(hdr.major, dev_major(st->st_rdev));
300     ITOO(hdr.minor, dev_minor(st->st_rdev));
301   } else {
302     error_msg("unknown file type '%o'", st->st_mode & S_IFMT);
303     goto done;
304   }
305 
306   // write out 'x' prefix header for --selinux data
307   if (FLAG(selinux)) {
308     int start = 0, sz = 0, temp, len = 0;
309     char *buf = 0, *sec = "security.selinux";
310 
311     for (;;) {
312       // First time get length, second time read data into prepared buffer
313       len = (S_ISLNK(st->st_mode) ? xattr_lget : xattr_get)
314         (name, sec, buf+start, sz);
315 
316       // Handle data or error
317       if (len>999999 || (sz && len>sz)) len = -1, errno = E2BIG;
318       if (buf || len<1) {
319         if (len>0) {
320           strcpy(buf+start+sz, "\n");
321           write_prefix_block(buf, start+sz+2, 'x');
322         } else if (errno==ENODATA || errno==ENOTSUP) len = 0;
323         if (len) perror_msg("getfilecon %s", name);
324 
325         free(buf);
326         break;
327       }
328 
329       // Allocate buffer. Length includes prefix: calculate twice (wrap 99->100)
330       temp = snprintf(0, 0, "%d", sz = (start = 22)+len+1);
331       start += temp + (temp != snprintf(0, 0, "%d", temp+sz));
332       buf = xmprintf("%u RHT.%s=%.*s", start+len+1, sec, sz = len, "");
333     }
334   }
335 
336   maybe_prefix_block(hname, sizeof(hdr.name), 'L');
337   if (!FLAG(numeric_owner)) {
338     if ((TT.owner || (pw = bufgetpwuid(st->st_uid))) &&
339         ascii_fits(st->st_uid, sizeof(hdr.uid)))
340       strncpy(hdr.uname, TT.owner ? : pw->pw_name, sizeof(hdr.uname));
341     if ((TT.group || (gr = bufgetgrgid(st->st_gid))) &&
342         ascii_fits(st->st_gid, sizeof(hdr.gid)))
343       strncpy(hdr.gname, TT.group ? : gr->gr_name, sizeof(hdr.gname));
344   }
345 
346   TT.sparselen = 0;
347   if (hdr.type == '0') {
348     // Before we write the header, make sure we can read the file
349     if ((fd = open(name, O_RDONLY)) < 0) {
350       perror_msg("can't open '%s'", name);
351 
352       return 0;
353     }
354     if (FLAG(S)) {
355       long long lo, ld = 0, len = 0;
356 
357       // Enumerate the extents
358       while ((lo = lseek(fd, ld, SEEK_HOLE)) != -1) {
359         if (!(TT.sparselen&511))
360           TT.sparse = xrealloc(TT.sparse, (TT.sparselen+514)*sizeof(long long));
361         if (ld != lo) {
362           TT.sparse[TT.sparselen++] = ld;
363           len += TT.sparse[TT.sparselen++] = lo-ld;
364         }
365         if (lo == st->st_size || (ld = lseek(fd, lo, SEEK_DATA)) < lo) break;
366       }
367 
368       // If there were extents, change type to S record
369       if (TT.sparselen>2) {
370         TT.sparse[TT.sparselen++] = st->st_size;
371         TT.sparse[TT.sparselen++] = 0;
372         hdr.type = 'S';
373         lnk = (char *)&hdr;
374         for (i = 0; i<TT.sparselen && i<8; i++)
375           itoo(lnk+386+12*i, 12, TT.sparse[i]);
376 
377         // Record if there's overflow records, change length to sparse length,
378         // record apparent length
379         if (TT.sparselen>8) lnk[482] = 1;
380         itoo(lnk+483, 12, st->st_size);
381         ITOO(hdr.size, len);
382       } else TT.sparselen = 0;
383       lseek(fd, 0, SEEK_SET);
384     }
385   }
386 
387   itoo(hdr.chksum, sizeof(hdr.chksum)-1, tar_cksum(&hdr));
388   hdr.chksum[7] = ' ';
389 
390   if (FLAG(v)) dprintf(1+(TT.fd==1), "%s\n", hname);
391 
392   // Write header and data to archive
393   xwrite(TT.fd, &hdr, 512);
394   if (TT.sparselen>8) {
395     char buf[512];
396 
397     // write extent overflow blocks
398     for (i=8;;i++) {
399       int j = (i-8)%42;
400 
401       if (!j || i==TT.sparselen) {
402         if (i!=8) {
403           if (i!=TT.sparselen) buf[504] = 1;
404           xwrite(TT.fd, buf, 512);
405         }
406         if (i==TT.sparselen) break;
407         memset(buf, 0, sizeof(buf));
408       }
409       itoo(buf+12*j, 12, TT.sparse[i]);
410     }
411   }
412   TT.sparselen >>= 1;
413   if (hdr.type == '0' || hdr.type == 'S') {
414     if (hdr.type == '0') xsendfile_pad(fd, TT.fd, st->st_size);
415     else for (i = 0; i<TT.sparselen; i++) {
416       if (TT.sparse[i*2] != lseek(fd, TT.sparse[i*2], SEEK_SET))
417         perror_msg("%s: seek %lld", name, TT.sparse[i*2]);
418       xsendfile_pad(fd, TT.fd, TT.sparse[i*2+1]);
419     }
420     if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512)));
421     close(fd);
422   }
423 done:
424   free(name);
425 
426   return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!norecurse;
427 }
428 
wsettime(char * s,long long sec)429 static void wsettime(char *s, long long sec)
430 {
431   struct timespec times[2] = {{sec, 0},{sec, 0}};
432 
433   if (utimensat(AT_FDCWD, s, times, AT_SYMLINK_NOFOLLOW))
434     perror_msg("settime %lld %s", sec, s);
435 }
436 
437 // Do pending directory utimes(), NULL to flush all.
dirflush(char * name,int isdir)438 static int dirflush(char *name, int isdir)
439 {
440   char *s = 0, *ss;
441 
442   // Barf if name not in TT.cwd
443   if (name) {
444     if (!(ss = s = xabspath(name, isdir ? ABS_LAST : 0))) {
445       error_msg("'%s' bad symlink", name);
446 
447       return 1;
448     }
449     if (TT.cwd[1] && (!strstart(&ss, TT.cwd) || (*ss && *ss!='/'))) {
450       error_msg("'%s' %s not under '%s'", name, s, TT.cwd);
451       free(s);
452 
453       return 1;
454     }
455 
456     // --restrict means first entry extracted is what everything must be under
457     if (FLAG(restrict)) {
458       free(TT.cwd);
459       TT.cwd = strdup(s);
460       toys.optflags ^= FLAG_restrict;
461     }
462     // use resolved name so trailing / is stripped
463     if (isdir) unlink(s);
464   }
465 
466   // Set deferred utimes() for directories this file isn't under.
467   // (Files must be depth-first ordered in tarball for this to matter.)
468   while (TT.dirs) {
469 
470     // If next file is under (or equal to) this dir, keep waiting
471     if (name && strstart(&ss, ss = s) && (!*ss || *ss=='/')) break;
472 
473     wsettime(TT.dirs->str+sizeof(long long), *(long long *)TT.dirs->str);
474     free(llist_pop(&TT.dirs));
475   }
476   free(s);
477 
478   // name was under TT.cwd
479   return 0;
480 }
481 
482 // write data to file
sendfile_sparse(int fd)483 static void sendfile_sparse(int fd)
484 {
485   long long len, used = 0, sent;
486   int i = 0, j;
487 
488   do {
489     if (TT.sparselen) {
490       // Seek past holes or fill output with zeroes.
491       if (-1 == lseek(fd, len = TT.sparse[i*2], SEEK_SET)) {
492         sent = 0;
493         while (len) {
494           // first/last 512 bytes used, rest left zeroes
495           j = (len>3072) ? 3072 : len;
496           if (j != writeall(fd, toybuf+512, j)) goto error;
497           len -= j;
498         }
499       } else {
500         sent = len;
501         if (!(len = TT.sparse[i*2+1]) && ftruncate(fd, sent+len))
502           perror_msg("ftruncate");
503       }
504       if (len+used>TT.hdr.size) error_exit("sparse overflow");
505     } else len = TT.hdr.size;
506 
507     len -= sendfile_len(TT.fd, fd, len, &sent);
508     used += sent;
509     if (len) {
510 error:
511       if (fd!=1) perror_msg(0);
512       skippy(TT.hdr.size-used);
513 
514       break;
515     }
516   } while (++i<TT.sparselen);
517 
518   close(fd);
519 }
520 
extract_to_disk(void)521 static void extract_to_disk(void)
522 {
523   char *name = TT.hdr.name;
524   int ala = TT.hdr.mode, strip;
525 
526   for (strip = 0; strip < TT.strip_components; strip++) {
527     char *s = strchr(name, '/');
528 
529     if (s && s[1]) name = s+1;
530     else if (S_ISDIR(ala)) return;
531     else break;
532   }
533 
534   if (dirflush(name, S_ISDIR(ala))) {
535     if (S_ISREG(ala) && !TT.hdr.link_target) skippy(TT.hdr.size);
536 
537     return;
538   }
539 
540   // create path before file if necessary
541   if (strrchr(name, '/') && mkpath(name) && errno!=EEXIST)
542       return perror_msg(":%s: can't mkdir", name);
543 
544   // remove old file, if exists
545   if (!FLAG(k) && !S_ISDIR(ala) && rmdir(name) && errno!=ENOENT && unlink(name))
546     return perror_msg("can't remove: %s", name);
547 
548   if (S_ISREG(ala)) {
549     // hardlink?
550     if (TT.hdr.link_target) {
551       if (link(TT.hdr.link_target, name))
552         return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
553     // write contents
554     } else {
555       int fd = WARN_ONLY|O_WRONLY|O_CREAT|(FLAG(overwrite) ? O_TRUNC : O_EXCL);
556 
557       if ((fd = xcreate(name, fd, ala&07777)) != -1) sendfile_sparse(fd);
558       else return skippy(TT.hdr.size);
559     }
560   } else if (S_ISDIR(ala)) {
561     if ((mkdir(name, 0700) == -1) && errno != EEXIST)
562       return perror_msg("%s: can't create", name);
563   } else if (S_ISLNK(ala)) {
564     if (symlink(TT.hdr.link_target, name))
565       return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
566   } else if (mknod(name, ala, TT.hdr.device))
567     return perror_msg("can't create '%s'", name);
568 
569   // Set ownership
570   if (!FLAG(o) && !geteuid()) {
571     int u = TT.hdr.uid, g = TT.hdr.gid;
572 
573     if (TT.owner) TT.hdr.uid = TT.ouid;
574     else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
575       struct passwd *pw = bufgetpwnamuid(TT.hdr.uname, 0);
576       if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
577     }
578 
579     if (TT.group) TT.hdr.gid = TT.ggid;
580     else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
581       struct group *gr = bufgetgrnamgid(TT.hdr.gname, 0);
582       if (gr) TT.hdr.gid = gr->gr_gid;
583     }
584 
585     if (lchown(name, u, g)) perror_msg("chown %d:%d '%s'", u, g, name);;
586   }
587 
588   if (!S_ISLNK(ala)) chmod(name, FLAG(p) ? ala : ala&0777);
589 
590   // Apply mtime.
591   if (!FLAG(m)) {
592     if (S_ISDIR(ala)) {
593       struct string_list *sl;
594 
595       // Writing files into a directory changes directory timestamps, so
596       // defer mtime updates until contents written.
597 
598       sl = xmalloc(sizeof(struct string_list)+sizeof(long long)+strlen(name)+1);
599       *(long long *)sl->str = TT.hdr.mtime;
600       strcpy(sl->str+sizeof(long long), name);
601       sl->next = TT.dirs;
602       TT.dirs = sl;
603     } else wsettime(name, TT.hdr.mtime);
604   }
605 }
606 
unpack_tar(char * first)607 static void unpack_tar(char *first)
608 {
609   struct double_list *walk, *delete;
610   struct tar_hdr tar;
611   int i, sefd = -1, and = 0;
612   unsigned maj, min;
613   char *s;
614 
615   for (;;) {
616     if (first) {
617       memcpy(&tar, first, i = 512);
618       first = 0;
619     } else {
620       // align to next block and read it
621       if (TT.hdr.size%512) skippy(512-TT.hdr.size%512);
622       i = readall(TT.fd, &tar, 512);
623     }
624 
625     if (i && i!=512) error_exit("short header");
626 
627     // Two consecutive empty headers ends tar even if there's more data
628     if (!i || !*tar.name) {
629       if (!i || and++) return;
630       TT.hdr.size = 0;
631       continue;
632     }
633     // ensure null temination even of pathological packets
634     tar.padd[0] = and = 0;
635 
636     // Is this a valid TAR header?
637     if (!is_tar_header(&tar)) error_exit("bad header");
638     TT.hdr.size = OTOI(tar.size);
639 
640     // If this header isn't writing something to the filesystem
641     if ((tar.type<'0' || tar.type>'7') && tar.type!='S'
642         && (*tar.magic && tar.type))
643     {
644       // Skip to next record if unknown type or payload > 1 megabyte
645       if (!strchr("KLx", tar.type) || TT.hdr.size>1<<20) skippy(TT.hdr.size);
646       // Read link or long name
647       else if (tar.type != 'x')
648         alloread(tar.type=='K'?&TT.hdr.link_target:&TT.hdr.name, TT.hdr.size);
649       // Loop through 'x' payload records in "LEN NAME=VALUE\n" format
650       else {
651         char *p, *pp, *buf = 0;
652         unsigned i, len, n;
653 
654         alloread(&buf, TT.hdr.size);
655         for (p = buf; (p-buf)<TT.hdr.size; p += len) {
656           i = TT.hdr.size-(p-buf);
657           if (1!=sscanf(p, "%u %n", &len, &n) || len<n+4 || len>i || n>i) {
658             error_msg("bad header");
659             break;
660           }
661           p[len-1] = 0;
662           pp = p+n;
663           // Ignore "RHT." prefix, if any.
664           strstart(&pp, "RHT.");
665           if ((FLAG(selinux) && !(FLAG(t)|FLAG(O)))
666               && strstart(&pp, "security.selinux="))
667           {
668             i = strlen(pp);
669             sefd = xopen("/proc/self/attr/fscreate", O_WRONLY|WARN_ONLY);
670             if (sefd==-1 ||  i!=write(sefd, pp, i))
671               perror_msg("setfscreatecon %s", pp);
672           } else if (strstart(&pp, "path=")) {
673             free(TT.hdr.name);
674             TT.hdr.name = xstrdup(pp);
675             break;
676           }
677         }
678         free(buf);
679       }
680 
681       continue;
682     }
683 
684     // Handle sparse file type
685     TT.sparselen = 0;
686     if (tar.type == 'S') {
687       char sparse[512];
688       int max = 8;
689 
690       // Load 4 pairs of offset/len from S block, plus 21 pairs from each
691       // continuation block, list says where to seek/write sparse file contents
692       s = 386+(char *)&tar;
693       *sparse = i = 0;
694 
695       for (;;) {
696         if (!(TT.sparselen&511))
697           TT.sparse = xrealloc(TT.sparse, (TT.sparselen+512)*sizeof(long long));
698 
699         // If out of data in block check continue flag, stop or load next block
700         if (++i>max || !*s) {
701           if (!(*sparse ? sparse[504] : ((char *)&tar)[482])) break;
702           xreadall(TT.fd, s = sparse, 512);
703           max = 41;
704           i = 0;
705         }
706         // Load next entry
707         TT.sparse[TT.sparselen++] = otoi(s, 12);
708         s += 12;
709       }
710 
711       // Odd number of entries (from corrupted tar) would be dropped here
712       TT.sparselen /= 2;
713       if (TT.sparselen)
714         TT.hdr.ssize = TT.sparse[2*TT.sparselen-1]+TT.sparse[2*TT.sparselen-2];
715     } else TT.hdr.ssize = TT.hdr.size;
716 
717     // At this point, we have something to output. Convert metadata.
718     TT.hdr.mode = OTOI(tar.mode)&0xfff;
719     if (tar.type == 'S' || !tar.type) TT.hdr.mode |= 0x8000;
720     else TT.hdr.mode |= (char []){8,8,10,2,6,4,1,8}[tar.type-'0']<<12;
721     TT.hdr.uid = OTOI(tar.uid);
722     TT.hdr.gid = OTOI(tar.gid);
723     TT.hdr.mtime = OTOI(tar.mtime);
724     maj = OTOI(tar.major);
725     min = OTOI(tar.minor);
726     TT.hdr.device = dev_makedev(maj, min);
727     TT.hdr.uname = xstrndup(TT.owner ? : tar.uname, sizeof(tar.uname));
728     TT.hdr.gname = xstrndup(TT.group ? : tar.gname, sizeof(tar.gname));
729 
730     if (TT.owner) TT.hdr.uid = TT.ouid;
731     else if (!FLAG(numeric_owner)) {
732       struct passwd *pw = bufgetpwnamuid(TT.hdr.uname, 0);
733       if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
734     }
735 
736     if (TT.group) TT.hdr.gid = TT.ggid;
737     else if (!FLAG(numeric_owner)) {
738       struct group *gr = bufgetgrnamgid(TT.hdr.gname, 0);
739       if (gr) TT.hdr.gid = gr->gr_gid;
740     }
741 
742     if (!TT.hdr.link_target && *tar.link)
743       TT.hdr.link_target = xstrndup(tar.link, sizeof(tar.link));
744     if (!TT.hdr.name) {
745       // Glue prefix and name fields together with / if necessary
746       i = (tar.type=='S') ? 0 : strnlen(tar.prefix, sizeof(tar.prefix));
747       TT.hdr.name = xmprintf("%.*s%s%.*s", i, tar.prefix,
748         (i && tar.prefix[i-1] != '/') ? "/" : "",
749         (int)sizeof(tar.name), tar.name);
750     }
751 
752     // Old broken tar recorded dir as "file with trailing slash"
753     if (S_ISREG(TT.hdr.mode) && (s = strend(TT.hdr.name, "/"))) {
754       *s = 0;
755       TT.hdr.mode = (TT.hdr.mode & ~S_IFMT) | S_IFDIR;
756     }
757 
758     // Non-regular files don't have contents stored in archive.
759     if ((TT.hdr.link_target && *TT.hdr.link_target)
760       || (tar.type && !S_ISREG(TT.hdr.mode)))
761         TT.hdr.size = 0;
762 
763     // Files are seen even if excluded, so check them here.
764     // TT.seen points to first seen entry in TT.incl, or NULL if none yet.
765 
766     if ((delete = filter(TT.incl, TT.hdr.name)) && TT.incl != TT.seen) {
767       if (!TT.seen) TT.seen = delete;
768 
769       // Move seen entry to end of list.
770       if (TT.incl == delete) TT.incl = TT.incl->next;
771       else for (walk = TT.incl; walk != TT.seen; walk = walk->next) {
772         if (walk == delete) {
773           dlist_pop(&walk);
774           dlist_add_nomalloc(&TT.incl, delete);
775         }
776       }
777     }
778 
779     // Skip excluded files
780     if (filter(TT.excl, TT.hdr.name) || (TT.incl && !delete))
781       skippy(TT.hdr.size);
782     else if (FLAG(t)) {
783       if (FLAG(v)) {
784         struct tm *lc = localtime(TT.mtime ? &TT.mtt : &TT.hdr.mtime);
785         char perm[12], gname[12];
786 
787         mode_to_string(TT.hdr.mode, perm);
788         printf("%s", perm);
789         sprintf(perm, "%u", TT.hdr.uid);
790         sprintf(gname, "%u", TT.hdr.gid);
791         printf(" %s/%s ", *TT.hdr.uname ? TT.hdr.uname : perm,
792           *TT.hdr.gname ? TT.hdr.gname : gname);
793         if (tar.type=='3' || tar.type=='4') printf("%u,%u", maj, min);
794         else printf("%9lld", TT.hdr.ssize);
795         sprintf(perm, ":%02d", lc->tm_sec);
796         printf("  %d-%02d-%02d %02d:%02d%s ", 1900+lc->tm_year, 1+lc->tm_mon,
797           lc->tm_mday, lc->tm_hour, lc->tm_min, FLAG(full_time) ? perm : "");
798       }
799       printf("%s", TT.hdr.name);
800       if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
801       xputc('\n');
802       skippy(TT.hdr.size);
803     } else {
804       if (FLAG(v)) printf("%s\n", TT.hdr.name);
805       if (FLAG(O)) sendfile_sparse(1);
806       else if (FLAG(to_command)) {
807         if (S_ISREG(TT.hdr.mode)) {
808           int fd, pid;
809 
810           xsetenv("TAR_FILETYPE", "f");
811           xsetenv(xmprintf("TAR_MODE=%o", TT.hdr.mode), 0);
812           xsetenv(xmprintf("TAR_SIZE=%lld", TT.hdr.ssize), 0);
813           xsetenv("TAR_FILENAME", TT.hdr.name);
814           xsetenv("TAR_UNAME", TT.hdr.uname);
815           xsetenv("TAR_GNAME", TT.hdr.gname);
816           xsetenv(xmprintf("TAR_MTIME=%llo", (long long)TT.hdr.mtime), 0);
817           xsetenv(xmprintf("TAR_UID=%o", TT.hdr.uid), 0);
818           xsetenv(xmprintf("TAR_GID=%o", TT.hdr.gid), 0);
819 
820           pid = xpopen((char *[]){"sh", "-c", TT.to_command, NULL}, &fd, 0);
821           // todo: short write exits tar here, other skips data.
822           sendfile_sparse(fd);
823           fd = xpclose_both(pid, 0);
824           if (fd) error_msg("%d: Child returned %d", pid, fd);
825         }
826       } else extract_to_disk();
827     }
828 
829     if (sefd != -1) {
830       // zero length write resets fscreate context to default
831       (void)write(sefd, 0, 0);
832       close(sefd);
833       sefd = -1;
834     }
835     free(TT.hdr.name);
836     free(TT.hdr.link_target);
837     free(TT.hdr.uname);
838     free(TT.hdr.gname);
839     TT.hdr.name = TT.hdr.link_target = 0;
840   }
841 }
842 
// Add copy of filename (minus trailing \n and /) to dlist **
//
// list is the address of the list head, pline the name to copy. At most one
// trailing newline is dropped, then trailing slashes, but a name consisting
// only of slashes keeps its first character so "/" does not become "".
static void trim2list(void *list, char *pline)
{
  char *n = xstrdup(pline);
  int i = strlen(n);

  dlist_add(list, n);
  if (i && n[i-1]=='\n') i--;
  while (i>1 && n[i-1] == '/') i--;
  n[i] = 0;
}
854 
855 // do_lines callback, selects TT.incl or TT.excl based on call order
do_XT(char ** pline,long len)856 static void do_XT(char **pline, long len)
857 {
858   if (pline) trim2list(TT.X ? &TT.excl : &TT.incl, *pline);
859 }
860 
tar_main(void)861 void tar_main(void)
862 {
863   char *s, **args = toys.optargs,
864     *archiver = FLAG(I) ? TT.I : (FLAG(z) ? "gzip" : (FLAG(J) ? "xz":"bzip2"));
865   int len = 0;
866 
867   // Needed when extracting to command
868   signal(SIGPIPE, SIG_IGN);
869 
870   // Get possible early errors out of the way
871   if (!geteuid()) toys.optflags |= FLAG_p;
872   if (TT.owner) {
873     if (!(s = strchr(TT.owner, ':'))) TT.ouid = xgetuid(TT.owner);
874     else {
875       TT.owner = xstrndup(TT.owner, s++-TT.owner);
876       TT.ouid = atolx_range(s, 0, INT_MAX);
877     }
878   }
879   if (TT.group) {
880     if (!(s = strchr(TT.group, ':'))) TT.ggid = xgetgid(TT.group);
881     else {
882       TT.group = xstrndup(TT.group, s++-TT.group);
883       TT.ggid = atolx_range(s, 0, INT_MAX);
884     }
885   }
886   if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
887 
888   // Collect file list.
889   for (; TT.exclude; TT.exclude = TT.exclude->next)
890     trim2list(&TT.excl, TT.exclude->arg);
891   for (;TT.X; TT.X = TT.X->next) do_lines(xopenro(TT.X->arg), '\n', do_XT);
892   for (args = toys.optargs; *args; args++) trim2list(&TT.incl, *args);
893   for (;TT.T; TT.T = TT.T->next) do_lines(xopenro(TT.T->arg), '\n', do_XT);
894 
895   // If include file list empty, don't create empty archive
896   if (FLAG(c)) {
897     if (!TT.incl) error_exit("empty archive");
898     TT.fd = 1;
899   }
900 
901   // nommu reentry for nonseekable input skips this, parent did it for us
902   if (toys.stacktop) {
903     if (TT.f && strcmp(TT.f, "-"))
904       TT.fd = xcreate(TT.f, TT.fd*(O_WRONLY|O_CREAT|O_TRUNC), 0666);
905     // Get destination directory
906     if (TT.C) xchdir(TT.C);
907   }
908 
909   // Get destination directory
910   TT.cwd = xabspath(s = xgetcwd(), ABS_PATH);
911   free(s);
912 
913   // Remember archive inode so we don't overwrite it or add it to itself
914   {
915     struct stat st;
916 
917     if (!fstat(TT.fd, &st)) {
918       TT.aino = st.st_ino;
919       TT.adev = st.st_dev;
920     }
921   }
922 
923   // Are we reading?
924   if (FLAG(x)||FLAG(t)) {
925     char *hdr = 0;
926 
927     // autodetect compression type when not specified
928     if (!(FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J))) {
929       len = xread(TT.fd, hdr = toybuf+sizeof(toybuf)-512, 512);
930       if (len!=512 || !is_tar_header(hdr)) {
931         // detect gzip and bzip signatures
932         if (SWAP_BE16(*(short *)hdr)==0x1f8b) toys.optflags |= FLAG_z;
933         else if (!memcmp(hdr, "BZh", 3)) toys.optflags |= FLAG_j;
934         else if (peek_be(hdr, 7) == 0xfd377a585a0000UL) toys.optflags |= FLAG_J;
935         else error_exit("Not tar");
936 
937         // if we can seek back we don't need to loop and copy data
938         if (!lseek(TT.fd, -len, SEEK_CUR)) hdr = 0;
939       }
940     }
941 
942     if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
943       int pipefd[2] = {hdr ? -1 : TT.fd, -1}, i, pid;
944       struct string_list *zcat = FLAG(I) ? 0 : find_in_path(getenv("PATH"),
945         FLAG(j) ? "bzcat" : FLAG(J) ? "xzcat" : "zcat");
946 
947       // Toybox provides more decompressors than compressors, so try them first
948       TT.pid = xpopen_both(zcat ? (char *[]){zcat->str, 0} :
949         (char *[]){archiver, "-d", 0}, pipefd);
950       if (CFG_TOYBOX_FREE) llist_traverse(zcat, free);
951 
952       if (!hdr) {
953         // If we could seek, child gzip inherited fd and we read its output
954         close(TT.fd);
955         TT.fd = pipefd[1];
956 
957       } else {
958 
959         // If we autodetected type but then couldn't lseek to put the data back
960         // we have to loop reading data from TT.fd and pass it to gzip ourselves
961         // (starting with the block of data we read to autodetect).
962 
963         // dirty trick: move gzip input pipe to stdin so child closes spare copy
964         dup2(pipefd[0], 0);
965         if (pipefd[0]) close(pipefd[0]);
966 
967         // Fork a copy of ourselves to handle extraction (reads from zip output
968         // pipe, writes to stdout).
969         pipefd[0] = pipefd[1];
970         pipefd[1] = 1;
971         pid = xpopen_both(0, pipefd);
972         close(pipefd[1]);
973 
974         // loop writing collated data to zip proc
975         xwrite(0, hdr, len);
976         for (;;) {
977           if ((i = read(TT.fd, toybuf, sizeof(toybuf)))<1) {
978             close(0);
979             xwaitpid(pid);
980             return;
981           }
982           xwrite(0, toybuf, i);
983         }
984       }
985     }
986 
987     unpack_tar(hdr);
988     dirflush(0, 0);
989     // Shut up archiver about inability to write all trailing NULs to pipe buf
990     if (TT.pid>0) kill(TT.pid, 9);
991 
992     // Each time a TT.incl entry is seen it's moved to the end of the list,
993     // with TT.seen pointing to first seen list entry. Anything between
994     // TT.incl and TT.seen wasn't encountered in archive..
995     if (TT.seen != TT.incl) {
996       if (!TT.seen) TT.seen = TT.incl;
997       while (TT.incl != TT.seen) {
998         error_msg("'%s' not in archive", TT.incl->data);
999         TT.incl = TT.incl->next;
1000       }
1001     }
1002 
1003   // are we writing? (Don't have to test flag here, one of 3 must be set)
1004   } else {
1005     struct double_list *dl = TT.incl;
1006 
1007     // autodetect compression type based on -f name. (Use > to avoid.)
1008     if (TT.f && !FLAG(j) && !FLAG(z) && !FLAG(I) && !FLAG(J)) {
1009       char *tbz[] = {".tbz", ".tbz2", ".tar.bz", ".tar.bz2"};
1010       if (strend(TT.f, ".tgz") || strend(TT.f, ".tar.gz"))
1011         toys.optflags |= FLAG_z;
1012       if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz"))
1013         toys.optflags |= FLAG_J;
1014       else for (len = 0; len<ARRAY_LEN(tbz); len++)
1015         if (strend(TT.f, tbz[len])) toys.optflags |= FLAG_j;
1016     }
1017 
1018     if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
1019       int pipefd[2] = {-1, TT.fd};
1020 
1021       xpopen_both((char *[]){archiver, 0}, pipefd);
1022       close(TT.fd);
1023       TT.fd = pipefd[0];
1024     }
1025     do {
1026       TT.warn = 1;
1027       dirtree_flagread(dl->data, FLAG(h) ? DIRTREE_SYMFOLLOW : 0, add_to_tar);
1028     } while (TT.incl != (dl = dl->next));
1029 
1030     writeall(TT.fd, toybuf, 1024);
1031   }
1032   if (toys.exitval) error_msg("had errors");
1033 
1034   if (CFG_TOYBOX_FREE) {
1035     llist_traverse(TT.excl, llist_free_double);
1036     llist_traverse(TT.incl, llist_free_double);
1037     while(TT.hlc) free(TT.hlx[--TT.hlc].arg);
1038     free(TT.hlx);
1039     free(TT.cwd);
1040     close(TT.fd);
1041   }
1042 }
1043