• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* tar.c - create/extract archives
2  *
3  * Copyright 2014 Ashwini Kumar <ak.ashwini81@gmail.com>
4  *
5  * For the command, see
6  *   http://pubs.opengroup.org/onlinepubs/007908799/xcu/tar.html
7  * For the modern file format, see
8  *   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
9  *   https://en.wikipedia.org/wiki/Tar_(computing)#File_format
10  *   https://www.gnu.org/software/tar/manual/html_node/Tar-Internals.html
11  *
12  * For writing to external program
13  * http://www.gnu.org/software/tar/manual/html_node/Writing-to-an-External-Program.html
14  *
15  * Toybox will never implement the "pax" command as a matter of policy.
16  *
17  * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
18  *
19 
20 USE_TAR(NEWTOY(tar, "&(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
21 
22 config TAR
23   bool "tar"
24   default y
25   help
26     usage: tar [-cxt] [-fvohmjkOS] [-XTCf NAME] [FILE...]
27 
28     Create, extract, or list files in a .tar (or compressed t?z) file.
29 
30     Options:
31     c  Create                x  Extract               t  Test (list)
32     f  tar FILE (default -)  C  Change to DIR first   v  Verbose display
33     o  Ignore owner          h  Follow symlinks       m  Ignore mtime
34     J  xz compression        j  bzip2 compression     z  gzip compression
35     O  Extract to stdout     X  exclude names in FILE T  include names in FILE
36 
37     --exclude        FILENAME to exclude    --full-time   Show seconds with -tv
38     --mode MODE      Adjust modes           --mtime TIME  Override timestamps
39     --owner NAME     Set file owner to NAME --group NAME  Set file group to NAME
40     --sparse         Record sparse files
41     --restrict       All archive contents must extract under one subdirectory
42     --numeric-owner  Save/use/display uid and gid, not user/group name
43     --no-recursion   Don't store directory contents
44 */
45 
46 #define FOR_tar
47 #include "toys.h"
48 
49 GLOBALS(
50   char *f, *C;
51   struct arg_list *T, *X;
52   char *to_command, *owner, *group, *mtime, *mode;
53   struct arg_list *exclude;
54 
55   struct double_list *incl, *excl, *seen;
56   struct string_list *dirs;
57   char *cwd;
58   int fd, ouid, ggid, hlc, warn, adev, aino, sparselen;
59   long long *sparse;
60   time_t mtt;
61 
62   // hardlinks seen so far (hlc many)
63   struct {
64     char *arg;
65     ino_t ino;
66     dev_t dev;
67   } *hlx;
68 
69   // Parsed information about a tar header.
70   struct tar_header {
71     char *name, *link_target, *uname, *gname;
72     long long size, ssize;
73     uid_t uid;
74     gid_t gid;
75     mode_t mode;
76     time_t mtime;
77     dev_t device;
78   } hdr;
79 )
80 
// Raw layout of one 512 byte tar header block. Every member is char sized,
// so each field's offset is the running sum of the sizes before it.
struct tar_hdr {
  char name[100];    // offset 0
  char mode[8];      // offset 100
  char uid[8];       // offset 108
  char gid[8];       // offset 116
  char size[12];     // offset 124
  char mtime[12];    // offset 136
  char chksum[8];    // offset 148
  char type;         // offset 156
  char link[100];    // offset 157
  char magic[8];     // offset 257
  char uname[32];    // offset 265
  char gname[32];    // offset 297
  char major[8];     // offset 329
  char minor[8];     // offset 337
  char prefix[155];  // offset 345
  char padd[12];     // offset 500
};
86 
// Store val in a len byte tar header field.
//
// When val fits in len-1 octal digits it is written as zero padded octal
// followed by a NUL. Otherwise the field uses the binary (base-256) form:
// byte 0 is 0x80 and the value is stored big endian in the bytes after it.
static void itoo(char *str, int len, unsigned long long val)
{
  // Do we need binary encoding?
  if (!(val>>(3*(len-1)))) sprintf(str, "%0*llo", len-1, val);
  else {
    // Fill from the least significant byte backwards, 8 bits per byte, then
    // replace byte 0 with the marker. (Shifting by 3 bits per byte here
    // produced a value otoi()'s val<<8 decoding could not read back.)
    for (str += len; len--; val >>= 8) *--str = val;
    *str = 128;
  }
}
#define ITOO(x, y) itoo(x, sizeof(x), y)
98 
// Decode a len byte tar header field into an integer.
//
// A first byte with the high bit set means binary (base-256): the remaining
// len-1 bytes are the value, big endian. Otherwise the field is octal text
// with optional leading spaces, ended by NUL, space, or the end of the field;
// any other terminator is a fatal "bad header".
static unsigned long long otoi(char *str, unsigned len)
{
  unsigned long long val = 0;

  // When tar value too big or octal, use binary encoding with high bit set.
  // Bytes are read as unsigned so values >= 0x80 can't sign extend.
  if (128&*str) while (--len) val = (val<<8)+(unsigned char)*++str;
  else {
    // Leading blanks count against the field width too, otherwise an all
    // blank field would be parsed out of whatever field follows it.
    while (len && *str == ' ') str++, len--;
    while (len && *str>='0' && *str<='7') val = val*8+*str++-'0', len--;
    if (len && *str && *str != ' ') error_exit("bad header");
  }

  return val;
}
#define OTOI(x) otoi(x, sizeof(x))
115 
write_longname(char * name,char type)116 static void write_longname(char *name, char type)
117 {
118   struct tar_hdr tmp;
119   int sz = strlen(name) +1;
120 
121   memset(&tmp, 0, sizeof(tmp));
122   strcpy(tmp.name, "././@LongLink");
123   ITOO(tmp.uid, 0);
124   ITOO(tmp.gid, 0);
125   ITOO(tmp.size, sz);
126   ITOO(tmp.mtime, 0);
127   tmp.type = type;
128   strcpy(tmp.magic, "ustar  ");
129 
130   // Historical nonsense to match other implementations. Never used.
131   ITOO(tmp.mode, 0644);
132   strcpy(tmp.uname, "root");
133   strcpy(tmp.gname, "root");
134 
135   // Calculate checksum. Since 512*255 = 0377000 in octal, this can never
136   // use more than 6 digits. The last byte is ' ' for historical reasons.
137   itoo(tmp.chksum, sizeof(tmp.chksum)-1, tar_cksum(&tmp));
138   tmp.chksum[7] = ' ';
139 
140   // write header and name, padded with NUL to block size
141   xwrite(TT.fd, &tmp, 512);
142   xwrite(TT.fd, name, sz);
143   if (sz%512) xwrite(TT.fd, toybuf, 512-(sz%512));
144 }
145 
filter(struct double_list * lst,char * name)146 static struct double_list *filter(struct double_list *lst, char *name)
147 {
148   struct double_list *end = lst;
149 
150   if (lst)
151     // constant is FNM_LEADING_DIR
152     do if (!fnmatch(lst->data, name, 1<<3)) return lst;
153     while (end != (lst = lst->next));
154 
155   return 0;
156 }
157 
skippy(long long len)158 static void skippy(long long len)
159 {
160   if (lskip(TT.fd, len)) perror_exit("EOF");
161 }
162 
163 // allocate and read data from TT.fd
alloread(void * buf,int len)164 static void alloread(void *buf, int len)
165 {
166   // actually void **, but automatic typecasting doesn't work with void ** :(
167   char **b = buf;
168 
169   free(*b);
170   *b = xmalloc(len+1);
171   xreadall(TT.fd, *b, len);
172   (*b)[len] = 0;
173 }
174 
175 // callback from dirtree to create archive
add_to_tar(struct dirtree * node)176 static int add_to_tar(struct dirtree *node)
177 {
178   struct stat *st = &(node->st);
179   struct tar_hdr hdr;
180   struct passwd *pw = pw;
181   struct group *gr = gr;
182   int i, fd =-1;
183   char *name, *lnk, *hname;
184 
185   if (!dirtree_notdotdot(node)) return 0;
186   if (TT.adev == st->st_dev && TT.aino == st->st_ino) {
187     error_msg("'%s' file is the archive; not dumped", node->name);
188     return 0;
189   }
190 
191   i = 1;
192   name = dirtree_path(node, &i);
193 
194   // exclusion defaults to --no-anchored and --wildcards-match-slash
195   for (lnk = name; *lnk;) {
196     if (filter(TT.excl, lnk)) goto done;
197     while (*lnk && *lnk!='/') lnk++;
198     while (*lnk=='/') lnk++;
199   }
200 
201   // Consume the 1 extra byte alocated in dirtree_path()
202   if (S_ISDIR(st->st_mode) && name[i-1] != '/') strcat(name, "/");
203 
204   // remove leading / and any .. entries from saved name
205   for (hname = name; *hname == '/'; hname++);
206   for (lnk = hname;;) {
207     if (!(lnk = strstr(lnk, ".."))) break;
208     if (lnk == hname || lnk[-1] == '/') {
209       if (!lnk[2]) goto done;
210       if (lnk[2]=='/') lnk = hname = lnk+3;
211     } else lnk+= 2;
212   }
213   if (!*hname) goto done;
214 
215   if (TT.warn && hname != name) {
216     fprintf(stderr, "removing leading '%.*s' from member names\n",
217            (int)(hname-name), name);
218     TT.warn = 0;
219   }
220 
221   if (TT.owner) st->st_uid = TT.ouid;
222   if (TT.group) st->st_gid = TT.ggid;
223   if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
224   if (TT.mtime) st->st_mtime = TT.mtt;
225 
226   memset(&hdr, 0, sizeof(hdr));
227   strncpy(hdr.name, hname, sizeof(hdr.name));
228   ITOO(hdr.mode, st->st_mode &07777);
229   ITOO(hdr.uid, st->st_uid);
230   ITOO(hdr.gid, st->st_gid);
231   ITOO(hdr.size, 0); //set size later
232   ITOO(hdr.mtime, st->st_mtime);
233   strcpy(hdr.magic, "ustar  ");
234 
235   // Hard link or symlink? i=0 neither, i=1 hardlink, i=2 symlink
236 
237   // Are there hardlinks to a non-directory entry?
238   if (st->st_nlink>1 && !S_ISDIR(st->st_mode)) {
239     // Have we seen this dev&ino before?
240     for (i = 0; i<TT.hlc; i++) {
241       if (st->st_ino == TT.hlx[i].ino && st->st_dev == TT.hlx[i].dev)
242         break;
243     }
244     if (i != TT.hlc) {
245       lnk = TT.hlx[i].arg;
246       i = 1;
247     } else {
248       // first time we've seen it. Store as normal file, but remember it.
249       if (!(TT.hlc&255)) TT.hlx = xrealloc(TT.hlx, TT.hlc+256);
250       TT.hlx[TT.hlc].arg = xstrdup(hname);
251       TT.hlx[TT.hlc].ino = st->st_ino;
252       TT.hlx[TT.hlc].dev = st->st_dev;
253       TT.hlc++;
254       i = 0;
255     }
256   } else i = 0;
257 
258   // !i because hardlink to a symlink is a thing.
259   if (!i && S_ISLNK(st->st_mode)) {
260     i = 2;
261     lnk = xreadlink(name);
262   }
263 
264   // Handle file types
265   if (i) {
266     hdr.type = '0'+i;
267     if (i==2 && !(lnk = xreadlink(name))) {
268       perror_msg("readlink");
269       goto done;
270     }
271     if (strlen(lnk) > sizeof(hdr.link)) write_longname(lnk, 'K');
272     strncpy(hdr.link, lnk, sizeof(hdr.link));
273     if (i) free(lnk);
274   } else if (S_ISREG(st->st_mode)) {
275     hdr.type = '0';
276     ITOO(hdr.size, st->st_size);
277   } else if (S_ISDIR(st->st_mode)) hdr.type = '5';
278   else if (S_ISFIFO(st->st_mode)) hdr.type = '6';
279   else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) {
280     hdr.type = (S_ISCHR(st->st_mode))?'3':'4';
281     ITOO(hdr.major, dev_major(st->st_rdev));
282     ITOO(hdr.minor, dev_minor(st->st_rdev));
283   } else {
284     error_msg("unknown file type '%o'", st->st_mode & S_IFMT);
285     goto done;
286   }
287 
288   if (strlen(hname) > sizeof(hdr.name)) write_longname(hname, 'L');
289 
290   if (!FLAG(numeric_owner)) {
291     if (TT.owner || (pw = bufgetpwuid(st->st_uid)))
292       strncpy(hdr.uname, TT.owner ? TT.owner : pw->pw_name, sizeof(hdr.uname));
293     if (TT.group || (gr = bufgetgrgid(st->st_gid)))
294       strncpy(hdr.gname, TT.group ? TT.group : gr->gr_name, sizeof(hdr.gname));
295   }
296 
297   TT.sparselen = 0;
298   if (hdr.type == '0') {
299     // Before we write the header, make sure we can read the file
300     if ((fd = open(name, O_RDONLY)) < 0) {
301       perror_msg("can't open '%s'", name);
302 
303       return 0;
304     }
305     if (FLAG(S)) {
306       long long lo, ld = 0, len = 0;
307 
308       // Enumerate the extents
309       while ((lo = lseek(fd, ld, SEEK_HOLE)) != -1) {
310         if (!(TT.sparselen&511))
311           TT.sparse = xrealloc(TT.sparse, (TT.sparselen+514)*sizeof(long long));
312         if (ld != lo) {
313           TT.sparse[TT.sparselen++] = ld;
314           len += TT.sparse[TT.sparselen++] = lo-ld;
315         }
316         if (lo == st->st_size || (ld = lseek(fd, lo, SEEK_DATA)) < lo) break;
317       }
318 
319       // If there were extents, change type to S record
320       if (TT.sparselen>2) {
321         TT.sparse[TT.sparselen++] = st->st_size;
322         TT.sparse[TT.sparselen++] = 0;
323         hdr.type = 'S';
324         lnk = (char *)&hdr;
325         for (i = 0; i<TT.sparselen && i<8; i++)
326           itoo(lnk+386+12*i, 12, TT.sparse[i]);
327 
328         // Record if there's overflow records, change length to sparse length,
329         // record apparent length
330         if (TT.sparselen>8) lnk[482] = 1;
331         itoo(lnk+483, 12, st->st_size);
332         ITOO(hdr.size, len);
333       } else TT.sparselen = 0;
334       lseek(fd, 0, SEEK_SET);
335     }
336   }
337 
338   itoo(hdr.chksum, sizeof(hdr.chksum)-1, tar_cksum(&hdr));
339   hdr.chksum[7] = ' ';
340 
341   if (FLAG(v)) dprintf(TT.fd ? 2 : 1, "%s\n", hname);
342 
343   // Write header and data to archive
344   xwrite(TT.fd, &hdr, 512);
345   if (TT.sparselen>8) {
346     char buf[512];
347 
348     // write extent overflow blocks
349     for (i=8;;i++) {
350       int j = (i-8)%42;
351 
352       if (!j || i==TT.sparselen) {
353         if (i!=8) {
354           if (i!=TT.sparselen) buf[504] = 1;
355           xwrite(TT.fd, buf, 512);
356         }
357         if (i==TT.sparselen) break;
358         memset(buf, 0, sizeof(buf));
359       }
360       itoo(buf+12*j, 12, TT.sparse[i]);
361     }
362   }
363   TT.sparselen >>= 1;
364   if (hdr.type == '0' || hdr.type == 'S') {
365     if (hdr.type == '0') xsendfile_pad(fd, TT.fd, st->st_size);
366     else for (i = 0; i<TT.sparselen; i++) {
367       if (TT.sparse[i*2] != lseek(fd, TT.sparse[i*2], SEEK_SET))
368         perror_msg("%s: seek %lld", name, TT.sparse[i*2]);
369       xsendfile_pad(fd, TT.fd, TT.sparse[i*2+1]);
370     }
371     if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512)));
372     close(fd);
373   }
374 done:
375   free(name);
376 
377   return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!FLAG(no_recursion);
378 }
379 
wsettime(char * s,long long sec)380 static void wsettime(char *s, long long sec)
381 {
382   struct timespec times[2] = {{sec, 0},{sec, 0}};
383 
384   if (utimensat(AT_FDCWD, s, times, AT_SYMLINK_NOFOLLOW))
385     perror_msg("settime %lld %s", sec, s);
386 }
387 
388 // Do pending directory utimes(), NULL to flush all.
dirflush(char * name,int isdir)389 static int dirflush(char *name, int isdir)
390 {
391   char *s = 0, *ss;
392 
393   // Barf if name not in TT.cwd
394   if (name) {
395     if (!(ss = s = xabspath(name, -1-isdir))) {
396       error_msg("'%s' bad symlink", name);
397 
398       return 1;
399     }
400     if (TT.cwd[1] && (!strstart(&ss, TT.cwd) || (*ss && *ss!='/'))) {
401       error_msg("'%s' %s not under '%s'", name, s, TT.cwd);
402       free(s);
403 
404       return 1;
405     }
406 
407     // --restrict means first entry extracted is what everything must be under
408     if (FLAG(restrict)) {
409       free(TT.cwd);
410       TT.cwd = strdup(s);
411       toys.optflags ^= FLAG_restrict;
412     }
413     // use resolved name so trailing / is stripped
414     if (isdir) unlink(s);
415   }
416 
417   // Set deferred utimes() for directories this file isn't under.
418   // (Files must be depth-first ordered in tarball for this to matter.)
419   while (TT.dirs) {
420 
421     // If next file is under (or equal to) this dir, keep waiting
422     if (name && strstart(&ss, ss = s) && (!*ss || *ss=='/')) break;
423 
424     wsettime(TT.dirs->str+sizeof(long long), *(long long *)TT.dirs->str);
425     free(llist_pop(&TT.dirs));
426   }
427   free(s);
428 
429   // name was under TT.cwd
430   return 0;
431 }
432 
433 // write data to file
sendfile_sparse(int fd)434 static void sendfile_sparse(int fd)
435 {
436   long long len, used = 0, sent;
437   int i = 0, j;
438 
439   do {
440     if (TT.sparselen) {
441       // Seek past holes or fill output with zeroes.
442       if (-1 == lseek(fd, len = TT.sparse[i*2], SEEK_SET)) {
443         sent = 0;
444         while (len) {
445           // first/last 512 bytes used, rest left zeroes
446           j = (len>3072) ? 3072 : len;
447           if (j != writeall(fd, toybuf+512, j)) goto error;
448           len -= j;
449         }
450       } else {
451         sent = len;
452         if (!(len = TT.sparse[i*2+1]) && ftruncate(fd, sent+len))
453           perror_msg("ftruncate");
454       }
455       if (len+used>TT.hdr.size) error_exit("sparse overflow");
456     } else len = TT.hdr.size;
457 
458     len -= sendfile_len(TT.fd, fd, len, &sent);
459     used += sent;
460     if (len) {
461 error:
462       if (fd!=1) perror_msg(0);
463       skippy(TT.hdr.size-used);
464 
465       break;
466     }
467   } while (++i<TT.sparselen);
468 
469   close(fd);
470 }
471 
extract_to_disk(void)472 static void extract_to_disk(void)
473 {
474   char *name = TT.hdr.name;
475   int ala = TT.hdr.mode;
476 
477   if (dirflush(name, S_ISDIR(ala))) {
478     if (S_ISREG(ala) && !TT.hdr.link_target) skippy(TT.hdr.size);
479 
480     return;
481   }
482 
483   // create path before file if necessary
484   if (strrchr(name, '/') && mkpath(name) && errno!=EEXIST)
485       return perror_msg(":%s: can't mkdir", name);
486 
487   // remove old file, if exists
488   if (!FLAG(k) && !S_ISDIR(ala) && unlink(name) && errno!=ENOENT)
489     return perror_msg("can't remove: %s", name);
490 
491   if (S_ISREG(ala)) {
492     // hardlink?
493     if (TT.hdr.link_target) {
494       if (link(TT.hdr.link_target, name))
495         return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
496     // write contents
497     } else {
498       int fd = xcreate(name, O_WRONLY|O_CREAT|(FLAG(overwrite)?O_TRUNC:O_EXCL),
499         WARN_ONLY|(ala & 07777));
500       if (fd != -1) sendfile_sparse(fd);
501       else skippy(TT.hdr.size);
502     }
503   } else if (S_ISDIR(ala)) {
504     if ((mkdir(name, 0700) == -1) && errno != EEXIST)
505       return perror_msg("%s: can't create", TT.hdr.name);
506   } else if (S_ISLNK(ala)) {
507     if (symlink(TT.hdr.link_target, TT.hdr.name))
508       return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
509   } else if (mknod(name, ala, TT.hdr.device))
510     return perror_msg("can't create '%s'", name);
511 
512   // Set ownership
513   if (!FLAG(o) && !geteuid()) {
514     int u = TT.hdr.uid, g = TT.hdr.gid;
515 
516     if (TT.owner) TT.hdr.uid = TT.ouid;
517     else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
518       struct passwd *pw = getpwnam(TT.hdr.uname);
519       if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
520     }
521 
522     if (TT.group) TT.hdr.gid = TT.ggid;
523     else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
524       struct group *gr = getgrnam(TT.hdr.gname);
525       if (gr) TT.hdr.gid = gr->gr_gid;
526     }
527 
528     if (lchown(name, u, g)) perror_msg("chown %d:%d '%s'", u, g, name);;
529   }
530 
531   if (!S_ISLNK(ala)) chmod(TT.hdr.name, FLAG(p) ? ala : ala&0777);
532 
533   // Apply mtime.
534   if (!FLAG(m)) {
535     if (S_ISDIR(ala)) {
536       struct string_list *sl;
537 
538       // Writing files into a directory changes directory timestamps, so
539       // defer mtime updates until contents written.
540 
541       sl = xmalloc(sizeof(struct string_list)+sizeof(long long)+strlen(name)+1);
542       *(long long *)sl->str = TT.hdr.mtime;
543       strcpy(sl->str+sizeof(long long), name);
544       sl->next = TT.dirs;
545       TT.dirs = sl;
546     } else wsettime(TT.hdr.name, TT.hdr.mtime);
547   }
548 }
549 
unpack_tar(char * first)550 static void unpack_tar(char *first)
551 {
552   struct double_list *walk, *delete;
553   struct tar_hdr tar;
554   int i, and = 0;
555   unsigned maj, min;
556   char *s;
557 
558   for (;;) {
559     if (first) {
560       memcpy(&tar, first, i = 512);
561       first = 0;
562     } else {
563       // align to next block and read it
564       if (TT.hdr.size%512) skippy(512-TT.hdr.size%512);
565       i = readall(TT.fd, &tar, 512);
566     }
567 
568     if (i && i!=512) error_exit("short header");
569 
570     // Two consecutive empty headers ends tar even if there's more data
571     if (!i || !*tar.name) {
572       if (!i || and++) return;
573       TT.hdr.size = 0;
574       continue;
575     }
576     // ensure null temination even of pathological packets
577     tar.padd[0] = and = 0;
578 
579     // Is this a valid TAR header?
580     if (!is_tar_header(&tar)) error_exit("bad header");
581     TT.hdr.size = OTOI(tar.size);
582 
583     // If this header isn't writing something to the filesystem
584     if ((tar.type<'0' || tar.type>'7') && tar.type!='S'
585         && (*tar.magic && tar.type))
586     {
587       // Long name extension header?
588       if (tar.type == 'K') alloread(&TT.hdr.link_target, TT.hdr.size);
589       else if (tar.type == 'L') alloread(&TT.hdr.name, TT.hdr.size);
590       else if (tar.type == 'x') {
591         char *p, *buf = 0;
592         int i, len, n;
593 
594         // Posix extended record "LEN NAME=VALUE\n" format
595         alloread(&buf, TT.hdr.size);
596         for (p = buf; (p-buf)<TT.hdr.size; p += len) {
597           i = sscanf(p, "%u path=%n", &len, &n);
598           if (i<1 || len<4 || len>TT.hdr.size) {
599             error_msg("bad header");
600             break;
601           }
602           p[len-1] = 0;
603           if (i == 2) {
604             TT.hdr.name = xstrdup(p+n);
605             break;
606           }
607         }
608         free(buf);
609 
610       // Ignore everything else.
611       } else skippy(TT.hdr.size);
612 
613       continue;
614     }
615 
616     // Handle sparse file type
617     if (tar.type == 'S') {
618       char sparse[512];
619       int max = 8;
620 
621       // Load 4 pairs of offset/len from S block, plus 21 pairs from each
622       // continuation block, list says where to seek/write sparse file contents
623       TT.sparselen = 0;
624       s = 386+(char *)&tar;
625       *sparse = i = 0;
626 
627       for (;;) {
628         if (!(TT.sparselen&511))
629           TT.sparse = xrealloc(TT.sparse, (TT.sparselen+512)*sizeof(long long));
630 
631         // If out of data in block check continue flag, stop or load next block
632         if (++i>max || !*s) {
633           if (!(*sparse ? sparse[504] : ((char *)&tar)[482])) break;
634           xreadall(TT.fd, s = sparse, 512);
635           max = 41;
636           i = 0;
637         }
638         // Load next entry
639         TT.sparse[TT.sparselen++] = otoi(s, 12);
640         s += 12;
641       }
642 
643       // Odd number of entries (from corrupted tar) would be dropped here
644       TT.sparselen /= 2;
645       if (TT.sparselen)
646         TT.hdr.ssize = TT.sparse[2*TT.sparselen-1]+TT.sparse[2*TT.sparselen-2];
647     } else {
648       TT.sparselen = 0;
649       TT.hdr.ssize = TT.hdr.size;
650     }
651 
652     // At this point, we have something to output. Convert metadata.
653     TT.hdr.mode = OTOI(tar.mode)&0xfff;
654     if (tar.type == 'S' || !tar.type) TT.hdr.mode |= 0x8000;
655     else TT.hdr.mode |= (char []){8,8,10,2,6,4,1,8}[tar.type-'0']<<12;
656     TT.hdr.uid = OTOI(tar.uid);
657     TT.hdr.gid = OTOI(tar.gid);
658     TT.hdr.mtime = OTOI(tar.mtime);
659     maj = OTOI(tar.major);
660     min = OTOI(tar.minor);
661     TT.hdr.device = dev_makedev(maj, min);
662     TT.hdr.uname = xstrndup(TT.owner ? TT.owner : tar.uname, sizeof(tar.uname));
663     TT.hdr.gname = xstrndup(TT.group ? TT.group : tar.gname, sizeof(tar.gname));
664 
665     if (TT.owner) TT.hdr.uid = TT.ouid;
666     else if (!FLAG(numeric_owner)) {
667       struct passwd *pw = getpwnam(TT.hdr.uname);
668       if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
669     }
670 
671     if (TT.group) TT.hdr.gid = TT.ggid;
672     else if (!FLAG(numeric_owner)) {
673       struct group *gr = getgrnam(TT.hdr.gname);
674       if (gr) TT.hdr.gid = gr->gr_gid;
675     }
676 
677     if (!TT.hdr.link_target && *tar.link)
678       TT.hdr.link_target = xstrndup(tar.link, sizeof(tar.link));
679     if (!TT.hdr.name) {
680       // Glue prefix and name fields together with / if necessary
681       i = (tar.type=='S') ? 0 : strnlen(tar.prefix, sizeof(tar.prefix));
682       TT.hdr.name = xmprintf("%.*s%s%.*s", i, tar.prefix,
683         (i && tar.prefix[i-1] != '/') ? "/" : "",
684         (int)sizeof(tar.name), tar.name);
685     }
686 
687     // Old broken tar recorded dir as "file with trailing slash"
688     if (S_ISREG(TT.hdr.mode) && (s = strend(TT.hdr.name, "/"))) {
689       *s = 0;
690       TT.hdr.mode = (TT.hdr.mode & ~S_IFMT) | S_IFDIR;
691     }
692 
693     // Non-regular files don't have contents stored in archive.
694     if ((TT.hdr.link_target && *TT.hdr.link_target)
695       || (tar.type && !S_ISREG(TT.hdr.mode)))
696         TT.hdr.size = 0;
697 
698     // Files are seen even if excluded, so check them here.
699     // TT.seen points to first seen entry in TT.incl, or NULL if none yet.
700 
701     if ((delete = filter(TT.incl, TT.hdr.name)) && TT.incl != TT.seen) {
702       if (!TT.seen) TT.seen = delete;
703 
704       // Move seen entry to end of list.
705       if (TT.incl == delete) TT.incl = TT.incl->next;
706       else for (walk = TT.incl; walk != TT.seen; walk = walk->next) {
707         if (walk == delete) {
708           dlist_pop(&walk);
709           dlist_add_nomalloc(&TT.incl, delete);
710         }
711       }
712     }
713 
714     // Skip excluded files
715     if (filter(TT.excl, TT.hdr.name) || (TT.incl && !delete))
716       skippy(TT.hdr.size);
717     else if (FLAG(t)) {
718       if (FLAG(v)) {
719         struct tm *lc = localtime(TT.mtime ? &TT.mtt : &TT.hdr.mtime);
720         char perm[12], gname[12];
721 
722         mode_to_string(TT.hdr.mode, perm);
723         printf("%s", perm);
724         sprintf(perm, "%u", TT.hdr.uid);
725         sprintf(gname, "%u", TT.hdr.gid);
726         printf(" %s/%s ", *TT.hdr.uname ? TT.hdr.uname : perm,
727           *TT.hdr.gname ? TT.hdr.gname : gname);
728         if (tar.type=='3' || tar.type=='4') printf("%u,%u", maj, min);
729         else printf("%9lld", TT.hdr.ssize);
730         sprintf(perm, ":%02d", lc->tm_sec);
731         printf("  %d-%02d-%02d %02d:%02d%s ", 1900+lc->tm_year, 1+lc->tm_mon,
732           lc->tm_mday, lc->tm_hour, lc->tm_min, FLAG(full_time) ? perm : "");
733       }
734       printf("%s", TT.hdr.name);
735       if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
736       xputc('\n');
737       skippy(TT.hdr.size);
738     } else {
739       if (FLAG(v)) printf("%s\n", TT.hdr.name);
740       if (FLAG(O)) sendfile_sparse(1);
741       else if (FLAG(to_command)) {
742         if (S_ISREG(TT.hdr.mode)) {
743           int fd, pid;
744 
745           xsetenv("TAR_FILETYPE", "f");
746           xsetenv(xmprintf("TAR_MODE=%o", TT.hdr.mode), 0);
747           xsetenv(xmprintf("TAR_SIZE=%lld", TT.hdr.ssize), 0);
748           xsetenv("TAR_FILENAME", TT.hdr.name);
749           xsetenv("TAR_UNAME", TT.hdr.uname);
750           xsetenv("TAR_GNAME", TT.hdr.gname);
751           xsetenv(xmprintf("TAR_MTIME=%llo", (long long)TT.hdr.mtime), 0);
752           xsetenv(xmprintf("TAR_UID=%o", TT.hdr.uid), 0);
753           xsetenv(xmprintf("TAR_GID=%o", TT.hdr.gid), 0);
754 
755           pid = xpopen((char *[]){"sh", "-c", TT.to_command, NULL}, &fd, 0);
756           // todo: short write exits tar here, other skips data.
757           sendfile_sparse(fd);
758           fd = xpclose_both(pid, 0);
759           if (fd) error_msg("%d: Child returned %d", pid, fd);
760         }
761       } else extract_to_disk();
762     }
763 
764     free(TT.hdr.name);
765     free(TT.hdr.link_target);
766     free(TT.hdr.uname);
767     free(TT.hdr.gname);
768     TT.hdr.name = TT.hdr.link_target = 0;
769   }
770 }
771 
// Add a copy of pline to the dlist at list (really a struct double_list **),
// with one trailing newline and any trailing slashes removed. A name made up
// only of slashes keeps a single '/', so the root directory stays nameable.
static void trim2list(void *list, char *pline)
{
  char *n = xstrdup(pline);
  int i = strlen(n);

  dlist_add(list, n);
  if (i && n[i-1]=='\n') i--;
  // Stop at i>1: trimming "/" down to an empty string would lose the name.
  while (i>1 && n[i-1] == '/') i--;
  n[i] = 0;
}
783 
784 // do_lines callback, selects TT.incl or TT.excl based on call order
do_XT(char ** pline,long len)785 static void do_XT(char **pline, long len)
786 {
787   if (pline) trim2list(TT.X ? &TT.excl : &TT.incl, *pline);
788 }
789 
tar_main(void)790 void tar_main(void)
791 {
792   char *s, **args = toys.optargs,
793     *archiver = FLAG(z) ? "gzip" : (FLAG(J) ? "xz" : "bzip2");
794   int len = 0;
795 
796   // Needed when extracting to command
797   signal(SIGPIPE, SIG_IGN);
798 
799   // Get possible early errors out of the way
800   if (!geteuid()) toys.optflags |= FLAG_p;
801   if (TT.owner) TT.ouid = xgetuid(TT.owner);
802   if (TT.group) TT.ggid = xgetgid(TT.group);
803   if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
804 
805   // Collect file list.
806   for (; TT.exclude; TT.exclude = TT.exclude->next)
807     trim2list(&TT.excl, TT.exclude->arg);
808   for (;TT.X; TT.X = TT.X->next) do_lines(xopenro(TT.X->arg), '\n', do_XT);
809   for (args = toys.optargs; *args; args++) trim2list(&TT.incl, *args);
810   for (;TT.T; TT.T = TT.T->next) do_lines(xopenro(TT.T->arg), '\n', do_XT);
811 
812   // If include file list empty, don't create empty archive
813   if (FLAG(c)) {
814     if (!TT.incl) error_exit("empty archive");
815     TT.fd = 1;
816   }
817 
818   // nommu reentry for nonseekable input skips this, parent did it for us
819   if (toys.stacktop) {
820     if (TT.f && strcmp(TT.f, "-"))
821       TT.fd = xcreate(TT.f, TT.fd*(O_WRONLY|O_CREAT|O_TRUNC), 0666);
822     // Get destination directory
823     if (TT.C) xchdir(TT.C);
824   }
825 
826   // Get destination directory
827   TT.cwd = xabspath(s = xgetcwd(), 1);
828   free(s);
829 
830   // Remember archive inode so we don't overwrite it or add it to itself
831   {
832     struct stat st;
833 
834     if (!fstat(TT.fd, &st)) {
835       TT.aino = st.st_ino;
836       TT.adev = st.st_dev;
837     }
838   }
839 
840   // Are we reading?
841   if (FLAG(x)||FLAG(t)) {
842     char *hdr = 0;
843 
844     // autodetect compression type when not specified
845     if (!(FLAG(j)||FLAG(z)||FLAG(J))) {
846       len = xread(TT.fd, hdr = toybuf+sizeof(toybuf)-512, 512);
847       if (len!=512 || !is_tar_header(hdr)) {
848         // detect gzip and bzip signatures
849         if (SWAP_BE16(*(short *)hdr)==0x1f8b) toys.optflags |= FLAG_z;
850         else if (!memcmp(hdr, "BZh", 3)) toys.optflags |= FLAG_j;
851         else if (peek_be(hdr, 7) == 0xfd377a585a0000UL) toys.optflags |= FLAG_J;
852         else error_exit("Not tar");
853 
854         // if we can seek back we don't need to loop and copy data
855         if (!lseek(TT.fd, -len, SEEK_CUR)) hdr = 0;
856       }
857     }
858 
859     if (FLAG(j)||FLAG(z)||FLAG(J)) {
860       int pipefd[2] = {hdr ? -1 : TT.fd, -1}, i, pid;
861       struct string_list *zcat = find_in_path(getenv("PATH"),
862         FLAG(j) ? "bzcat" : FLAG(J) ? "xzcat" : "zcat");
863 
864       // Toybox provides more decompressors than compressors, so try them first
865       xpopen_both(zcat ? (char *[]){zcat->str, 0} :
866         (char *[]){archiver, "-dc", 0}, pipefd);
867       if (CFG_TOYBOX_FREE) llist_traverse(zcat, free);
868 
869       if (!hdr) {
870         // If we could seek, child gzip inherited fd and we read its output
871         close(TT.fd);
872         TT.fd = pipefd[1];
873 
874       } else {
875 
876         // If we autodetected type but then couldn't lseek to put the data back
877         // we have to loop reading data from TT.fd and pass it to gzip ourselves
878         // (starting with the block of data we read to autodetect).
879 
880         // dirty trick: move gzip input pipe to stdin so child closes spare copy
881         dup2(pipefd[0], 0);
882         if (pipefd[0]) close(pipefd[0]);
883 
884         // Fork a copy of ourselves to handle extraction (reads from zip output
885         // pipe, writes to stdout).
886         pipefd[0] = pipefd[1];
887         pipefd[1] = 1;
888         pid = xpopen_both(0, pipefd);
889         close(pipefd[1]);
890 
891         // loop writing collated data to zip proc
892         xwrite(0, hdr, len);
893         for (;;) {
894           if ((i = read(TT.fd, toybuf, sizeof(toybuf)))<1) {
895             close(0);
896             xwaitpid(pid);
897             return;
898           }
899           xwrite(0, toybuf, i);
900         }
901       }
902     }
903 
904     unpack_tar(hdr);
905     dirflush(0, 0);
906 
907     // Each time a TT.incl entry is seen it's moved to the end of the list,
908     // with TT.seen pointing to first seen list entry. Anything between
909     // TT.incl and TT.seen wasn't encountered in archive..
910     if (TT.seen != TT.incl) {
911       if (!TT.seen) TT.seen = TT.incl;
912       while (TT.incl != TT.seen) {
913         error_msg("'%s' not in archive", TT.incl->data);
914         TT.incl = TT.incl->next;
915       }
916     }
917 
918   // are we writing? (Don't have to test flag here, one of 3 must be set)
919   } else {
920     struct double_list *dl = TT.incl;
921 
922     // autodetect compression type based on -f name. (Use > to avoid.)
923     if (TT.f && !FLAG(j) && !FLAG(z)) {
924       char *tbz[] = {".tbz", ".tbz2", ".tar.bz", ".tar.bz2"};
925       if (strend(TT.f, ".tgz") || strend(TT.f, ".tar.gz"))
926         toys.optflags |= FLAG_z;
927       if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz"))
928         toys.optflags |= FLAG_J;
929       else for (len = 0; len<ARRAY_LEN(tbz); len++)
930         if (strend(TT.f, tbz[len])) toys.optflags |= FLAG_j;
931     }
932 
933     if (FLAG(j)||FLAG(z)||FLAG(J)) {
934       int pipefd[2] = {-1, TT.fd};
935 
936       xpopen_both((char *[]){archiver, "-f", 0}, pipefd);
937       close(TT.fd);
938       TT.fd = pipefd[0];
939     }
940     do {
941       TT.warn = 1;
942       dirtree_flagread(dl->data, FLAG(h)?DIRTREE_SYMFOLLOW:0, add_to_tar);
943     } while (TT.incl != (dl = dl->next));
944 
945     writeall(TT.fd, toybuf, 1024);
946   }
947 
948   if (CFG_TOYBOX_FREE) {
949     llist_traverse(TT.excl, llist_free_double);
950     llist_traverse(TT.incl, llist_free_double);
951     while(TT.hlc) free(TT.hlx[--TT.hlc].arg);
952     free(TT.hlx);
953     free(TT.cwd);
954     close(TT.fd);
955   }
956 }
957