'use strict'

// this[BUFFER] is the remainder of a chunk if we're waiting for
// the full 512 bytes of a header to come in.  We will Buffer.concat()
// it to the next write(), which is a mem copy, but a small one.
//
// this[QUEUE] is a Yallist of entries that haven't been emitted
// yet.  This can only get filled up if the user keeps write()ing after
// a write() returns false, or does a write() with more than one entry
//
// We don't buffer chunks, we always parse them and either create an
// entry, or push it into the active entry.  The ReadEntry class knows
// to throw data away if .ignore=true
//
// Shift entry off the buffer when it emits 'end', and emit 'entry' for
// the next one in the list.
//
// At any time, we're pushing body chunks into the entry at WRITEENTRY,
// and waiting for 'end' on the entry at READENTRY
//
// ignored entries get .resume() called on them straight away
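//
// A minimal usage sketch (hypothetical consumer code, not part of this
// module):
//
//   const fs = require('fs')
//   const Parser = require('./parse.js')
//   const parser = new Parser({
//     onentry: entry => {
//       console.log(entry.path)
//       entry.resume() // discard the body; we only want the paths
//     },
//   })
//   fs.createReadStream('archive.tar').pipe(parser)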

const warner = require('./warn-mixin.js')
const Header = require('./header.js')
const EE = require('events')
const Yallist = require('yallist')
const maxMetaEntrySize = 1024 * 1024
const Entry = require('./read-entry.js')
const Pax = require('./pax.js')
const zlib = require('minizlib')
const { nextTick } = require('process')

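// 0x1f 0x8b: the two-byte magic number that starts every gzip stream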
const gzipHeader = Buffer.from([0x1f, 0x8b])
const STATE = Symbol('state')
const WRITEENTRY = Symbol('writeEntry')
const READENTRY = Symbol('readEntry')
const NEXTENTRY = Symbol('nextEntry')
const PROCESSENTRY = Symbol('processEntry')
const EX = Symbol('extendedHeader')
const GEX = Symbol('globalExtendedHeader')
const META = Symbol('meta')
const EMITMETA = Symbol('emitMeta')
const BUFFER = Symbol('buffer')
const QUEUE = Symbol('queue')
const ENDED = Symbol('ended')
const EMITTEDEND = Symbol('emittedEnd')
const EMIT = Symbol('emit')
const UNZIP = Symbol('unzip')
const CONSUMECHUNK = Symbol('consumeChunk')
const CONSUMECHUNKSUB = Symbol('consumeChunkSub')
const CONSUMEBODY = Symbol('consumeBody')
const CONSUMEMETA = Symbol('consumeMeta')
const CONSUMEHEADER = Symbol('consumeHeader')
const CONSUMING = Symbol('consuming')
const BUFFERCONCAT = Symbol('bufferConcat')
const MAYBEEND = Symbol('maybeEnd')
const WRITING = Symbol('writing')
const ABORTED = Symbol('aborted')
const DONE = Symbol('onDone')
const SAW_VALID_ENTRY = Symbol('sawValidEntry')
const SAW_NULL_BLOCK = Symbol('sawNullBlock')
const SAW_EOF = Symbol('sawEOF')
const CLOSESTREAM = Symbol('closeStream')

const noop = _ => true

module.exports = warner(class Parser extends EE {
  constructor (opt) {
    opt = opt || {}
    super(opt)

    this.file = opt.file || ''

    // set to boolean false when an entry starts.  1024 bytes of \0
    // is technically a valid tarball, albeit a boring one.
    this[SAW_VALID_ENTRY] = null

    // these BADARCHIVE errors can't be detected early. listen on DONE.
    this.on(DONE, _ => {
      if (this[STATE] === 'begin' || this[SAW_VALID_ENTRY] === false) {
        // either less than 1 block of data, or all entries were invalid.
        // Either way, probably not even a tarball.
        this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format')
      }
    })

    if (opt.ondone) {
      this.on(DONE, opt.ondone)
    } else {
      this.on(DONE, _ => {
        this.emit('prefinish')
        this.emit('finish')
        this.emit('end')
      })
    }

    this.strict = !!opt.strict
    this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize
    this.filter = typeof opt.filter === 'function' ? opt.filter : noop
    // Unlike gzip, brotli doesn't have any magic bytes to identify it.
    // Users need to explicitly tell us they're extracting a brotli file,
    // or we infer it from the file extension.
    const isTBR = (opt.file && (
        opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')))
    // if it's a tbr file it MIGHT be brotli, but we don't know until
    // we look at it and verify it's not a valid tar file.
    this.brotli = !opt.gzip && opt.brotli !== undefined ? opt.brotli
      : isTBR ? undefined
      : false

    // have to set this so that streams are ok piping into it
    this.writable = true
    this.readable = false

    this[QUEUE] = new Yallist()
    this[BUFFER] = null
    this[READENTRY] = null
    this[WRITEENTRY] = null
    this[STATE] = 'begin'
    this[META] = ''
    this[EX] = null
    this[GEX] = null
    this[ENDED] = false
    this[UNZIP] = null
    this[ABORTED] = false
    this[SAW_NULL_BLOCK] = false
    this[SAW_EOF] = false

    this.on('end', () => this[CLOSESTREAM]())

    if (typeof opt.onwarn === 'function') {
      this.on('warn', opt.onwarn)
    }
    if (typeof opt.onentry === 'function') {
      this.on('entry', opt.onentry)
    }
  }

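  // parse one 512-byte header block found at `position` within `chunk`.
  // For reference, a few landmarks of the ustar header layout that the
  // Header class decodes (full layout in header.js):
  //   offset   0, 100 bytes: entry path (NUL-padded)
  //   offset 124,  12 bytes: entry size, in octal
  //   offset 148,   8 bytes: header checksum, in octal
  //   offset 156,   1 byte:  entry type flag
  //   offset 257,   6 bytes: "ustar" magic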
  [CONSUMEHEADER] (chunk, position) {
    if (this[SAW_VALID_ENTRY] === null) {
      this[SAW_VALID_ENTRY] = false
    }
    let header
    try {
      header = new Header(chunk, position, this[EX], this[GEX])
    } catch (er) {
      return this.warn('TAR_ENTRY_INVALID', er)
    }

    if (header.nullBlock) {
      if (this[SAW_NULL_BLOCK]) {
        this[SAW_EOF] = true
        // ending an archive with no entries.  pointless, but legal.
        if (this[STATE] === 'begin') {
          this[STATE] = 'header'
        }
        this[EMIT]('eof')
      } else {
        this[SAW_NULL_BLOCK] = true
        this[EMIT]('nullBlock')
      }
    } else {
      this[SAW_NULL_BLOCK] = false
      if (!header.cksumValid) {
        this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header })
      } else if (!header.path) {
        this.warn('TAR_ENTRY_INVALID', 'path is required', { header })
      } else {
        const type = header.type
        if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) {
          this.warn('TAR_ENTRY_INVALID', 'linkpath required', { header })
        } else if (!/^(Symbolic)?Link$/.test(type) && header.linkpath) {
          this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', { header })
        } else {
          const entry = this[WRITEENTRY] = new Entry(header, this[EX], this[GEX])

          // we do this for meta & ignored entries as well, because they
          // are still valid tar, or else we wouldn't know to ignore them
          if (!this[SAW_VALID_ENTRY]) {
            if (entry.remain) {
              // this might be the one!
              const onend = () => {
                if (!entry.invalid) {
                  this[SAW_VALID_ENTRY] = true
                }
              }
              entry.on('end', onend)
            } else {
              this[SAW_VALID_ENTRY] = true
            }
          }

          if (entry.meta) {
            if (entry.size > this.maxMetaEntrySize) {
              entry.ignore = true
              this[EMIT]('ignoredEntry', entry)
              this[STATE] = 'ignore'
              entry.resume()
            } else if (entry.size > 0) {
              this[META] = ''
              entry.on('data', c => this[META] += c)
              this[STATE] = 'meta'
            }
          } else {
            this[EX] = null
            entry.ignore = entry.ignore || !this.filter(entry.path, entry)

            if (entry.ignore) {
              // probably valid, just not something we care about
              this[EMIT]('ignoredEntry', entry)
              this[STATE] = entry.remain ? 'ignore' : 'header'
              entry.resume()
            } else {
              if (entry.remain) {
                this[STATE] = 'body'
              } else {
                this[STATE] = 'header'
                entry.end()
              }

              if (!this[READENTRY]) {
                this[QUEUE].push(entry)
                this[NEXTENTRY]()
              } else {
                this[QUEUE].push(entry)
              }
            }
          }
        }
      }
    }
  }

  [CLOSESTREAM] () {
    nextTick(() => this.emit('close'))
  }

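  // emit the next queued item: either a buffered event (stored as an
  // [ev, data, extra] array) or an entry.  Returns false when the queue
  // is empty, or when we must wait for the current entry's 'end'.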
  [PROCESSENTRY] (entry) {
    let go = true

    if (!entry) {
      this[READENTRY] = null
      go = false
    } else if (Array.isArray(entry)) {
      this.emit.apply(this, entry)
    } else {
      this[READENTRY] = entry
      this.emit('entry', entry)
      if (!entry.emittedEnd) {
        entry.on('end', _ => this[NEXTENTRY]())
        go = false
      }
    }

    return go
  }

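  // pump queued items to the consumer, then decide whether to emit
  // 'drain' now or defer it until the active readEntry drains.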
  [NEXTENTRY] () {
    do {} while (this[PROCESSENTRY](this[QUEUE].shift()))

    if (!this[QUEUE].length) {
      // At this point, there's nothing in the queue, but we may have an
      // entry which is being consumed (readEntry).
      // If we don't, then we definitely can handle more data.
      // If we do, and either it's flowing, or it has never had any data
      // written to it, then it needs more.
      // The only other possibility is that it has returned false from a
      // write() call, so we wait for the next drain to continue.
      const re = this[READENTRY]
      const drainNow = !re || re.flowing || re.size === re.remain
      if (drainNow) {
        if (!this[WRITING]) {
          this.emit('drain')
        }
      } else {
        re.once('drain', _ => this.emit('drain'))
      }
    }
  }

  [CONSUMEBODY] (chunk, position) {
    // write up to but no more than writeEntry.blockRemain
    const entry = this[WRITEENTRY]
    const br = entry.blockRemain
    const c = (br >= chunk.length && position === 0) ? chunk
      : chunk.slice(position, position + br)

    entry.write(c)

    if (!entry.blockRemain) {
      this[STATE] = 'header'
      this[WRITEENTRY] = null
      entry.end()
    }

    return c.length
  }

  [CONSUMEMETA] (chunk, position) {
    const entry = this[WRITEENTRY]
    const ret = this[CONSUMEBODY](chunk, position)

    // if we finished, then the entry is reset
    if (!this[WRITEENTRY]) {
      this[EMITMETA](entry)
    }

    return ret
  }

  [EMIT] (ev, data, extra) {
    if (!this[QUEUE].length && !this[READENTRY]) {
      this.emit(ev, data, extra)
    } else {
      this[QUEUE].push([ev, data, extra])
    }
  }

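  // apply a completed meta entry to the parse state.  Pax extended
  // headers are a series of "<length> <key>=<value>\n" records, where
  // <length> counts the whole record, including the length field itself
  // and the trailing newline.  An illustrative record:
  //
  //   27 path=some/long/name.txt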
  [EMITMETA] (entry) {
    this[EMIT]('meta', this[META])
    switch (entry.type) {
      case 'ExtendedHeader':
      case 'OldExtendedHeader':
        this[EX] = Pax.parse(this[META], this[EX], false)
        break

      case 'GlobalExtendedHeader':
        this[GEX] = Pax.parse(this[META], this[GEX], true)
        break

      case 'NextFileHasLongPath':
      case 'OldGnuLongPath':
        this[EX] = this[EX] || Object.create(null)
        this[EX].path = this[META].replace(/\0.*/, '')
        break

      case 'NextFileHasLongLinkpath':
        this[EX] = this[EX] || Object.create(null)
        this[EX].linkpath = this[META].replace(/\0.*/, '')
        break

      /* istanbul ignore next */
      default: throw new Error('unknown meta: ' + entry.type)
    }
  }

  abort (error) {
    this[ABORTED] = true
    this.emit('abort', error)
    // always throws, even in non-strict mode
    this.warn('TAR_ABORT', error, { recoverable: false })
  }

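  // duck-typed writable-stream write().  Returns false when the caller
  // should wait for 'drain': either entries are queued up, or the
  // current readEntry has applied backpressure.  The first write also
  // sniffs for the gzip magic bytes, and for .tbr/.tar.br input,
  // decides between tar and brotli by test-parsing the first block.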
  write (chunk) {
    if (this[ABORTED]) {
      return
    }

    // first write, might be gzipped
    const needSniff = this[UNZIP] === null ||
      this.brotli === undefined && this[UNZIP] === false
    if (needSniff && chunk) {
      if (this[BUFFER]) {
        chunk = Buffer.concat([this[BUFFER], chunk])
        this[BUFFER] = null
      }
      if (chunk.length < gzipHeader.length) {
        this[BUFFER] = chunk
        return true
      }

      // look for gzip header
      for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) {
        if (chunk[i] !== gzipHeader[i]) {
          this[UNZIP] = false
        }
      }

      const maybeBrotli = this.brotli === undefined
      if (this[UNZIP] === false && maybeBrotli) {
        // read the first header to see if it's a valid tar file. If so,
        // we can safely assume that it's not actually brotli, despite the
        // .tbr or .tar.br file extension.
        // if we ended before getting a full chunk, yes, def brotli
        if (chunk.length < 512) {
          if (this[ENDED]) {
            this.brotli = true
          } else {
            this[BUFFER] = chunk
            return true
          }
        } else {
          // if it's tar, it's pretty reliably not brotli, chances of
          // that happening are astronomical.
          try {
            new Header(chunk.slice(0, 512))
            this.brotli = false
          } catch (_) {
            this.brotli = true
          }
        }
      }

      if (this[UNZIP] === null || (this[UNZIP] === false && this.brotli)) {
        const ended = this[ENDED]
        this[ENDED] = false
        this[UNZIP] = this[UNZIP] === null
          ? new zlib.Unzip()
          : new zlib.BrotliDecompress()
        this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk))
        this[UNZIP].on('error', er => this.abort(er))
        this[UNZIP].on('end', _ => {
          this[ENDED] = true
          this[CONSUMECHUNK]()
        })
        this[WRITING] = true
        const ret = this[UNZIP][ended ? 'end' : 'write'](chunk)
        this[WRITING] = false
        return ret
      }
    }

    this[WRITING] = true
    if (this[UNZIP]) {
      this[UNZIP].write(chunk)
    } else {
      this[CONSUMECHUNK](chunk)
    }
    this[WRITING] = false

    // return false if there's a queue, or if the current entry isn't flowing
    const ret =
      this[QUEUE].length ? false :
      this[READENTRY] ? this[READENTRY].flowing :
      true

    // if we have no queue, then that means a clogged READENTRY
    if (!ret && !this[QUEUE].length) {
      this[READENTRY].once('drain', _ => this.emit('drain'))
    }

    return ret
  }

  [BUFFERCONCAT] (c) {
    if (c && !this[ABORTED]) {
      this[BUFFER] = this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c
    }
  }

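  // emit DONE exactly once, after input has ended and nothing is mid-
  // consume.  If the archive was truncated mid-entry, warn, flush any
  // buffered remainder into the entry, and end it.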
  [MAYBEEND] () {
    if (this[ENDED] &&
        !this[EMITTEDEND] &&
        !this[ABORTED] &&
        !this[CONSUMING]) {
      this[EMITTEDEND] = true
      const entry = this[WRITEENTRY]
      if (entry && entry.blockRemain) {
        // truncated, likely a damaged file
        const have = this[BUFFER] ? this[BUFFER].length : 0
        this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${
          entry.blockRemain} more bytes, only ${have} available)`, { entry })
        if (this[BUFFER]) {
          entry.write(this[BUFFER])
        }
        entry.end()
      }
      this[EMIT](DONE)
    }
  }

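  // top-level chunk intake.  Re-entrant calls (e.g. data events from
  // the decompressor arriving while we're already consuming) are
  // buffered; otherwise consume full 512-byte blocks until we run out,
  // hit EOF, or abort.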
  [CONSUMECHUNK] (chunk) {
    if (this[CONSUMING]) {
      this[BUFFERCONCAT](chunk)
    } else if (!chunk && !this[BUFFER]) {
      this[MAYBEEND]()
    } else {
      this[CONSUMING] = true
      if (this[BUFFER]) {
        this[BUFFERCONCAT](chunk)
        const c = this[BUFFER]
        this[BUFFER] = null
        this[CONSUMECHUNKSUB](c)
      } else {
        this[CONSUMECHUNKSUB](chunk)
      }

      while (this[BUFFER] &&
          this[BUFFER].length >= 512 &&
          !this[ABORTED] &&
          !this[SAW_EOF]) {
        const c = this[BUFFER]
        this[BUFFER] = null
        this[CONSUMECHUNKSUB](c)
      }
      this[CONSUMING] = false
    }

    if (!this[BUFFER] || this[ENDED]) {
      this[MAYBEEND]()
    }
  }

  [CONSUMECHUNKSUB] (chunk) {
    // we know that we are in CONSUMING mode, so anything written goes into
    // the buffer.  Advance the position and put any remainder in the buffer.
    let position = 0
    const length = chunk.length
    while (position + 512 <= length && !this[ABORTED] && !this[SAW_EOF]) {
      switch (this[STATE]) {
        case 'begin':
        case 'header':
          this[CONSUMEHEADER](chunk, position)
          position += 512
          break

        case 'ignore':
        case 'body':
          position += this[CONSUMEBODY](chunk, position)
          break

        case 'meta':
          position += this[CONSUMEMETA](chunk, position)
          break

        /* istanbul ignore next */
        default:
          throw new Error('invalid state: ' + this[STATE])
      }
    }

    if (position < length) {
      if (this[BUFFER]) {
        this[BUFFER] = Buffer.concat([chunk.slice(position), this[BUFFER]])
      } else {
        this[BUFFER] = chunk.slice(position)
      }
    }
  }

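  // duck-typed writable-stream end().  Routed through the decompressor
  // when one is active.  The empty-buffer fallback gives write() one
  // more pass to settle the tar-vs-brotli question when it's still
  // undecided at end of input.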
  end (chunk) {
    if (!this[ABORTED]) {
      if (this[UNZIP]) {
        this[UNZIP].end(chunk)
      } else {
        this[ENDED] = true
        if (this.brotli === undefined) {
          chunk = chunk || Buffer.alloc(0)
        }
        this.write(chunk)
      }
    }
  }
})