1'use strict' 2 3// this[BUFFER] is the remainder of a chunk if we're waiting for 4// the full 512 bytes of a header to come in. We will Buffer.concat() 5// it to the next write(), which is a mem copy, but a small one. 6// 7// this[QUEUE] is a Yallist of entries that haven't been emitted 8// yet this can only get filled up if the user keeps write()ing after 9// a write() returns false, or does a write() with more than one entry 10// 11// We don't buffer chunks, we always parse them and either create an 12// entry, or push it into the active entry. The ReadEntry class knows 13// to throw data away if .ignore=true 14// 15// Shift entry off the buffer when it emits 'end', and emit 'entry' for 16// the next one in the list. 17// 18// At any time, we're pushing body chunks into the entry at WRITEENTRY, 19// and waiting for 'end' on the entry at READENTRY 20// 21// ignored entries get .resume() called on them straight away 22 23const warner = require('./warn-mixin.js') 24const Header = require('./header.js') 25const EE = require('events') 26const Yallist = require('yallist') 27const maxMetaEntrySize = 1024 * 1024 28const Entry = require('./read-entry.js') 29const Pax = require('./pax.js') 30const zlib = require('minizlib') 31const { nextTick } = require('process') 32 33const gzipHeader = Buffer.from([0x1f, 0x8b]) 34const STATE = Symbol('state') 35const WRITEENTRY = Symbol('writeEntry') 36const READENTRY = Symbol('readEntry') 37const NEXTENTRY = Symbol('nextEntry') 38const PROCESSENTRY = Symbol('processEntry') 39const EX = Symbol('extendedHeader') 40const GEX = Symbol('globalExtendedHeader') 41const META = Symbol('meta') 42const EMITMETA = Symbol('emitMeta') 43const BUFFER = Symbol('buffer') 44const QUEUE = Symbol('queue') 45const ENDED = Symbol('ended') 46const EMITTEDEND = Symbol('emittedEnd') 47const EMIT = Symbol('emit') 48const UNZIP = Symbol('unzip') 49const CONSUMECHUNK = Symbol('consumeChunk') 50const CONSUMECHUNKSUB = Symbol('consumeChunkSub') 51const CONSUMEBODY = Symbol('consumeBody') 52const CONSUMEMETA = Symbol('consumeMeta') 53const CONSUMEHEADER = Symbol('consumeHeader') 54const CONSUMING = Symbol('consuming') 55const BUFFERCONCAT = Symbol('bufferConcat') 56const MAYBEEND = Symbol('maybeEnd') 57const WRITING = Symbol('writing') 58const ABORTED = Symbol('aborted') 59const DONE = Symbol('onDone') 60const SAW_VALID_ENTRY = Symbol('sawValidEntry') 61const SAW_NULL_BLOCK = Symbol('sawNullBlock') 62const SAW_EOF = Symbol('sawEOF') 63const CLOSESTREAM = Symbol('closeStream') 64 65const noop = _ => true 66 67module.exports = warner(class Parser extends EE { 68 constructor (opt) { 69 opt = opt || {} 70 super(opt) 71 72 this.file = opt.file || '' 73 74 // set to boolean false when an entry starts. 1024 bytes of \0 75 // is technically a valid tarball, albeit a boring one. 76 this[SAW_VALID_ENTRY] = null 77 78 // these BADARCHIVE errors can't be detected early. listen on DONE. 79 this.on(DONE, _ => { 80 if (this[STATE] === 'begin' || this[SAW_VALID_ENTRY] === false) { 81 // either less than 1 block of data, or all entries were invalid. 82 // Either way, probably not even a tarball. 83 this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format') 84 } 85 }) 86 87 if (opt.ondone) { 88 this.on(DONE, opt.ondone) 89 } else { 90 this.on(DONE, _ => { 91 this.emit('prefinish') 92 this.emit('finish') 93 this.emit('end') 94 }) 95 } 96 97 this.strict = !!opt.strict 98 this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize 99 this.filter = typeof opt.filter === 'function' ? opt.filter : noop 100 // Unlike gzip, brotli doesn't have any magic bytes to identify it 101 // Users need to explicitly tell us they're extracting a brotli file 102 // Or we infer from the file extension 103 const isTBR = (opt.file && ( 104 opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr'))) 105 // if it's a tbr file it MIGHT be brotli, but we don't know until 106 // we look at it and verify it's not a valid tar file. 107 this.brotli = !opt.gzip && opt.brotli !== undefined ? opt.brotli 108 : isTBR ? undefined 109 : false 110 111 // have to set this so that streams are ok piping into it 112 this.writable = true 113 this.readable = false 114 115 this[QUEUE] = new Yallist() 116 this[BUFFER] = null 117 this[READENTRY] = null 118 this[WRITEENTRY] = null 119 this[STATE] = 'begin' 120 this[META] = '' 121 this[EX] = null 122 this[GEX] = null 123 this[ENDED] = false 124 this[UNZIP] = null 125 this[ABORTED] = false 126 this[SAW_NULL_BLOCK] = false 127 this[SAW_EOF] = false 128 129 this.on('end', () => this[CLOSESTREAM]()) 130 131 if (typeof opt.onwarn === 'function') { 132 this.on('warn', opt.onwarn) 133 } 134 if (typeof opt.onentry === 'function') { 135 this.on('entry', opt.onentry) 136 } 137 } 138 139 [CONSUMEHEADER] (chunk, position) { 140 if (this[SAW_VALID_ENTRY] === null) { 141 this[SAW_VALID_ENTRY] = false 142 } 143 let header 144 try { 145 header = new Header(chunk, position, this[EX], this[GEX]) 146 } catch (er) { 147 return this.warn('TAR_ENTRY_INVALID', er) 148 } 149 150 if (header.nullBlock) { 151 if (this[SAW_NULL_BLOCK]) { 152 this[SAW_EOF] = true 153 // ending an archive with no entries. pointless, but legal. 154 if (this[STATE] === 'begin') { 155 this[STATE] = 'header' 156 } 157 this[EMIT]('eof') 158 } else { 159 this[SAW_NULL_BLOCK] = true 160 this[EMIT]('nullBlock') 161 } 162 } else { 163 this[SAW_NULL_BLOCK] = false 164 if (!header.cksumValid) { 165 this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header }) 166 } else if (!header.path) { 167 this.warn('TAR_ENTRY_INVALID', 'path is required', { header }) 168 } else { 169 const type = header.type 170 if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) { 171 this.warn('TAR_ENTRY_INVALID', 'linkpath required', { header }) 172 } else if (!/^(Symbolic)?Link$/.test(type) && header.linkpath) { 173 this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', { header }) 174 } else { 175 const entry = this[WRITEENTRY] = new Entry(header, this[EX], this[GEX]) 176 177 // we do this for meta & ignored entries as well, because they 178 // are still valid tar, or else we wouldn't know to ignore them 179 if (!this[SAW_VALID_ENTRY]) { 180 if (entry.remain) { 181 // this might be the one! 182 const onend = () => { 183 if (!entry.invalid) { 184 this[SAW_VALID_ENTRY] = true 185 } 186 } 187 entry.on('end', onend) 188 } else { 189 this[SAW_VALID_ENTRY] = true 190 } 191 } 192 193 if (entry.meta) { 194 if (entry.size > this.maxMetaEntrySize) { 195 entry.ignore = true 196 this[EMIT]('ignoredEntry', entry) 197 this[STATE] = 'ignore' 198 entry.resume() 199 } else if (entry.size > 0) { 200 this[META] = '' 201 entry.on('data', c => this[META] += c) 202 this[STATE] = 'meta' 203 } 204 } else { 205 this[EX] = null 206 entry.ignore = entry.ignore || !this.filter(entry.path, entry) 207 208 if (entry.ignore) { 209 // probably valid, just not something we care about 210 this[EMIT]('ignoredEntry', entry) 211 this[STATE] = entry.remain ? 'ignore' : 'header' 212 entry.resume() 213 } else { 214 if (entry.remain) { 215 this[STATE] = 'body' 216 } else { 217 this[STATE] = 'header' 218 entry.end() 219 } 220 221 if (!this[READENTRY]) { 222 this[QUEUE].push(entry) 223 this[NEXTENTRY]() 224 } else { 225 this[QUEUE].push(entry) 226 } 227 } 228 } 229 } 230 } 231 } 232 } 233 234 [CLOSESTREAM] () { 235 nextTick(() => this.emit('close')) 236 } 237 238 [PROCESSENTRY] (entry) { 239 let go = true 240 241 if (!entry) { 242 this[READENTRY] = null 243 go = false 244 } else if (Array.isArray(entry)) { 245 this.emit.apply(this, entry) 246 } else { 247 this[READENTRY] = entry 248 this.emit('entry', entry) 249 if (!entry.emittedEnd) { 250 entry.on('end', _ => this[NEXTENTRY]()) 251 go = false 252 } 253 } 254 255 return go 256 } 257 258 [NEXTENTRY] () { 259 do {} while (this[PROCESSENTRY](this[QUEUE].shift())) 260 261 if (!this[QUEUE].length) { 262 // At this point, there's nothing in the queue, but we may have an 263 // entry which is being consumed (readEntry). 264 // If we don't, then we definitely can handle more data. 265 // If we do, and either it's flowing, or it has never had any data 266 // written to it, then it needs more. 267 // The only other possibility is that it has returned false from a 268 // write() call, so we wait for the next drain to continue. 269 const re = this[READENTRY] 270 const drainNow = !re || re.flowing || re.size === re.remain 271 if (drainNow) { 272 if (!this[WRITING]) { 273 this.emit('drain') 274 } 275 } else { 276 re.once('drain', _ => this.emit('drain')) 277 } 278 } 279 } 280 281 [CONSUMEBODY] (chunk, position) { 282 // write up to but no more than writeEntry.blockRemain 283 const entry = this[WRITEENTRY] 284 const br = entry.blockRemain 285 const c = (br >= chunk.length && position === 0) ? chunk 286 : chunk.slice(position, position + br) 287 288 entry.write(c) 289 290 if (!entry.blockRemain) { 291 this[STATE] = 'header' 292 this[WRITEENTRY] = null 293 entry.end() 294 } 295 296 return c.length 297 } 298 299 [CONSUMEMETA] (chunk, position) { 300 const entry = this[WRITEENTRY] 301 const ret = this[CONSUMEBODY](chunk, position) 302 303 // if we finished, then the entry is reset 304 if (!this[WRITEENTRY]) { 305 this[EMITMETA](entry) 306 } 307 308 return ret 309 } 310 311 [EMIT] (ev, data, extra) { 312 if (!this[QUEUE].length && !this[READENTRY]) { 313 this.emit(ev, data, extra) 314 } else { 315 this[QUEUE].push([ev, data, extra]) 316 } 317 } 318 319 [EMITMETA] (entry) { 320 this[EMIT]('meta', this[META]) 321 switch (entry.type) { 322 case 'ExtendedHeader': 323 case 'OldExtendedHeader': 324 this[EX] = Pax.parse(this[META], this[EX], false) 325 break 326 327 case 'GlobalExtendedHeader': 328 this[GEX] = Pax.parse(this[META], this[GEX], true) 329 break 330 331 case 'NextFileHasLongPath': 332 case 'OldGnuLongPath': 333 this[EX] = this[EX] || Object.create(null) 334 this[EX].path = this[META].replace(/\0.*/, '') 335 break 336 337 case 'NextFileHasLongLinkpath': 338 this[EX] = this[EX] || Object.create(null) 339 this[EX].linkpath = this[META].replace(/\0.*/, '') 340 break 341 342 /* istanbul ignore next */ 343 default: throw new Error('unknown meta: ' + entry.type) 344 } 345 } 346 347 abort (error) { 348 this[ABORTED] = true 349 this.emit('abort', error) 350 // always throws, even in non-strict mode 351 this.warn('TAR_ABORT', error, { recoverable: false }) 352 } 353 354 write (chunk) { 355 if (this[ABORTED]) { 356 return 357 } 358 359 // first write, might be gzipped 360 const needSniff = this[UNZIP] === null || 361 this.brotli === undefined && this[UNZIP] === false 362 if (needSniff && chunk) { 363 if (this[BUFFER]) { 364 chunk = Buffer.concat([this[BUFFER], chunk]) 365 this[BUFFER] = null 366 } 367 if (chunk.length < gzipHeader.length) { 368 this[BUFFER] = chunk 369 return true 370 } 371 372 // look for gzip header 373 for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) { 374 if (chunk[i] !== gzipHeader[i]) { 375 this[UNZIP] = false 376 } 377 } 378 379 const maybeBrotli = this.brotli === undefined 380 if (this[UNZIP] === false && maybeBrotli) { 381 // read the first header to see if it's a valid tar file. If so, 382 // we can safely assume that it's not actually brotli, despite the 383 // .tbr or .tar.br file extension. 384 // if we ended before getting a full chunk, yes, def brotli 385 if (chunk.length < 512) { 386 if (this[ENDED]) { 387 this.brotli = true 388 } else { 389 this[BUFFER] = chunk 390 return true 391 } 392 } else { 393 // if it's tar, it's pretty reliably not brotli, chances of 394 // that happening are astronomical. 395 try { 396 new Header(chunk.slice(0, 512)) 397 this.brotli = false 398 } catch (_) { 399 this.brotli = true 400 } 401 } 402 } 403 404 if (this[UNZIP] === null || (this[UNZIP] === false && this.brotli)) { 405 const ended = this[ENDED] 406 this[ENDED] = false 407 this[UNZIP] = this[UNZIP] === null 408 ? new zlib.Unzip() 409 : new zlib.BrotliDecompress() 410 this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk)) 411 this[UNZIP].on('error', er => this.abort(er)) 412 this[UNZIP].on('end', _ => { 413 this[ENDED] = true 414 this[CONSUMECHUNK]() 415 }) 416 this[WRITING] = true 417 const ret = this[UNZIP][ended ? 'end' : 'write'](chunk) 418 this[WRITING] = false 419 return ret 420 } 421 } 422 423 this[WRITING] = true 424 if (this[UNZIP]) { 425 this[UNZIP].write(chunk) 426 } else { 427 this[CONSUMECHUNK](chunk) 428 } 429 this[WRITING] = false 430 431 // return false if there's a queue, or if the current entry isn't flowing 432 const ret = 433 this[QUEUE].length ? false : 434 this[READENTRY] ? this[READENTRY].flowing : 435 true 436 437 // if we have no queue, then that means a clogged READENTRY 438 if (!ret && !this[QUEUE].length) { 439 this[READENTRY].once('drain', _ => this.emit('drain')) 440 } 441 442 return ret 443 } 444 445 [BUFFERCONCAT] (c) { 446 if (c && !this[ABORTED]) { 447 this[BUFFER] = this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c 448 } 449 } 450 451 [MAYBEEND] () { 452 if (this[ENDED] && 453 !this[EMITTEDEND] && 454 !this[ABORTED] && 455 !this[CONSUMING]) { 456 this[EMITTEDEND] = true 457 const entry = this[WRITEENTRY] 458 if (entry && entry.blockRemain) { 459 // truncated, likely a damaged file 460 const have = this[BUFFER] ? this[BUFFER].length : 0 461 this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${ 462 entry.blockRemain} more bytes, only ${have} available)`, { entry }) 463 if (this[BUFFER]) { 464 entry.write(this[BUFFER]) 465 } 466 entry.end() 467 } 468 this[EMIT](DONE) 469 } 470 } 471 472 [CONSUMECHUNK] (chunk) { 473 if (this[CONSUMING]) { 474 this[BUFFERCONCAT](chunk) 475 } else if (!chunk && !this[BUFFER]) { 476 this[MAYBEEND]() 477 } else { 478 this[CONSUMING] = true 479 if (this[BUFFER]) { 480 this[BUFFERCONCAT](chunk) 481 const c = this[BUFFER] 482 this[BUFFER] = null 483 this[CONSUMECHUNKSUB](c) 484 } else { 485 this[CONSUMECHUNKSUB](chunk) 486 } 487 488 while (this[BUFFER] && 489 this[BUFFER].length >= 512 && 490 !this[ABORTED] && 491 !this[SAW_EOF]) { 492 const c = this[BUFFER] 493 this[BUFFER] = null 494 this[CONSUMECHUNKSUB](c) 495 } 496 this[CONSUMING] = false 497 } 498 499 if (!this[BUFFER] || this[ENDED]) { 500 this[MAYBEEND]() 501 } 502 } 503 504 [CONSUMECHUNKSUB] (chunk) { 505 // we know that we are in CONSUMING mode, so anything written goes into 506 // the buffer. Advance the position and put any remainder in the buffer. 507 let position = 0 508 const length = chunk.length 509 while (position + 512 <= length && !this[ABORTED] && !this[SAW_EOF]) { 510 switch (this[STATE]) { 511 case 'begin': 512 case 'header': 513 this[CONSUMEHEADER](chunk, position) 514 position += 512 515 break 516 517 case 'ignore': 518 case 'body': 519 position += this[CONSUMEBODY](chunk, position) 520 break 521 522 case 'meta': 523 position += this[CONSUMEMETA](chunk, position) 524 break 525 526 /* istanbul ignore next */ 527 default: 528 throw new Error('invalid state: ' + this[STATE]) 529 } 530 } 531 532 if (position < length) { 533 if (this[BUFFER]) { 534 this[BUFFER] = Buffer.concat([chunk.slice(position), this[BUFFER]]) 535 } else { 536 this[BUFFER] = chunk.slice(position) 537 } 538 } 539 } 540 541 end (chunk) { 542 if (!this[ABORTED]) { 543 if (this[UNZIP]) { 544 this[UNZIP].end(chunk) 545 } else { 546 this[ENDED] = true 547 if (this.brotli === undefined) chunk = chunk || Buffer.alloc(0) 548 this.write(chunk) 549 } 550 } 551 } 552}) 553