// This is the base class that the other fetcher types in lib
// all descend from.
// It handles the unpacking and retry logic that is shared among
// all of the other Fetcher types.

const npa = require('npm-package-arg')
const ssri = require('ssri')
const { promisify } = require('util')
const { basename, dirname } = require('path')
const tar = require('tar')
const log = require('proc-log')
const retry = require('promise-retry')
const fs = require('fs/promises')
const fsm = require('fs-minipass')
const cacache = require('cacache')
const isPackageBin = require('./util/is-package-bin.js')
const removeTrailingSlashes = require('./util/trailing-slashes.js')
const getContents = require('@npmcli/installed-package-contents')
const readPackageJsonFast = require('read-package-json-fast')
const readPackageJson = promisify(require('read-package-json'))
const { Minipass } = require('minipass')

const cacheDir = require('./util/cache-dir.js')

// Private methods.
// Child classes should not have to override these.
// Users should never call them.
const _extract = Symbol('_extract')
const _mkdir = Symbol('_mkdir')
const _empty = Symbol('_empty')
const _toFile = Symbol('_toFile')
const _tarxOptions = Symbol('_tarxOptions')
const _entryMode = Symbol('_entryMode')
const _istream = Symbol('_istream')
const _assertType = Symbol('_assertType')
const _tarballFromCache = Symbol('_tarballFromCache')
// These use the global symbol registry (Symbol.for), so any module asking
// for the same key gets the same symbol. The key strings must not change.
const _tarballFromResolved = Symbol.for('pacote.Fetcher._tarballFromResolved')
const _cacheFetches = Symbol.for('pacote.Fetcher._cacheFetches')
const _readPackageJson = Symbol.for('package.Fetcher._readPackageJson')

class FetcherBase {
  // spec is parsed with npm-package-arg, relative to opts.where.
  // opts is required and must be an object; a TypeError is thrown otherwise.
  constructor (spec, opts) {
    if (!opts || typeof opts !== 'object') {
      throw new TypeError('options object is required')
    }
    this.spec = npa(spec, opts.where)

    this.allowGitIgnore = !!opts.allowGitIgnore

    // a bit redundant because presumably the caller already knows this,
    // but it makes it easier to not have to keep track of the requested
    // spec when we're dispatching thousands of these at once, and normalizing
    // is nice.  saveSpec is preferred if set, because it turns stuff like
    // x/y#committish into github:x/y#committish.  use name@rawSpec for
    // registry deps so that we turn xyz and xyz@ -> xyz@
    this.from = this.spec.registry
      ? `${this.spec.name}@${this.spec.rawSpec}` : this.spec.saveSpec

    this[_assertType]()
    // clone the opts object so that others aren't upset when we mutate it
    // by adding/modifying the integrity value.
    this.opts = { ...opts }

    this.cache = opts.cache || cacheDir().cacache
    this.tufCache = opts.tufCache || cacheDir().tufcache
    this.resolved = opts.resolved || null

    // default to caching/verifying with sha512, that's what we usually have
    // need to change this default, or start overriding it, when sha512
    // is no longer strong enough.
    this.defaultIntegrityAlgorithm = opts.defaultIntegrityAlgorithm || 'sha512'

    if (typeof opts.integrity === 'string') {
      this.opts.integrity = ssri.parse(opts.integrity)
    }

    this.package = null
    this.type = this.constructor.name
    this.fmode = opts.fmode || 0o666
    this.dmode = opts.dmode || 0o777
    // we don't need a default umask, because we don't chmod files coming
    // out of package tarballs.  they're forced to have a mode that is
    // valid, regardless of what's in the tarball entry, and then we let
    // the process's umask setting do its job.  but if configured, we do
    // respect it.
    this.umask = opts.umask || 0

    this.preferOnline = !!opts.preferOnline
    this.preferOffline = !!opts.preferOffline
    this.offline = !!opts.offline

    this.before = opts.before
    this.fullMetadata = this.before ? true : !!opts.fullMetadata
    this.fullReadJson = !!opts.fullReadJson
    if (this.fullReadJson) {
      this[_readPackageJson] = readPackageJson
    } else {
      this[_readPackageJson] = readPackageJsonFast
    }

    // rrh is a registry hostname or 'never' or 'always'
    // defaults to registry.npmjs.org
    this.replaceRegistryHost = (!opts.replaceRegistryHost || opts.replaceRegistryHost === 'npmjs') ?
      'registry.npmjs.org' : opts.replaceRegistryHost

    this.defaultTag = opts.defaultTag || 'latest'
    this.registry = removeTrailingSlashes(opts.registry || 'https://registry.npmjs.org')

    // command to run 'prepare' scripts on directories and git dirs
    // To use pacote with yarn, for example, set npmBin to 'yarn'
    // and npmCliConfig with yarn's equivalents.
    this.npmBin = opts.npmBin || 'npm'

    // command to install deps for preparing
    this.npmInstallCmd = opts.npmInstallCmd || ['install', '--force']

    // XXX fill more of this in based on what we know from this.opts
    // we explicitly DO NOT fill in --tag, though, since we are often
    // going to be packing in the context of a publish, which may set
    // a dist-tag, but certainly wants to keep defaulting to latest.
    this.npmCliConfig = opts.npmCliConfig || [
      `--cache=${dirname(this.cache)}`,
      `--prefer-offline=${!!this.preferOffline}`,
      `--prefer-online=${!!this.preferOnline}`,
      `--offline=${!!this.offline}`,
      // toISOString() is called directly, so `before` must expose that method
      ...(this.before ? [`--before=${this.before.toISOString()}`] : []),
      '--no-progress',
      '--no-save',
      '--no-audit',
      // override any omit settings from the environment
      '--include=dev',
      '--include=peer',
      '--include=optional',
      // we need the actual things, not just the lockfile
      '--no-package-lock-only',
      '--no-dry-run',
    ]
  }

  // The integrity value stored on this.opts, or null when there is none.
  get integrity () {
    return this.opts.integrity || null
  }

  // Falsy values are ignored, so an existing integrity is never cleared.
  set integrity (i) {
    if (!i) {
      return
    }

    i = ssri.parse(i)
    const current = this.opts.integrity

    // do not ever update an existing hash value, but do
    // merge in NEW algos and hashes that we don't already have.
    if (current) {
      current.merge(i)
    } else {
      this.opts.integrity = i
    }
  }

  // A fresh Error on every access, naming this fetcher's type.
  get notImplementedError () {
    return new Error('not implemented in this fetcher type: ' + this.type)
  }

  // override in child classes
  // Returns a Promise that resolves to this.resolved string value
  resolve () {
    return this.resolved ? Promise.resolve(this.resolved)
      : Promise.reject(this.notImplementedError)
  }

  // override in child classes; the base implementation always rejects
  packument () {
    return Promise.reject(this.notImplementedError)
  }

  // override in child class
  // returns a manifest containing:
  // - name
  // - version
  // - _resolved
  // - _integrity
  // - plus whatever else was in there (corgi, full metadata, or pj file)
  manifest () {
    return Promise.reject(this.notImplementedError)
  }

  // private, should be overridden.
  // Note that they should *not* calculate or check integrity or cache,
  // but *just* return the raw tarball data stream.
  [_tarballFromResolved] () {
    throw this.notImplementedError
  }

  // public, should not be overridden
  // Resolves to whatever stream.concat() yields, decorated with
  // integrity, resolved, and from properties.
  tarball () {
    return this.tarballStream(stream => stream.concat().then(data => {
      data.integrity = this.integrity && String(this.integrity)
      data.resolved = this.resolved
      data.from = this.from
      return data
    }))
  }

  // private
  // Note: cacache will raise a EINTEGRITY error if the integrity doesn't match
  [_tarballFromCache] () {
    return cacache.get.stream.byDigest(this.cache, this.integrity, this.opts)
  }

  // When this getter is falsy, [_istream] skips the cache write.
  get [_cacheFetches] () {
    return true
  }

  // Wraps a raw tarball stream so that this.integrity gets set from the
  // 'integrity' event, and (when caching) the data is also written to cacache.
  [_istream] (stream) {
    // if not caching this, just return it
    if (!this.opts.cache || !this[_cacheFetches]) {
      // instead of creating a new integrity stream, we only piggyback on the
      // provided stream's events
      if (stream.hasIntegrityEmitter) {
        stream.on('integrity', i => this.integrity = i)
        return stream
      }

      const istream = ssri.integrityStream(this.opts)
      istream.on('integrity', i => this.integrity = i)
      stream.on('error', err => istream.emit('error', err))
      return stream.pipe(istream)
    }

    // we have to return a stream that gets ALL the data, and proxies errors,
    // but then pipe from the original tarball stream into the cache as well.
    // To do this without losing any data, and since the cacache put stream
    // is not a passthrough, we have to pipe from the original stream into
    // the cache AFTER we pipe into the middleStream.  Since the cache stream
    // has an asynchronous flush to write its contents to disk, we need to
    // defer the middleStream end until the cache stream ends.
    const middleStream = new Minipass()
    stream.on('error', err => middleStream.emit('error', err))
    stream.pipe(middleStream, { end: false })
    const cstream = cacache.put.stream(
      this.opts.cache,
      `pacote:tarball:${this.from}`,
      this.opts
    )
    cstream.on('integrity', i => this.integrity = i)
    cstream.on('error', err => stream.emit('error', err))
    stream.pipe(cstream)

    // eslint-disable-next-line promise/catch-or-return
    cstream.promise().catch(() => {}).then(() => middleStream.end())
    return middleStream
  }

  // Algorithm picked from the known integrity, else the configured default.
  pickIntegrityAlgorithm () {
    return this.integrity ? this.integrity.pickAlgorithm(this.opts)
      : this.defaultIntegrityAlgorithm
  }

  // TODO: check error class, once those are rolled out to our deps
  isDataCorruptionError (er) {
    return er.code === 'EINTEGRITY' || er.code === 'Z_DATA_ERROR'
  }

  // override the types getter
  get types () {
    return false
  }

  // Throws a TypeError when `types` is set and does not include this.spec.type.
  [_assertType] () {
    if (this.types && !this.types.includes(this.spec.type)) {
      throw new TypeError(`Wrong spec type (${
        this.spec.type
      }) for ${
        this.constructor.name
      }. Supported types: ${this.types.join(', ')}`)
    }
  }

  // We allow ENOENTs from cacache, but not anywhere else.
  // An ENOENT trying to read a tgz file, for example, is Right Out.
  isRetriableError (er) {
    // TODO: check error class, once those are rolled out to our deps
    return this.isDataCorruptionError(er) ||
      er.code === 'ENOENT' ||
      er.code === 'EISDIR'
  }

  // Mostly internal, but has some uses
  // Pass in a function which returns a promise
  // Function will be called 1 or more times with streams that may fail.
  // Retries:
  // Function MUST handle errors on the stream by rejecting the promise,
  // so that retry logic can pick it up and either retry or fail whatever
  // promise it was making (ie, failing extraction, etc.)
  //
  // The return value of this method is a Promise that resolves the same
  // as whatever the streamHandler resolves to.
  //
  // This should never be overridden by child classes, but it is public.
  tarballStream (streamHandler) {
    // Only short-circuit via cache if we have everything else we'll need,
    // and the user has not expressed a preference for checking online.

    const fromCache = (
      !this.preferOnline &&
      this.integrity &&
      this.resolved
    ) ? streamHandler(this[_tarballFromCache]()).catch(er => {
        if (this.isDataCorruptionError(er)) {
          log.warn('tarball', `cached data for ${
            this.spec
          } (${this.integrity}) seems to be corrupted. Refreshing cache.`)
          return this.cleanupCached().then(() => {
            throw er
          })
        } else {
          throw er
        }
      }) : null

    // Called with the cache error on a cache miss/failure, or with no
    // argument when the cache was not consulted at all.
    const fromResolved = er => {
      if (er) {
        if (!this.isRetriableError(er)) {
          throw er
        }
        log.silly('tarball', `no local data for ${
          this.spec
        }. Extracting by manifest.`)
      }
      return this.resolve().then(() => retry(tryAgain =>
        streamHandler(this[_istream](this[_tarballFromResolved]()))
          .catch(streamErr => {
            // Most likely data integrity.  A cache ENOENT error is unlikely
            // here, since we're definitely not reading from the cache, but it
            // IS possible that the fetch subsystem accessed the cache, and the
            // entry got blown away or something.  Try one more time to be sure.
            if (this.isRetriableError(streamErr)) {
              log.warn('tarball', `tarball data for ${
                this.spec
              } (${this.integrity}) seems to be corrupted. Trying again.`)
              return this.cleanupCached().then(() => tryAgain(streamErr))
            }
            throw streamErr
          }), { retries: 1, minTimeout: 0, maxTimeout: 0 }))
    }

    return fromCache ? fromCache.catch(fromResolved) : fromResolved()
  }

  // Removes the cached content addressed by the current integrity value.
  cleanupCached () {
    return cacache.rm.content(this.cache, this.integrity, this.opts)
  }

  // Removes every entry that getContents reports for path at depth 1.
  [_empty] (path) {
    return getContents({ path, depth: 1 }).then(contents => Promise.all(
      contents.map(entry => fs.rm(entry, { recursive: true, force: true }))))
  }

  // Empties dest, then makes sure it exists as a directory.
  async [_mkdir] (dest) {
    await this[_empty](dest)
    return await fs.mkdir(dest, { recursive: true })
  }

  // extraction is always the same.  the only difference is where
  // the tarball comes from.
  async extract (dest) {
    await this[_mkdir](dest)
    return this.tarballStream((tarball) => this[_extract](dest, tarball))
  }

  // Streams the tarball into the file at dest.  Resolves with
  // { integrity, resolved, from } once the writer emits 'close'.
  [_toFile] (dest) {
    return this.tarballStream(str => new Promise((res, rej) => {
      const writer = new fsm.WriteStream(dest)
      // forward source errors to the writer, whose handler rejects
      str.on('error', er => writer.emit('error', er))
      writer.on('error', er => rej(er))
      writer.on('close', () => res({
        integrity: this.integrity && String(this.integrity),
        resolved: this.resolved,
        from: this.from,
      }))
      str.pipe(writer)
    }))
  }

  // don't use this[_mkdir] because we don't want to rimraf anything
  async tarballFile (dest) {
    const dir = dirname(dest)
    await fs.mkdir(dir, { recursive: true })
    return this[_toFile](dest)
  }

  // Pipes the tarball stream into a tar extractor rooted at dest.
  // Resolves with { resolved, integrity, from } on the extractor's 'end';
  // rejects on an error from either the extractor or the tarball stream.
  [_extract] (dest, tarball) {
    const extractor = tar.x(this[_tarxOptions]({ cwd: dest }))
    const p = new Promise((resolve, reject) => {
      extractor.on('end', () => {
        resolve({
          resolved: this.resolved,
          integrity: this.integrity && String(this.integrity),
          from: this.from,
        })
      })

      extractor.on('error', er => {
        log.warn('tar', er.message)
        log.silly('tar', er)
        reject(er)
      })

      tarball.on('error', er => reject(er))
    })

    tarball.pipe(extractor)
    return p
  }

  // always ensure that entries are at least as permissive as our configured
  // dmode/fmode, but never more permissive than the umask allows.
  [_entryMode] (path, mode, type) {
    const m = /Directory|GNUDumpDir/.test(type) ? this.dmode
      : /File$/.test(type) ? this.fmode
      : /* istanbul ignore next - should never happen in a pkg */ 0

    // make sure package bins are executable
    const exe = isPackageBin(this.package, path) ? 0o111 : 0
    // always ensure that files are read/writable by the owner
    return ((mode | m) & ~this.umask) | exe | 0o600
  }

  // Builds the options object handed to tar.x for extraction into cwd.
  [_tarxOptions] ({ cwd }) {
    // paths of the .npmignore files seen so far in this extraction
    const sawIgnores = new Set()
    return {
      cwd,
      noChmod: true,
      noMtime: true,
      filter: (name, entry) => {
        if (/Link$/.test(entry.type)) {
          return false
        }
        entry.mode = this[_entryMode](entry.path, entry.mode, entry.type)
        // this replicates the npm pack behavior where .gitignore files
        // are treated like .npmignore files, but only if a .npmignore
        // file is not present.
        if (/File$/.test(entry.type)) {
          const base = basename(entry.path)
          if (base === '.npmignore') {
            sawIgnores.add(entry.path)
          } else if (base === '.gitignore' && !this.allowGitIgnore) {
            // rename, but only if there's not already a .npmignore
            const ni = entry.path.replace(/\.gitignore$/, '.npmignore')
            if (sawIgnores.has(ni)) {
              return false
            }
            entry.path = ni
          }
          return true
        }
        // NOTE(review): entries whose type does not end in "File" fall
        // through here and return undefined -- confirm this is intended.
      },
      strip: 1,
      onwarn: /* istanbul ignore next - we can trust that tar logs */
      (code, msg, data) => {
        log.warn('tar', code, msg)
        log.silly('tar', code, msg, data)
      },
      umask: this.umask,
      // always ignore ownership info from tarball metadata
      preserveOwner: false,
    }
  }
}

module.exports = FetcherBase

// Child classes
// (required after module.exports is assigned, not at the top of the file)
const GitFetcher = require('./git.js')
const RegistryFetcher = require('./registry.js')
const FileFetcher = require('./file.js')
const DirFetcher = require('./dir.js')
const RemoteFetcher = require('./remote.js')

// Get an appropriate fetcher object from a spec and options
FetcherBase.get = (rawSpec, opts = {}) => {
  const spec = npa(rawSpec, opts.where)
  switch (spec.type) {
    case 'git':
      return new GitFetcher(spec, opts)

    case 'remote':
      return new RemoteFetcher(spec, opts)

    case 'version':
    case 'range':
    case 'tag':
    case 'alias':
      return new RegistryFetcher(spec.subSpec || spec, opts)

    case 'file':
      return new FileFetcher(spec, opts)

    case 'directory':
      return new DirFetcher(spec, opts)

    default:
      throw new TypeError('Unknown spec type: ' + spec.type)
  }
}