• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// This is the base class that the other fetcher types in lib
// all descend from.
// It handles the unpacking and retry logic that is shared among
// all of the other Fetcher types.
5
6const npa = require('npm-package-arg')
7const ssri = require('ssri')
8const { promisify } = require('util')
9const { basename, dirname } = require('path')
10const tar = require('tar')
11const log = require('proc-log')
12const retry = require('promise-retry')
13const fs = require('fs/promises')
14const fsm = require('fs-minipass')
15const cacache = require('cacache')
16const isPackageBin = require('./util/is-package-bin.js')
17const removeTrailingSlashes = require('./util/trailing-slashes.js')
18const getContents = require('@npmcli/installed-package-contents')
19const readPackageJsonFast = require('read-package-json-fast')
20const readPackageJson = promisify(require('read-package-json'))
21const { Minipass } = require('minipass')
22
23const cacheDir = require('./util/cache-dir.js')
24
// Private methods.
// Child classes should not have to override these.
// Users should never call them.
// These use Symbol(), so each one is unique to this module and cannot be
// looked up from anywhere else.
const _extract = Symbol('_extract')
const _mkdir = Symbol('_mkdir')
const _empty = Symbol('_empty')
const _toFile = Symbol('_toFile')
const _tarxOptions = Symbol('_tarxOptions')
const _entryMode = Symbol('_entryMode')
const _istream = Symbol('_istream')
const _assertType = Symbol('_assertType')
const _tarballFromCache = Symbol('_tarballFromCache')

// Shared hooks.
// These use Symbol.for(), i.e. the global symbol registry, so any module
// that asks for the same key string gets the very same symbol.  The key
// strings are therefore part of the cross-module contract: do not edit them.
const _tarballFromResolved = Symbol.for('pacote.Fetcher._tarballFromResolved')
const _cacheFetches = Symbol.for('pacote.Fetcher._cacheFetches')
// NOTE(review): this key starts with 'package.' rather than 'pacote.' like
// the two above.  It is left untouched on purpose, since changing it here
// alone would silently produce a different symbol than other modules use.
const _readPackageJson = Symbol.for('package.Fetcher._readPackageJson')
40
// Base class for all fetcher types.
//
// It owns the option normalization, the integrity bookkeeping, the
// cache-then-network tarball streaming with a single retry, and the tar
// extraction settings.  Child classes are expected to override resolve(),
// packument(), manifest(), the `types` getter and the
// [_tarballFromResolved]() hook; the base versions of those reject, throw
// or return a placeholder.
class FetcherBase {
  // spec: anything npm-package-arg accepts; it is parsed relative to
  //   opts.where.
  // opts: required options object.  A shallow copy is stored on this.opts.
  //
  // Throws TypeError if opts is missing or not an object, or (via
  // [_assertType]) if the parsed spec type is not listed in this.types.
  constructor (spec, opts) {
    if (!opts || typeof opts !== 'object') {
      throw new TypeError('options object is required')
    }
    this.spec = npa(spec, opts.where)

    this.allowGitIgnore = !!opts.allowGitIgnore

    // a bit redundant because presumably the caller already knows this,
    // but it makes it easier to not have to keep track of the requested
    // spec when we're dispatching thousands of these at once, and normalizing
    // is nice.  saveSpec is preferred if set, because it turns stuff like
    // x/y#committish into github:x/y#committish.  use name@rawSpec for
    // registry deps so that we turn xyz and xyz@ -> xyz@
    this.from = this.spec.registry
      ? `${this.spec.name}@${this.spec.rawSpec}` : this.spec.saveSpec

    this[_assertType]()
    // clone the opts object so that others aren't upset when we mutate it
    // by adding/modifying the integrity value.
    this.opts = { ...opts }

    this.cache = opts.cache || cacheDir().cacache
    this.tufCache = opts.tufCache || cacheDir().tufcache
    this.resolved = opts.resolved || null

    // default to caching/verifying with sha512, that's what we usually have
    // need to change this default, or start overriding it, when sha512
    // is no longer strong enough.
    this.defaultIntegrityAlgorithm = opts.defaultIntegrityAlgorithm || 'sha512'

    // a string integrity is parsed once up front, so that the getter and
    // setter below always deal with a parsed ssri object.
    if (typeof opts.integrity === 'string') {
      this.opts.integrity = ssri.parse(opts.integrity)
    }

    this.package = null
    this.type = this.constructor.name
    this.fmode = opts.fmode || 0o666
    this.dmode = opts.dmode || 0o777
    // we don't need a default umask, because we don't chmod files coming
    // out of package tarballs.  they're forced to have a mode that is
    // valid, regardless of what's in the tarball entry, and then we let
    // the process's umask setting do its job.  but if configured, we do
    // respect it.
    this.umask = opts.umask || 0

    this.preferOnline = !!opts.preferOnline
    this.preferOffline = !!opts.preferOffline
    this.offline = !!opts.offline

    this.before = opts.before
    // a `before` value always forces full metadata
    this.fullMetadata = this.before ? true : !!opts.fullMetadata
    this.fullReadJson = !!opts.fullReadJson
    if (this.fullReadJson) {
      this[_readPackageJson] = readPackageJson
    } else {
      this[_readPackageJson] = readPackageJsonFast
    }

    // rrh is a registry hostname or 'never' or 'always'
    // defaults to registry.npmjs.org
    this.replaceRegistryHost = (!opts.replaceRegistryHost || opts.replaceRegistryHost === 'npmjs') ?
      'registry.npmjs.org' : opts.replaceRegistryHost

    this.defaultTag = opts.defaultTag || 'latest'
    this.registry = removeTrailingSlashes(opts.registry || 'https://registry.npmjs.org')

    // command to run 'prepare' scripts on directories and git dirs
    // To use pacote with yarn, for example, set npmBin to 'yarn'
    // and npmCliConfig with yarn's equivalents.
    this.npmBin = opts.npmBin || 'npm'

    // command to install deps for preparing
    this.npmInstallCmd = opts.npmInstallCmd || ['install', '--force']

    // XXX fill more of this in based on what we know from this.opts
    // we explicitly DO NOT fill in --tag, though, since we are often
    // going to be packing in the context of a publish, which may set
    // a dist-tag, but certainly wants to keep defaulting to latest.
    this.npmCliConfig = opts.npmCliConfig || [
      `--cache=${dirname(this.cache)}`,
      `--prefer-offline=${!!this.preferOffline}`,
      `--prefer-online=${!!this.preferOnline}`,
      `--offline=${!!this.offline}`,
      // `before` may be a Date or anything the Date constructor can parse
      // (ISO string, epoch milliseconds).  Going through new Date() keeps
      // the output identical for a real Date and avoids a TypeError from
      // calling toISOString() on a string or number.
      ...(this.before ? [`--before=${new Date(this.before).toISOString()}`] : []),
      '--no-progress',
      '--no-save',
      '--no-audit',
      // override any omit settings from the environment
      '--include=dev',
      '--include=peer',
      '--include=optional',
      // we need the actual things, not just the lockfile
      '--no-package-lock-only',
      '--no-dry-run',
    ]
  }

  // The parsed integrity currently known for this package, or null.
  get integrity () {
    return this.opts.integrity || null
  }

  // Accepts anything ssri.parse() accepts.  Falsy values are ignored, so
  // an already-known integrity can never be cleared through this setter.
  set integrity (i) {
    if (!i) {
      return
    }

    i = ssri.parse(i)
    const current = this.opts.integrity

    // do not ever update an existing hash value, but do
    // merge in NEW algos and hashes that we don't already have.
    if (current) {
      current.merge(i)
    } else {
      this.opts.integrity = i
    }
  }

  // A fresh Error on every access, naming the concrete fetcher type.
  get notImplementedError () {
    return new Error(`not implemented in this fetcher type: ${this.type}`)
  }

  // override in child classes
  // Returns a Promise that resolves to this.resolved string value
  // The base version only succeeds when `resolved` was passed in opts.
  resolve () {
    return this.resolved ? Promise.resolve(this.resolved)
      : Promise.reject(this.notImplementedError)
  }

  // override in child classes; the base version always rejects.
  packument () {
    return Promise.reject(this.notImplementedError)
  }

  // override in child class
  // returns a manifest containing:
  // - name
  // - version
  // - _resolved
  // - _integrity
  // - plus whatever else was in there (corgi, full metadata, or pj file)
  manifest () {
    return Promise.reject(this.notImplementedError)
  }

  // private, should be overridden.
  // Note that they should *not* calculate or check integrity or cache,
  // but *just*  return the raw tarball data stream.
  // Unlike the methods above, the base version throws synchronously.
  [_tarballFromResolved] () {
    throw this.notImplementedError
  }

  // public, should not be overridden
  // Resolves to the concatenated tarball data, decorated with `integrity`
  // (as a string, when known), `resolved` and `from` properties.
  tarball () {
    return this.tarballStream(stream => stream.concat().then(data => {
      data.integrity = this.integrity && String(this.integrity)
      data.resolved = this.resolved
      data.from = this.from
      return data
    }))
  }

  // private
  // Note: cacache will raise a EINTEGRITY error if the integrity doesn't match
  [_tarballFromCache] () {
    return cacache.get.stream.byDigest(this.cache, this.integrity, this.opts)
  }

  // Whether fetched tarballs get written to the cache.  Child classes may
  // override this getter to opt out.
  get [_cacheFetches] () {
    return true
  }

  // Wraps a raw tarball stream so that this.integrity gets populated as the
  // data flows, and, when caching applies, so that the data is also
  // written to the cache.  Returns the stream the consumer should read.
  [_istream] (stream) {
    // if not caching this, just return it
    // Note: this checks this.opts.cache (what the caller explicitly passed),
    // not this.cache, which always has a default.
    if (!this.opts.cache || !this[_cacheFetches]) {
      // instead of creating a new integrity stream, we only piggyback on the
      // provided stream's events
      if (stream.hasIntegrityEmitter) {
        stream.on('integrity', i => this.integrity = i)
        return stream
      }

      const istream = ssri.integrityStream(this.opts)
      istream.on('integrity', i => this.integrity = i)
      stream.on('error', err => istream.emit('error', err))
      return stream.pipe(istream)
    }

    // we have to return a stream that gets ALL the data, and proxies errors,
    // but then pipe from the original tarball stream into the cache as well.
    // To do this without losing any data, and since the cacache put stream
    // is not a passthrough, we have to pipe from the original stream into
    // the cache AFTER we pipe into the middleStream.  Since the cache stream
    // has an asynchronous flush to write its contents to disk, we need to
    // defer the middleStream end until the cache stream ends.
    const middleStream = new Minipass()
    stream.on('error', err => middleStream.emit('error', err))
    stream.pipe(middleStream, { end: false })
    const cstream = cacache.put.stream(
      this.opts.cache,
      `pacote:tarball:${this.from}`,
      this.opts
    )
    cstream.on('integrity', i => this.integrity = i)
    // cache write failures are re-emitted on the source stream, which in
    // turn forwards them to middleStream through the handler above.
    cstream.on('error', err => stream.emit('error', err))
    stream.pipe(cstream)

    // end the returned stream once the cache write settles either way; a
    // rejection is swallowed here because it was already surfaced as an
    // 'error' event.
    // eslint-disable-next-line promise/catch-or-return
    cstream.promise().catch(() => {}).then(() => middleStream.end())
    return middleStream
  }

  // The algorithm picked from the known integrity, or the configured
  // default when no integrity is known yet.
  pickIntegrityAlgorithm () {
    return this.integrity ? this.integrity.pickAlgorithm(this.opts)
      : this.defaultIntegrityAlgorithm
  }

  // TODO: check error class, once those are rolled out to our deps
  isDataCorruptionError (er) {
    return er.code === 'EINTEGRITY' || er.code === 'Z_DATA_ERROR'
  }

  // override the types getter
  // Child classes return an array of supported npm-package-arg spec types.
  // The base value `false` disables the check in [_assertType].
  get types () {
    return false
  }

  // Throws TypeError when this.types is set and does not include the type
  // of the parsed spec.
  [_assertType] () {
    if (this.types && !this.types.includes(this.spec.type)) {
      throw new TypeError(`Wrong spec type (${
        this.spec.type
      }) for ${
        this.constructor.name
      }. Supported types: ${this.types.join(', ')}`)
    }
  }

  // We allow ENOENTs from cacache, but not anywhere else.
  // An ENOENT trying to read a tgz file, for example, is Right Out.
  isRetriableError (er) {
    // TODO: check error class, once those are rolled out to our deps
    return this.isDataCorruptionError(er) ||
      er.code === 'ENOENT' ||
      er.code === 'EISDIR'
  }

  // Mostly internal, but has some uses
  // Pass in a function which returns a promise
  // Function will be called 1 or more times with streams that may fail.
  // Retries:
  // Function MUST handle errors on the stream by rejecting the promise,
  // so that retry logic can pick it up and either retry or fail whatever
  // promise it was making (ie, failing extraction, etc.)
  //
  // The return value of this method is a Promise that resolves the same
  // as whatever the streamHandler resolves to.
  //
  // This should never be overridden by child classes, but it is public.
  tarballStream (streamHandler) {
    // Only short-circuit via cache if we have everything else we'll need,
    // and the user has not expressed a preference for checking online.

    const fromCache = (
      !this.preferOnline &&
      this.integrity &&
      this.resolved
    ) ? streamHandler(this[_tarballFromCache]()).catch(er => {
        if (this.isDataCorruptionError(er)) {
          log.warn('tarball', `cached data for ${
          this.spec
        } (${this.integrity}) seems to be corrupted. Refreshing cache.`)
          // drop the bad cache entry, then re-raise the same error so that
          // fromResolved (below) decides whether to go to the source.
          return this.cleanupCached().then(() => {
            throw er
          })
        } else {
          throw er
        }
      }) : null

    // Called with the cache error when the cache attempt failed, or with no
    // argument when the cache was not tried at all.
    const fromResolved = er => {
      if (er) {
        if (!this.isRetriableError(er)) {
          throw er
        }
        log.silly('tarball', `no local data for ${
          this.spec
        }. Extracting by manifest.`)
      }
      return this.resolve().then(() => retry(tryAgain =>
        streamHandler(this[_istream](this[_tarballFromResolved]()))
          .catch(streamErr => {
          // Most likely data integrity.  A cache ENOENT error is unlikely
          // here, since we're definitely not reading from the cache, but it
          // IS possible that the fetch subsystem accessed the cache, and the
          // entry got blown away or something.  Try one more time to be sure.
            if (this.isRetriableError(streamErr)) {
              log.warn('tarball', `tarball data for ${
              this.spec
            } (${this.integrity}) seems to be corrupted. Trying again.`)
              return this.cleanupCached().then(() => tryAgain(streamErr))
            }
            throw streamErr
          }), { retries: 1, minTimeout: 0, maxTimeout: 0 }))
    }

    return fromCache ? fromCache.catch(fromResolved) : fromResolved()
  }

  // Removes the cached content addressed by the current integrity.
  cleanupCached () {
    return cacache.rm.content(this.cache, this.integrity, this.opts)
  }

  // Recursively removes every entry that getContents() reports at depth 1
  // of `path`; the directory itself is left in place.
  [_empty] (path) {
    return getContents({ path, depth: 1 }).then(contents => Promise.all(
      contents.map(entry => fs.rm(entry, { recursive: true, force: true }))))
  }

  // Empties `dest`, then makes sure it exists (creating parents as needed).
  async [_mkdir] (dest) {
    await this[_empty](dest)
    return fs.mkdir(dest, { recursive: true })
  }

  // extraction is always the same.  the only difference is where
  // the tarball comes from.
  // Note that the existing contents of `dest` are removed first.
  // Resolves to { resolved, integrity, from }.
  async extract (dest) {
    await this[_mkdir](dest)
    return this.tarballStream((tarball) => this[_extract](dest, tarball))
  }

  // Streams the tarball into the file at `dest`.
  // Resolves to { integrity, resolved, from } once the writer closes, and
  // rejects on an error from either the tarball stream or the writer.
  [_toFile] (dest) {
    return this.tarballStream(str => new Promise((res, rej) => {
      const writer = new fsm.WriteStream(dest)
      // funnel source errors through the writer so that there is a single
      // rejection path.
      str.on('error', er => writer.emit('error', er))
      writer.on('error', er => rej(er))
      writer.on('close', () => res({
        integrity: this.integrity && String(this.integrity),
        resolved: this.resolved,
        from: this.from,
      }))
      str.pipe(writer)
    }))
  }

  // don't use this[_mkdir] because we don't want to rimraf anything
  // Creates the parent directory of `dest` if needed, then writes the
  // tarball there.
  async tarballFile (dest) {
    const dir = dirname(dest)
    await fs.mkdir(dir, { recursive: true })
    return this[_toFile](dest)
  }

  // Pipes `tarball` into a tar extractor rooted at `dest`.
  // Resolves to { resolved, integrity, from } when the extractor emits
  // 'end'; rejects on an error from the extractor or from the tarball.
  [_extract] (dest, tarball) {
    const extractor = tar.x(this[_tarxOptions]({ cwd: dest }))
    const p = new Promise((resolve, reject) => {
      extractor.on('end', () => {
        resolve({
          resolved: this.resolved,
          integrity: this.integrity && String(this.integrity),
          from: this.from,
        })
      })

      extractor.on('error', er => {
        log.warn('tar', er.message)
        log.silly('tar', er)
        reject(er)
      })

      tarball.on('error', er => reject(er))
    })

    // start the flow only after all of the handlers above are attached
    tarball.pipe(extractor)
    return p
  }

  // always ensure that entries are at least as permissive as our configured
  // dmode/fmode.  The umask is applied to that combined mode first; the
  // executable bits for package bins and the owner read/write bits are then
  // forced on afterwards, so those particular bits are NOT subject to the
  // umask.
  [_entryMode] (path, mode, type) {
    const m = /Directory|GNUDumpDir/.test(type) ? this.dmode
      : /File$/.test(type) ? this.fmode
      : /* istanbul ignore next - should never happen in a pkg */ 0

    // make sure package bins are executable
    const exe = isPackageBin(this.package, path) ? 0o111 : 0
    // always ensure that files are read/writable by the owner
    return ((mode | m) & ~this.umask) | exe | 0o600
  }

  // Builds the options object that [_extract] hands to tar.x for an
  // extraction rooted at `cwd`.
  [_tarxOptions] ({ cwd }) {
    // paths of the .npmignore entries seen so far in this extraction
    const sawIgnores = new Set()
    return {
      cwd,
      noChmod: true,
      noMtime: true,
      filter: (name, entry) => {
        // any entry whose type ends in 'Link' is rejected
        if (/Link$/.test(entry.type)) {
          return false
        }
        entry.mode = this[_entryMode](entry.path, entry.mode, entry.type)
        // this replicates the npm pack behavior where .gitignore files
        // are treated like .npmignore files, but only if a .npmignore
        // file is not present.
        if (/File$/.test(entry.type)) {
          const base = basename(entry.path)
          if (base === '.npmignore') {
            sawIgnores.add(entry.path)
          } else if (base === '.gitignore' && !this.allowGitIgnore) {
            // rename, but only if there's not already a .npmignore
            const ni = entry.path.replace(/\.gitignore$/, '.npmignore')
            if (sawIgnores.has(ni)) {
              return false
            }
            entry.path = ni
          }
          return true
        }
        // anything else (a type that ends in neither 'Link' nor 'File',
        // e.g. 'Directory') falls through and yields undefined.
        // NOTE(review): presumably tar treats that as "skip" - confirm.
      },
      strip: 1,
      onwarn: /* istanbul ignore next - we can trust that tar logs */
      (code, msg, data) => {
        log.warn('tar', code, msg)
        log.silly('tar', code, msg, data)
      },
      umask: this.umask,
      // always ignore ownership info from tarball metadata
      preserveOwner: false,
    }
  }
}
470
471module.exports = FetcherBase
472
473// Child classes
474const GitFetcher = require('./git.js')
475const RegistryFetcher = require('./registry.js')
476const FileFetcher = require('./file.js')
477const DirFetcher = require('./dir.js')
478const RemoteFetcher = require('./remote.js')
479
// Get an appropriate fetcher object from a spec and options
//
// rawSpec: anything npm-package-arg accepts; it is parsed relative to
//   opts.where.
// opts: options handed through to the fetcher constructor.  Defaults to {}
//   only when the argument is omitted (or undefined).
//
// Returns a new instance of the fetcher class that matches the parsed spec
// type.  Throws TypeError for a spec type that has no fetcher.
FetcherBase.get = (rawSpec, opts = {}) => {
  const spec = npa(rawSpec, opts.where)
  switch (spec.type) {
    case 'git':
      return new GitFetcher(spec, opts)

    case 'remote':
      return new RemoteFetcher(spec, opts)

    // all of these are served by the registry fetcher.  When the parsed
    // spec carries a subSpec, that is what gets fetched rather than the
    // outer spec.
    case 'version':
    case 'range':
    case 'tag':
    case 'alias':
      return new RegistryFetcher(spec.subSpec || spec, opts)

    case 'file':
      return new FileFetcher(spec, opts)

    case 'directory':
      return new DirFetcher(spec, opts)

    default:
      throw new TypeError('Unknown spec type: ' + spec.type)
  }
}
506