1// mix-in implementing the loadActual method 2 3const { relative, dirname, resolve, join, normalize } = require('path') 4 5const rpj = require('read-package-json-fast') 6const { readdirScoped } = require('@npmcli/fs') 7const { walkUp } = require('walk-up-path') 8const ancestorPath = require('common-ancestor-path') 9const treeCheck = require('../tree-check.js') 10 11const Shrinkwrap = require('../shrinkwrap.js') 12const calcDepFlags = require('../calc-dep-flags.js') 13const Node = require('../node.js') 14const Link = require('../link.js') 15const realpath = require('../realpath.js') 16 17// public symbols 18const _changePath = Symbol.for('_changePath') 19const _setWorkspaces = Symbol.for('setWorkspaces') 20const _rpcache = Symbol.for('realpathCache') 21const _stcache = Symbol.for('statCache') 22 23module.exports = cls => class ActualLoader extends cls { 24 #actualTree 25 // ensure when walking the tree that we don't call loadTree on the same 26 // actual node more than one time. 27 #actualTreeLoaded = new Set() 28 #actualTreePromise 29 30 // cache of nodes when loading the actualTree, so that we avoid loaded the 31 // same node multiple times when symlinks attack. 32 #cache = new Map() 33 #filter 34 35 // cache of link targets for setting fsParent links 36 // We don't do fsParent as a magic getter/setter, because it'd be too costly 37 // to keep up to date along the walk. 38 // And, we know that it can ONLY be relevant when the node is a target of a 39 // link, otherwise it'd be in a node_modules folder, so take advantage of 40 // that to limit the scans later. 41 #topNodes = new Set() 42 #transplantFilter 43 44 constructor (options) { 45 super(options) 46 47 // the tree of nodes on disk 48 this.actualTree = options.actualTree 49 50 // caches for cached realpath calls 51 const cwd = process.cwd() 52 // assume that the cwd is real enough for our purposes 53 this[_rpcache] = new Map([[cwd, cwd]]) 54 this[_stcache] = new Map() 55 } 56 57 // public method 58 // TODO remove options param in next semver major 59 async loadActual (options = {}) { 60 // In the past this.actualTree was set as a promise that eventually 61 // resolved, and overwrite this.actualTree with the resolved value. This 62 // was a problem because virtually no other code expects this.actualTree to 63 // be a promise. Instead we only set it once resolved, and also return it 64 // from the promise so that it is what's returned from this function when 65 // awaited. 66 if (this.actualTree) { 67 return this.actualTree 68 } 69 if (!this.#actualTreePromise) { 70 // allow the user to set options on the ctor as well. 71 // XXX: deprecate separate method options objects. 72 options = { ...this.options, ...options } 73 74 this.#actualTreePromise = this.#loadActual(options) 75 .then(tree => { 76 // reset all deps to extraneous prior to recalc 77 if (!options.root) { 78 for (const node of tree.inventory.values()) { 79 node.extraneous = true 80 } 81 } 82 83 // only reset root flags if we're not re-rooting, 84 // otherwise leave as-is 85 calcDepFlags(tree, !options.root) 86 this.actualTree = treeCheck(tree) 87 return this.actualTree 88 }) 89 } 90 return this.#actualTreePromise 91 } 92 93 // return the promise so that we don't ever have more than one going at the 94 // same time. This is so that buildIdealTree can default to the actualTree 95 // if no shrinkwrap present, but reify() can still call buildIdealTree and 96 // loadActual in parallel safely. 97 98 async #loadActual (options) { 99 // mostly realpath to throw if the root doesn't exist 100 const { 101 global, 102 filter = () => true, 103 root = null, 104 transplantFilter = () => true, 105 ignoreMissing = false, 106 forceActual = false, 107 } = options 108 this.#filter = filter 109 this.#transplantFilter = transplantFilter 110 111 if (global) { 112 const real = await realpath(this.path, this[_rpcache], this[_stcache]) 113 const params = { 114 path: this.path, 115 realpath: real, 116 pkg: {}, 117 global, 118 loadOverrides: true, 119 } 120 if (this.path === real) { 121 this.#actualTree = this.#newNode(params) 122 } else { 123 this.#actualTree = await this.#newLink(params) 124 } 125 } else { 126 // not in global mode, hidden lockfile is allowed, load root pkg too 127 this.#actualTree = await this.#loadFSNode({ 128 path: this.path, 129 real: await realpath(this.path, this[_rpcache], this[_stcache]), 130 loadOverrides: true, 131 }) 132 133 this.#actualTree.assertRootOverrides() 134 135 // if forceActual is set, don't even try the hidden lockfile 136 if (!forceActual) { 137 // Note: hidden lockfile will be rejected if it's not the latest thing 138 // in the folder, or if any of the entries in the hidden lockfile are 139 // missing. 140 const meta = await Shrinkwrap.load({ 141 path: this.#actualTree.path, 142 hiddenLockfile: true, 143 resolveOptions: this.options, 144 }) 145 146 if (meta.loadedFromDisk) { 147 this.#actualTree.meta = meta 148 // have to load on a new Arborist object, so we don't assign 149 // the virtualTree on this one! Also, the weird reference is because 150 // we can't easily get a ref to Arborist in this module, without 151 // creating a circular reference, since this class is a mixin used 152 // to build up the Arborist class itself. 153 await new this.constructor({ ...this.options }).loadVirtual({ 154 root: this.#actualTree, 155 }) 156 await this[_setWorkspaces](this.#actualTree) 157 158 this.#transplant(root) 159 return this.#actualTree 160 } 161 } 162 163 const meta = await Shrinkwrap.load({ 164 path: this.#actualTree.path, 165 lockfileVersion: this.options.lockfileVersion, 166 resolveOptions: this.options, 167 }) 168 this.#actualTree.meta = meta 169 } 170 171 await this.#loadFSTree(this.#actualTree) 172 await this[_setWorkspaces](this.#actualTree) 173 174 // if there are workspace targets without Link nodes created, load 175 // the targets, so that we know what they are. 176 if (this.#actualTree.workspaces && this.#actualTree.workspaces.size) { 177 const promises = [] 178 for (const path of this.#actualTree.workspaces.values()) { 179 if (!this.#cache.has(path)) { 180 // workspace overrides use the root overrides 181 const p = this.#loadFSNode({ path, root: this.#actualTree, useRootOverrides: true }) 182 .then(node => this.#loadFSTree(node)) 183 promises.push(p) 184 } 185 } 186 await Promise.all(promises) 187 } 188 189 if (!ignoreMissing) { 190 await this.#findMissingEdges() 191 } 192 193 // try to find a node that is the parent in a fs tree sense, but not a 194 // node_modules tree sense, of any link targets. this allows us to 195 // resolve deps that node will find, but a legacy npm view of the 196 // world would not have noticed. 197 for (const path of this.#topNodes) { 198 const node = this.#cache.get(path) 199 if (node && !node.parent && !node.fsParent) { 200 for (const p of walkUp(dirname(path))) { 201 if (this.#cache.has(p)) { 202 node.fsParent = this.#cache.get(p) 203 break 204 } 205 } 206 } 207 } 208 209 this.#transplant(root) 210 211 if (global) { 212 // need to depend on the children, or else all of them 213 // will end up being flagged as extraneous, since the 214 // global root isn't a "real" project 215 const tree = this.#actualTree 216 const actualRoot = tree.isLink ? tree.target : tree 217 const { dependencies = {} } = actualRoot.package 218 for (const [name, kid] of actualRoot.children.entries()) { 219 const def = kid.isLink ? `file:${kid.realpath.replace(/#/g, '%23')}` : '*' 220 dependencies[name] = dependencies[name] || def 221 } 222 actualRoot.package = { ...actualRoot.package, dependencies } 223 } 224 return this.#actualTree 225 } 226 227 #transplant (root) { 228 if (!root || root === this.#actualTree) { 229 return 230 } 231 232 this.#actualTree[_changePath](root.path) 233 for (const node of this.#actualTree.children.values()) { 234 if (!this.#transplantFilter(node)) { 235 node.root = null 236 } 237 } 238 239 root.replace(this.#actualTree) 240 for (const node of this.#actualTree.fsChildren) { 241 node.root = this.#transplantFilter(node) ? root : null 242 } 243 244 this.#actualTree = root 245 } 246 247 async #loadFSNode ({ path, parent, real, root, loadOverrides, useRootOverrides }) { 248 if (!real) { 249 try { 250 real = await realpath(path, this[_rpcache], this[_stcache]) 251 } catch (error) { 252 // if realpath fails, just provide a dummy error node 253 return new Node({ 254 error, 255 path, 256 realpath: path, 257 parent, 258 root, 259 loadOverrides, 260 }) 261 } 262 } 263 264 const cached = this.#cache.get(path) 265 let node 266 // missing edges get a dummy node, assign the parent and return it 267 if (cached && !cached.dummy) { 268 cached.parent = parent 269 return cached 270 } else { 271 const params = { 272 installLinks: this.installLinks, 273 legacyPeerDeps: this.legacyPeerDeps, 274 path, 275 realpath: real, 276 parent, 277 root, 278 loadOverrides, 279 } 280 281 try { 282 const pkg = await rpj(join(real, 'package.json')) 283 params.pkg = pkg 284 if (useRootOverrides && root.overrides) { 285 params.overrides = root.overrides.getNodeRule({ name: pkg.name, version: pkg.version }) 286 } 287 } catch (err) { 288 params.error = err 289 } 290 291 // soldier on if read-package-json raises an error, passing it to the 292 // Node which will attach it to its errors array (Link passes it along to 293 // its target node) 294 if (normalize(path) === real) { 295 node = this.#newNode(params) 296 } else { 297 node = await this.#newLink(params) 298 } 299 } 300 this.#cache.set(path, node) 301 return node 302 } 303 304 #newNode (options) { 305 // check it for an fsParent if it's a tree top. there's a decent chance 306 // it'll get parented later, making the fsParent scan a no-op, but better 307 // safe than sorry, since it's cheap. 308 const { parent, realpath } = options 309 if (!parent) { 310 this.#topNodes.add(realpath) 311 } 312 return new Node(options) 313 } 314 315 async #newLink (options) { 316 const { realpath } = options 317 this.#topNodes.add(realpath) 318 const target = this.#cache.get(realpath) 319 const link = new Link({ ...options, target }) 320 321 if (!target) { 322 // Link set its target itself in this case 323 this.#cache.set(realpath, link.target) 324 // if a link target points at a node outside of the root tree's 325 // node_modules hierarchy, then load that node as well. 326 await this.#loadFSTree(link.target) 327 } 328 329 return link 330 } 331 332 async #loadFSTree (node) { 333 const did = this.#actualTreeLoaded 334 if (!node.isLink && !did.has(node.target.realpath)) { 335 did.add(node.target.realpath) 336 await this.#loadFSChildren(node.target) 337 return Promise.all( 338 [...node.target.children.entries()] 339 .filter(([name, kid]) => !did.has(kid.realpath)) 340 .map(([name, kid]) => this.#loadFSTree(kid)) 341 ) 342 } 343 } 344 345 // create child nodes for all the entries in node_modules 346 // and attach them to the node as a parent 347 async #loadFSChildren (node) { 348 const nm = resolve(node.realpath, 'node_modules') 349 try { 350 const kids = await readdirScoped(nm).then(paths => paths.map(p => p.replace(/\\/g, '/'))) 351 return Promise.all( 352 // ignore . dirs and retired scoped package folders 353 kids.filter(kid => !/^(@[^/]+\/)?\./.test(kid)) 354 .filter(kid => this.#filter(node, kid)) 355 .map(kid => this.#loadFSNode({ 356 parent: node, 357 path: resolve(nm, kid), 358 }))) 359 } catch { 360 // error in the readdir is not fatal, just means no kids 361 } 362 } 363 364 async #findMissingEdges () { 365 // try to resolve any missing edges by walking up the directory tree, 366 // checking for the package in each node_modules folder. stop at the 367 // root directory. 368 // The tricky move here is that we load a "dummy" node for the folder 369 // containing the node_modules folder, so that it can be assigned as 370 // the fsParent. It's a bad idea to *actually* load that full node, 371 // because people sometimes develop in ~/projects/node_modules/... 372 // so we'd end up loading a massive tree with lots of unrelated junk. 373 const nmContents = new Map() 374 const tree = this.#actualTree 375 for (const node of tree.inventory.values()) { 376 const ancestor = ancestorPath(node.realpath, this.path) 377 378 const depPromises = [] 379 for (const [name, edge] of node.edgesOut.entries()) { 380 const notMissing = !edge.missing && 381 !(edge.to && (edge.to.dummy || edge.to.parent !== node)) 382 if (notMissing) { 383 continue 384 } 385 386 // start the walk from the dirname, because we would have found 387 // the dep in the loadFSTree step already if it was local. 388 for (const p of walkUp(dirname(node.realpath))) { 389 // only walk as far as the nearest ancestor 390 // this keeps us from going into completely unrelated 391 // places when a project is just missing something, but 392 // allows for finding the transitive deps of link targets. 393 // ie, if it has to go up and back out to get to the path 394 // from the nearest common ancestor, we've gone too far. 395 if (ancestor && /^\.\.(?:[\\/]|$)/.test(relative(ancestor, p))) { 396 break 397 } 398 399 let entries 400 if (!nmContents.has(p)) { 401 entries = await readdirScoped(p + '/node_modules') 402 .catch(() => []).then(paths => paths.map(p => p.replace(/\\/g, '/'))) 403 nmContents.set(p, entries) 404 } else { 405 entries = nmContents.get(p) 406 } 407 408 if (!entries.includes(name)) { 409 continue 410 } 411 412 let d 413 if (!this.#cache.has(p)) { 414 d = new Node({ path: p, root: node.root, dummy: true }) 415 this.#cache.set(p, d) 416 } else { 417 d = this.#cache.get(p) 418 } 419 if (d.dummy) { 420 // it's a placeholder, so likely would not have loaded this dep, 421 // unless another dep in the tree also needs it. 422 const depPath = normalize(`${p}/node_modules/${name}`) 423 const cached = this.#cache.get(depPath) 424 if (!cached || cached.dummy) { 425 depPromises.push(this.#loadFSNode({ 426 path: depPath, 427 root: node.root, 428 parent: d, 429 }).then(node => this.#loadFSTree(node))) 430 } 431 } 432 break 433 } 434 } 435 await Promise.all(depPromises) 436 } 437 } 438} 439