• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1'use strict'
2
3const BB = require('bluebird')
4
5const contentPath = require('./content/path')
6const figgyPudding = require('figgy-pudding')
7const finished = BB.promisify(require('mississippi').finished)
8const fixOwner = require('./util/fix-owner')
9const fs = require('graceful-fs')
10const glob = BB.promisify(require('glob'))
11const index = require('./entry-index')
12const path = require('path')
13const rimraf = BB.promisify(require('rimraf'))
14const ssri = require('ssri')
15
16BB.promisifyAll(fs)
17
// Option schema for this module, built with figgy-pudding. `verify()` runs
// its `opts` argument through this before any step executes.
const VerifyOpts = figgyPudding({
  // Forwarded as the `concurrency` option of the BB.map calls in this file.
  concurrency: { default: 20 },
  // Optional predicate called with an index entry; entries for which it
  // returns a falsy value are excluded. Declared without a default.
  filter: {},
  // Logger; only `silly()` is called in this file. The default is a no-op.
  log: { default: { silly () {} } }
})
27
28module.exports = verify
// Runs every verification step against `cache`, one after another, and
// resolves to a single stats object.
//
// Each step is called as `step(cache, opts)`. Whatever object it resolves to
// (if any) has its keys copied onto the shared stats object, and the step's
// wall-clock duration in ms is stored under `stats.runTime[<step name>]`.
// Once all steps are done, `stats.runTime.total` is set to
// `endTime - startTime` as reported by the first and last steps.
function verify (cache, opts) {
  opts = VerifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime
  ]

  // Reducer: run one step, fold its result into `stats`, record its timing.
  const runStep = (stats, step, i) => {
    const label = step.name || `step #${i}`
    const startedAt = new Date()
    return BB.resolve(step(cache, opts)).then(result => {
      if (result) {
        for (const key of Object.keys(result)) {
          stats[key] = result[key]
        }
      }
      const finishedAt = new Date()
      if (!stats.runTime) {
        stats.runTime = {}
      }
      stats.runTime[label] = finishedAt - startedAt
      return stats
    })
  }

  const reportTotal = stats => {
    stats.runTime.total = stats.endTime - stats.startTime
    opts.log.silly('verify', 'verification finished for', cache, 'in', `${stats.runTime.total}ms`)
  }

  return BB.reduce(steps, runStep, {}).tap(reportTotal)
}
57
// First verification step: stamps the moment the run began.
// Neither argument is used; they exist so every step shares one signature.
function markStartTime (cache, opts) {
  const startTime = new Date()
  return { startTime }
}
61
// Last verification step: stamps the moment the run finished.
// Neither argument is used; they exist so every step shares one signature.
function markEndTime (cache, opts) {
  const endTime = new Date()
  return { endTime }
}
65
// Verification step: runs fixOwner.mkdirfix() and then fixOwner.chownr() on
// the cache root. Always resolves to null so that nothing is merged into the
// stats object by `verify()`.
function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  // TODO - fix file permissions too
  const chownCache = () => fixOwner.chownr(cache, cache)
  const discardResult = () => null
  return fixOwner.mkdirfix(cache, cache)
    .then(chownCache)
    .then(discardResult)
}
73
// Naive mark-and-sweep garbage collection over the content store.
//
// Mark:
//   Stream every index entry, skip the ones `opts.filter` rejects, and
//   remember the integrity string of each remaining entry as "live".
// Sweep:
//   Glob every file below the content directory. For each file the integrity
//   is rebuilt from its path: the last three path segments joined together
//   are the hex digest, the segment before them is the algorithm.
//   - Live content has its checksum re-verified by verifyContent(); content
//     that fails counts as both "bad" and "reclaimed".
//   - Content no index entry refers to is rimraf'd.
//
// Resolves to the counters { verifiedContent, reclaimedCount, reclaimedSize,
// badContentCount, keptSize }.
//
function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', entry => {
    const wanted = !opts.filter || opts.filter(entry)
    if (wanted) {
      liveContent.add(entry.integrity.toString())
    }
  })

  // Handles one content file, updating `stats` in place.
  const sweepFile = (file, stats) => {
    const segments = file.split(/[/\\]/)
    const hexDigest = segments.slice(segments.length - 3).join('')
    const algorithm = segments[segments.length - 4]
    const integrity = ssri.fromHex(hexDigest, algorithm)

    if (!liveContent.has(integrity.toString())) {
      // No entries refer to this content. We can delete.
      stats.reclaimedCount++
      return fs.statAsync(file).then(fileStat =>
        rimraf(file).then(() => {
          stats.reclaimedSize += fileStat.size
          return stats
        })
      )
    }

    return verifyContent(file, integrity).then(info => {
      if (info.valid) {
        stats.verifiedContent++
        stats.keptSize += info.size
      } else {
        stats.reclaimedCount++
        stats.badContentCount++
        stats.reclaimedSize += info.size
      }
      return stats
    })
  }

  return finished(indexStream)
    .then(() => {
      const pattern = path.join(contentPath._contentDir(cache), '**')
      return glob(pattern, { follow: false, nodir: true, nosort: true })
    })
    .then(files => {
      const counters = {
        verifiedContent: 0,
        reclaimedCount: 0,
        reclaimedSize: 0,
        badContentCount: 0,
        keptSize: 0
      }
      return BB.resolve(counters).tap(stats =>
        BB.map(files, file => sweepFile(file, stats), { concurrency: opts.concurrency })
      )
    })
}
135
// Checks one content file against the integrity it is supposed to have.
//
// Resolves to `{ size, valid }`:
// - checksum matches: `{ size: <stat size>, valid: true }`
// - checksum mismatch (error code EINTEGRITY): the file is rimraf'd and the
//   result is `{ size: <stat size>, valid: false }`
// - an ENOENT anywhere along the way: `{ size: 0, valid: false }`
// Any other error rejects the returned promise.
function verifyContent (filepath, sri) {
  const missing = () => ({ size: 0, valid: false })

  return fs.statAsync(filepath).then(stat => {
    const report = { size: stat.size, valid: true }

    const discardIfCorrupt = err => {
      if (err.code !== 'EINTEGRITY') { throw err }
      return rimraf(filepath).then(() => {
        report.valid = false
      })
    }

    const contentStream = fs.createReadStream(filepath)
    return ssri.checkStream(contentStream, sri)
      .catch(discardIfCorrupt)
      .then(() => report)
  }).catch({ code: 'ENOENT' }, missing)
}
153
// Verification step: rewrites every index bucket from the entries that are
// currently listed in the index.
//
// Entries are grouped by `index._hashKey(key)`. Each group is an array of the
// entries to keep, with the bucket file location attached as `_path`. Entries
// rejected by `opts.filter` are counted in `rejectedEntries` and left out, but
// their bucket is still registered (possibly empty) so that rebuildBucket()
// truncates it. Buckets are then rebuilt with at most `opts.concurrency`
// running at once.
//
// Resolves to `{ missingContent, rejectedEntries, totalEntries }`; the
// per-bucket work in rebuildBucket() updates those counters as well.
function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  return index.ls(cache).then(entries => {
    const stats = {
      missingContent: 0,
      rejectedEntries: 0,
      totalEntries: 0
    }
    const buckets = {}
    for (const k in entries) {
      // `entries` is keyed by cache key, so it may itself hold a key named
      // "hasOwnProperty"; go through Object.prototype instead of calling the
      // method on the object, which would then throw.
      if (!Object.prototype.hasOwnProperty.call(entries, k)) { continue }
      const hashed = index._hashKey(k)
      const entry = entries[k]
      const excluded = opts.filter && !opts.filter(entry)
      if (excluded) { stats.rejectedEntries++ }
      if (!buckets[hashed]) {
        // First key seen for this bucket decides the path, kept or not.
        buckets[hashed] = []
        buckets[hashed]._path = index._bucketPath(cache, k)
      }
      if (!excluded) { buckets[hashed].push(entry) }
    }
    return BB.map(Object.keys(buckets), key => {
      return rebuildBucket(cache, buckets[key], stats, opts)
    }, { concurrency: opts.concurrency }).then(() => stats)
  })
}
187
// Truncates the bucket file at `bucket._path`, then re-inserts the bucket's
// entries one at a time.
//
// An entry is re-inserted (and counted in `stats.totalEntries`) only if its
// content file can be stat'ed. An ENOENT while doing so counts the entry in
// both `stats.rejectedEntries` and `stats.missingContent` instead. Any other
// error rejects the returned promise.
function rebuildBucket (cache, bucket, stats, opts) {
  const countInserted = () => { stats.totalEntries++ }
  const countMissing = () => {
    stats.rejectedEntries++
    stats.missingContent++
  }

  const restoreEntry = entry => {
    const contentFile = contentPath(cache, entry.integrity)
    return fs.statAsync(contentFile)
      .then(() => {
        const extras = { metadata: entry.metadata, size: entry.size }
        return index.insert(cache, entry.key, entry.integrity, extras)
      })
      .then(countInserted)
      .catch({ code: 'ENOENT' }, countMissing)
  }

  // This needs to be serialized because cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  return fs.truncateAsync(bucket._path).then(() => BB.mapSeries(bucket, restoreEntry))
}
206
// Verification step: removes the `tmp` directory directly under the cache
// root via rimraf and returns rimraf's promise.
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  const tmpDir = path.join(cache, 'tmp')
  return rimraf(tmpDir)
}
211
// Verification step: records the time of this run by writing the current
// epoch-millisecond timestamp into `<cache>/_lastverified`, then fixes the
// ownership of that file.
//
// The ownership fix is attached with `.finally()` so that it runs after the
// asynchronous write has settled, whether it succeeded or failed. A plain
// `try { return promise } finally { ... }` would run it as soon as the write
// was started, i.e. before the file had been written. The returned promise
// settles the same way the write did.
function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  return fs.writeFileAsync(verifile, String(Date.now())).finally(() => {
    fixOwner.chownr.sync(cache, verifile)
  })
}
221
222module.exports.lastRun = lastRun
// Reads `<cache>/_lastverified` as UTF-8 and resolves to a Date built from
// its numeric content (the file is written by writeVerifile()). Rejects if
// the file cannot be read.
function lastRun (cache) {
  const verifile = path.join(cache, '_lastverified')
  return fs.readFileAsync(verifile, 'utf8').then(data => {
    const millis = +data
    return new Date(millis)
  })
}
228