'use strict'

const BB = require('bluebird')

const contentPath = require('./content/path')
const figgyPudding = require('figgy-pudding')
const finished = BB.promisify(require('mississippi').finished)
const fixOwner = require('./util/fix-owner')
const fs = require('graceful-fs')
const glob = BB.promisify(require('glob'))
const index = require('./entry-index')
const path = require('path')
const rimraf = BB.promisify(require('rimraf'))
const ssri = require('ssri')

BB.promisifyAll(fs)

// Options understood by `verify`:
//   concurrency - passed as the `concurrency` option to `BB.map` (default 20)
//   filter      - optional predicate called with each index entry; entries
//                 for which it returns a falsy value are dropped
//   log         - logger exposing `silly()`; defaults to a no-op
const VerifyOpts = figgyPudding({
  concurrency: {
    default: 20
  },
  filter: {},
  log: {
    default: { silly () {} }
  }
})

module.exports = verify

// Runs every verification step against `cache`, one after another.
//
// Whatever object a step resolves to is merged, key by key, into a shared
// stats object. The wall-clock duration of each step (in ms) is stored under
// `stats.runTime[<step function name>]`, and the overall duration under
// `stats.runTime.total`.
//
// Returns a Bluebird promise that resolves to the accumulated stats.
function verify (cache, opts) {
  const settings = VerifyOpts(opts)
  settings.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime
  ]

  const runStep = (stats, step, i) => {
    const label = step.name || `step #${i}`
    const start = new Date()
    return BB.resolve(step(cache, settings)).then(result => {
      if (result) {
        for (const key of Object.keys(result)) {
          stats[key] = result[key]
        }
      }
      const end = new Date()
      if (!stats.runTime) { stats.runTime = {} }
      stats.runTime[label] = end - start
      return stats
    })
  }

  return BB.reduce(steps, runStep, {}).tap(stats => {
    stats.runTime.total = stats.endTime - stats.startTime
    settings.log.silly('verify', 'verification finished for', cache, 'in', `${stats.runTime.total}ms`)
  })
}

// First step: records when verification began.
function markStartTime (cache, opts) {
  const startTime = new Date()
  return { startTime }
}

// Last step: records when verification ended.
function markEndTime (cache, opts) {
  const endTime = new Date()
  return { endTime }
}

// Ensures the cache directory exists via `fixOwner.mkdirfix`, then runs
// `fixOwner.chownr` over it. Resolves to `null`, so it adds nothing to the
// stats.
function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  return fixOwner.mkdirfix(cache, cache)
    // TODO - fix file permissions too
    .then(() => fixOwner.chownr(cache, cache))
    .then(() => null)
}

// Implements a naive mark-and-sweep tracing garbage collector.
//
// The algorithm is basically as follows:
// 1. Read (and filter) all index entries ("pointers")
// 2. Mark each integrity value as "live"
// 3. Read entire filesystem tree in `content-vX/` dir
// 4. If content is live, verify its checksum and delete it if it fails
// 5. If content is not marked as live, rimraf it.
//
// Resolves to a stats object with `verifiedContent`, `reclaimedCount`,
// `reclaimedSize`, `badContentCount` and `keptSize`.
function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', entry => {
    if (opts.filter && !opts.filter(entry)) { return }
    liveContent.add(entry.integrity.toString())
  })
  return finished(indexStream).then(() => {
    const contentDir = contentPath._contentDir(cache)
    return glob(path.join(contentDir, '**'), {
      follow: false,
      nodir: true,
      nosort: true
    }).then(files => {
      return BB.resolve({
        verifiedContent: 0,
        reclaimedCount: 0,
        reclaimedSize: 0,
        badContentCount: 0,
        keptSize: 0
      }).tap((stats) => BB.map(files, (f) => {
        // The last three path segments concatenate to the hex digest; the
        // segment before them names the hash algorithm.
        const split = f.split(/[/\\]/)
        const digest = split.slice(split.length - 3).join('')
        const algo = split[split.length - 4]
        const integrity = ssri.fromHex(digest, algo)
        if (liveContent.has(integrity.toString())) {
          return verifyContent(f, integrity).then(info => {
            if (!info.valid) {
              stats.reclaimedCount++
              stats.badContentCount++
              stats.reclaimedSize += info.size
            } else {
              stats.verifiedContent++
              stats.keptSize += info.size
            }
            return stats
          })
        } else {
          // No entries refer to this content. We can delete.
          stats.reclaimedCount++
          return fs.statAsync(f).then(s => {
            return rimraf(f).then(() => {
              stats.reclaimedSize += s.size
              return stats
            })
          })
        }
      }, { concurrency: opts.concurrency }))
    })
  })
}

// Checks the file at `filepath` against the integrity value `sri`.
//
// Resolves to `{ size, valid }`. When the checksum does not match
// (`EINTEGRITY`) the file is removed and `valid` is false. When the file does
// not exist (`ENOENT`) the result is `{ size: 0, valid: false }`. Any other
// error rejects.
function verifyContent (filepath, sri) {
  return fs.statAsync(filepath).then(stat => {
    const contentInfo = {
      size: stat.size,
      valid: true
    }
    return ssri.checkStream(
      fs.createReadStream(filepath),
      sri
    ).catch(err => {
      if (err.code !== 'EINTEGRITY') { throw err }
      return rimraf(filepath).then(() => {
        contentInfo.valid = false
      })
    }).then(() => contentInfo)
  }).catch({ code: 'ENOENT' }, () => ({ size: 0, valid: false }))
}

// Groups all index entries by bucket and rewrites each bucket from scratch.
//
// Entries rejected by `opts.filter` are counted in `rejectedEntries` and left
// out of their bucket, but the bucket itself is still registered so that
// `rebuildBucket` truncates its file.
//
// Resolves to `{ missingContent, rejectedEntries, totalEntries }`.
function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  return index.ls(cache).then(entries => {
    const stats = {
      missingContent: 0,
      rejectedEntries: 0,
      totalEntries: 0
    }
    const buckets = {}
    // Iterate own keys via Object.keys rather than calling
    // `entries.hasOwnProperty`: `entries` is keyed by cache keys, so an entry
    // whose key is "hasOwnProperty" would shadow the method and throw.
    for (const k of Object.keys(entries)) {
      const hashed = index._hashKey(k)
      const entry = entries[k]
      const excluded = opts.filter && !opts.filter(entry)
      if (excluded) { stats.rejectedEntries++ }
      if (!buckets[hashed]) {
        buckets[hashed] = []
        buckets[hashed]._path = index._bucketPath(cache, k)
      }
      if (!excluded) { buckets[hashed].push(entry) }
    }
    return BB.map(Object.keys(buckets), key => {
      return rebuildBucket(cache, buckets[key], stats, opts)
    }, { concurrency: opts.concurrency }).then(() => stats)
  })
}

// Truncates the bucket file at `bucket._path` and re-inserts every entry in
// `bucket` whose content file still exists. Entries whose content is missing
// (`ENOENT`) are counted in both `rejectedEntries` and `missingContent`.
function rebuildBucket (cache, bucket, stats, opts) {
  return fs.truncateAsync(bucket._path).then(() => {
    // This needs to be serialized because cacache explicitly
    // lets very racy bucket conflicts clobber each other.
    return BB.mapSeries(bucket, entry => {
      const content = contentPath(cache, entry.integrity)
      return fs.statAsync(content).then(() => {
        return index.insert(cache, entry.key, entry.integrity, {
          metadata: entry.metadata,
          size: entry.size
        }).then(() => { stats.totalEntries++ })
      }).catch({ code: 'ENOENT' }, () => {
        stats.rejectedEntries++
        stats.missingContent++
      })
    })
  })
}

// Removes the cache's `tmp` directory.
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  return rimraf(path.join(cache, 'tmp'))
}

// Writes the current timestamp (ms since epoch) to `<cache>/_lastverified`
// and then fixes the ownership of that file.
function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  // The ownership fix must be chained onto the write promise. A synchronous
  // `try { return promise } finally { ... }` runs the `finally` block as soon
  // as the promise is created, i.e. before the file has been written.
  return fs.writeFileAsync(verifile, `${Date.now()}`).finally(() => {
    fixOwner.chownr.sync(cache, verifile)
  })
}

module.exports.lastRun = lastRun

// Resolves to a Date built from the timestamp stored in
// `<cache>/_lastverified`. Rejects if the file cannot be read.
function lastRun (cache) {
  return fs.readFileAsync(
    path.join(cache, '_lastverified'), 'utf8'
  ).then(data => new Date(+data))
}