// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "extensions/browser/content_hash_fetcher.h"

#include <algorithm>

#include "base/base64.h"
#include "base/file_util.h"
#include "base/files/file_enumerator.h"
#include "base/json/json_reader.h"
#include "base/memory/ref_counted.h"
#include "base/stl_util.h"
#include "base/synchronization/lock.h"
#include "base/task_runner_util.h"
#include "base/version.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/secure_hash.h"
#include "crypto/sha2.h"
#include "extensions/browser/computed_hashes.h"
#include "extensions/browser/content_hash_tree.h"
#include "extensions/browser/extension_registry.h"
#include "extensions/browser/verified_contents.h"
#include "extensions/common/constants.h"
#include "extensions/common/extension.h"
#include "extensions/common/file_util.h"
#include "net/base/load_flags.h"
#include "net/url_request/url_fetcher.h"
#include "net/url_request/url_fetcher_delegate.h"
#include "net/url_request/url_request_status.h"

namespace {

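// Files are enumerated into a std::set so that they are visited in a
// deterministic, platform-independent order when computing hashes (the order
// in which base::FileEnumerator returns entries is not guaranteed).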
typedef std::set<base::FilePath> SortedFilePathSet;

}  // namespace

namespace extensions {

// This class takes care of doing the disk and network I/O work to ensure we
// have both verified_contents.json files from the webstore and
// computed_hashes.json files computed over the files in an extension's
// directory.
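//
// The overall flow of a job is:
//   1. Start() checks on the blocking pool whether verified_contents.json
//      already exists on disk.
//   2. If it doesn't, DoneCheckingForVerifiedContents() starts a URLFetcher
//      to download it; OnURLFetchComplete() validates the response and
//      OnVerifiedContentsWritten() confirms it was fully persisted.
//   3. DoneFetchingVerifiedContents() then schedules MaybeCreateHashes(),
//      which builds computed_hashes.json via CreateHashes() if needed.
//   4. DispatchCallback() reports the result back on the creation thread.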
class ContentHashFetcherJob
    : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
      public net::URLFetcherDelegate {
 public:
  typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
  ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
                        ContentVerifierKey key,
                        const std::string& extension_id,
                        const base::FilePath& extension_path,
                        const GURL& fetch_url,
                        bool force,
                        const CompletionCallback& callback);

  void Start();

  // Cancels this job, which will attempt to stop I/O operations sooner than
  // just waiting for the entire job to complete. Safe to call from any thread.
  void Cancel();

  // Checks whether this job has been cancelled. Safe to call from any thread.
  bool IsCancelled();

  // Returns whether this job was successful (we have both verified contents
  // and computed hashes). Even if the job was a success, there might have been
  // files that were found to have contents not matching expectations; these
  // are available by calling hash_mismatch_paths().
  bool success() { return success_; }

  bool force() { return force_; }

  const std::string& extension_id() { return extension_id_; }

  // Returns the set of paths that had a hash mismatch.
  const std::set<base::FilePath>& hash_mismatch_paths() {
    return hash_mismatch_paths_;
  }

 private:
  friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
  virtual ~ContentHashFetcherJob();

  // Callback for when we're done doing file I/O to see if we already have
  // a verified contents file. If we don't, this will kick off a network
  // request to get one.
  void DoneCheckingForVerifiedContents(bool found);

  // URLFetcherDelegate interface
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;

  // Callback for when we're done ensuring we have verified contents, and are
  // ready to move on to MaybeCreateHashes.
  void DoneFetchingVerifiedContents(bool success);

  // Callback for the job to write the verified contents to the filesystem.
  void OnVerifiedContentsWritten(size_t expected_size, int write_result);

  // The verified contents file from the webstore only contains the treehash
  // root hash, but for performance we want to cache the individual block level
  // hashes. This function will create that cache with block-level hashes for
  // each file in the extension if needed (the treehash root hash for each of
  // these should equal what is in the verified contents file from the
  // webstore).
  void MaybeCreateHashes();

  // Computes hashes for all files in |extension_path_|, and uses a
  // ComputedHashes::Writer to write that information into
  // |hashes_file|. Returns true on success.
  bool CreateHashes(const base::FilePath& hashes_file);

  // Will call the callback, if we haven't been cancelled.
  void DispatchCallback();

  net::URLRequestContextGetter* request_context_;
  std::string extension_id_;
  base::FilePath extension_path_;

  // The url we'll need to use to fetch a verified_contents.json file.
  GURL fetch_url_;

  bool force_;

  CompletionCallback callback_;
  content::BrowserThread::ID creation_thread_;

  // Used for fetching content signatures.
  scoped_ptr<net::URLFetcher> url_fetcher_;

  // The key used to validate verified_contents.json.
  ContentVerifierKey key_;

  // Whether this job succeeded.
  bool success_;

  // Paths that were found to have a mismatching hash.
  std::set<base::FilePath> hash_mismatch_paths_;

  // The block size to use for hashing.
  int block_size_;

  // Note: this may be accessed from multiple threads, so all access should
  // be protected by |cancelled_lock_|.
  bool cancelled_;

  // A lock for synchronizing access to |cancelled_|.
  base::Lock cancelled_lock_;

  DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob);
};
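
// A minimal usage sketch (illustrative only; in this file the real call site
// is ContentHashFetcher::DoFetch below, and the callback name here is
// hypothetical):
//
//   scoped_refptr<ContentHashFetcherJob> job = new ContentHashFetcherJob(
//       request_context, key, extension_id, extension_path, fetch_url,
//       false /* force */, base::Bind(&OnHashFetchJobDone));
//   job->Start();
//   // ... later, from any thread, if the result is no longer needed:
//   job->Cancel();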

ContentHashFetcherJob::ContentHashFetcherJob(
    net::URLRequestContextGetter* request_context,
    ContentVerifierKey key,
    const std::string& extension_id,
    const base::FilePath& extension_path,
    const GURL& fetch_url,
    bool force,
    const CompletionCallback& callback)
    : request_context_(request_context),
      extension_id_(extension_id),
      extension_path_(extension_path),
      fetch_url_(fetch_url),
      force_(force),
      callback_(callback),
      key_(key),
      success_(false),
      // TODO(asargent) - use the value from verified_contents.json for each
      // file, instead of using a constant.
      block_size_(4096),
      cancelled_(false) {
  bool got_id =
      content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
  DCHECK(got_id);
}

void ContentHashFetcherJob::Start() {
  base::FilePath verified_contents_path =
      file_util::GetVerifiedContentsPath(extension_path_);
  base::PostTaskAndReplyWithResult(
      content::BrowserThread::GetBlockingPool(),
      FROM_HERE,
      base::Bind(&base::PathExists, verified_contents_path),
      base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
                 this));
}

void ContentHashFetcherJob::Cancel() {
  base::AutoLock autolock(cancelled_lock_);
  cancelled_ = true;
}

bool ContentHashFetcherJob::IsCancelled() {
  base::AutoLock autolock(cancelled_lock_);
  return cancelled_;
}

ContentHashFetcherJob::~ContentHashFetcherJob() {
}

void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
  if (IsCancelled())
    return;
  if (found) {
    VLOG(1) << "Found verified contents for " << extension_id_;
    DoneFetchingVerifiedContents(true);
  } else {
    VLOG(1) << "Missing verified contents for " << extension_id_
            << ", fetching...";
    url_fetcher_.reset(
        net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
    url_fetcher_->SetRequestContext(request_context_);
    url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
                               net::LOAD_DO_NOT_SAVE_COOKIES |
                               net::LOAD_DISABLE_CACHE);
    url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
    url_fetcher_->Start();
  }
}

// Helper function to let us pass ownership of a string via base::Bind with the
// contents to be written into a file. Also ensures that the directory for
// |path| exists, creating it if needed. Returns the number of bytes written,
// or -1 on failure, so the caller can compare against the expected size.
static int WriteFileHelper(const base::FilePath& path,
                           scoped_ptr<std::string> content) {
  base::FilePath dir = path.DirName();
  if (!base::CreateDirectoryAndGetError(dir, NULL))
    return -1;
  return base::WriteFile(path, content->data(), content->size());
}

void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
  VLOG(1) << "URLFetchComplete for " << extension_id_
          << " is_success:" << url_fetcher_->GetStatus().is_success() << " "
          << fetch_url_.possibly_invalid_spec();
  if (IsCancelled())
    return;
  scoped_ptr<std::string> response(new std::string);
  if (!url_fetcher_->GetStatus().is_success() ||
      !url_fetcher_->GetResponseAsString(response.get())) {
    DoneFetchingVerifiedContents(false);
    return;
  }

  // Parse the response to make sure it is valid JSON (on staging, the
  // response can sometimes be a login-redirect HTML page, an XML file, etc.
  // if the right cookies aren't present).
  // TODO(asargent) - It would be a nice enhancement to move to parsing this
  // in a sandboxed helper (crbug.com/372878).
  scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
  if (parsed) {
    VLOG(1) << "JSON parsed ok for " << extension_id_;

    parsed.reset();  // no longer needed
    base::FilePath destination =
        file_util::GetVerifiedContentsPath(extension_path_);
    size_t size = response->size();
    base::PostTaskAndReplyWithResult(
        content::BrowserThread::GetBlockingPool(),
        FROM_HERE,
        base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
        base::Bind(
            &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
  } else {
    DoneFetchingVerifiedContents(false);
  }
}

void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
                                                      int write_result) {
  bool success =
      (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
  DoneFetchingVerifiedContents(success);
}

void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
  if (IsCancelled())
    return;

  if (!success) {
    DispatchCallback();
    return;
  }

  content::BrowserThread::PostBlockingPoolSequencedTask(
      "ContentHashFetcher",
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
}

void ContentHashFetcherJob::MaybeCreateHashes() {
  if (IsCancelled())
    return;
  base::FilePath hashes_file =
      file_util::GetComputedHashesPath(extension_path_);

  if (!force_ && base::PathExists(hashes_file)) {
    success_ = true;
  } else {
    if (force_)
      base::DeleteFile(hashes_file, false /* recursive */);
    success_ = CreateHashes(hashes_file);
  }

  content::BrowserThread::PostTask(
      creation_thread_,
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
}

bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
  if (IsCancelled())
    return false;
  // Make sure the directory exists.
  if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
    return false;

  base::FilePath verified_contents_path =
      file_util::GetVerifiedContentsPath(extension_path_);
  VerifiedContents verified_contents(key_.data, key_.size);
  if (!verified_contents.InitFrom(verified_contents_path, false))
    return false;

  base::FileEnumerator enumerator(extension_path_,
                                  true, /* recursive */
                                  base::FileEnumerator::FILES);
  // First discover all the file paths and put them in a sorted set.
  SortedFilePathSet paths;
  for (;;) {
    if (IsCancelled())
      return false;

    base::FilePath full_path = enumerator.Next();
    if (full_path.empty())
      break;
    paths.insert(full_path);
  }

  // Now iterate over all the paths in sorted order and compute the block
  // hashes for each one.
  ComputedHashes::Writer writer;
  for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
    if (IsCancelled())
      return false;
    const base::FilePath& full_path = *i;
    base::FilePath relative_path;
    extension_path_.AppendRelativePath(full_path, &relative_path);

    const std::string* expected_root =
        verified_contents.GetTreeHashRoot(relative_path);
    if (!expected_root)
      continue;

    std::string contents;
    if (!base::ReadFileToString(full_path, &contents)) {
      LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
      continue;
    }

    // Iterate through taking the hash of each block of size (block_size_) of
    // the file.
    std::vector<std::string> hashes;
    size_t offset = 0;
    while (offset < contents.size()) {
      if (IsCancelled())
        return false;
      const char* block_start = contents.data() + offset;
      size_t bytes_to_read =
          std::min(contents.size() - offset, static_cast<size_t>(block_size_));
      DCHECK_GT(bytes_to_read, 0u);
      scoped_ptr<crypto::SecureHash> hash(
          crypto::SecureHash::Create(crypto::SecureHash::SHA256));
      hash->Update(block_start, bytes_to_read);

      hashes.push_back(std::string());
      std::string* buffer = &hashes.back();
      buffer->resize(crypto::kSHA256Length);
      hash->Finish(string_as_array(buffer), buffer->size());

      // Get ready for next iteration.
      offset += bytes_to_read;
    }
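    // Combine the block-level hashes into a single root hash using a hash
    // tree whose branching factor is block_size_ / crypto::kSHA256Length
    // (128 children per node for the default 4096-byte blocks); this root is
    // what verified_contents.json records for the file.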
    std::string root =
        ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
    if (*expected_root != root) {
      VLOG(1) << "content mismatch for " << relative_path.AsUTF8Unsafe();
      hash_mismatch_paths_.insert(relative_path);
      continue;
    }

    writer.AddHashes(relative_path, block_size_, hashes);
  }
  return writer.WriteToFile(hashes_file);
}

void ContentHashFetcherJob::DispatchCallback() {
  {
    base::AutoLock autolock(cancelled_lock_);
    if (cancelled_)
      return;
  }
  callback_.Run(this);
}

// ----

ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
                                       ContentVerifierDelegate* delegate,
                                       const FetchCallback& callback)
    : context_(context),
      delegate_(delegate),
      fetch_callback_(callback),
      observer_(this),
      weak_ptr_factory_(this) {
}

ContentHashFetcher::~ContentHashFetcher() {
  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    i->second->Cancel();
  }
}

void ContentHashFetcher::Start() {
  ExtensionRegistry* registry = ExtensionRegistry::Get(context_);
  observer_.Add(registry);
}

void ContentHashFetcher::DoFetch(const Extension* extension, bool force) {
  if (!extension || !delegate_->ShouldBeVerified(*extension))
    return;

  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    if (!force || found->second->force()) {
      // Just let the existing job keep running.
      return;
    } else {
      // Kill the existing non-force job, so we can start a new one below.
      found->second->Cancel();
      jobs_.erase(found);
    }
  }

  // TODO(asargent) - we should do something here to remember recent attempts
  // to fetch signatures by extension id, and use exponential backoff to avoid
  // hammering the server when we aren't successful in getting them.
  // crbug.com/373397

  DCHECK(extension->version());
  GURL url =
      delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
  ContentHashFetcherJob* job =
      new ContentHashFetcherJob(context_->GetRequestContext(),
                                delegate_->PublicKey(),
                                extension->id(),
                                extension->path(),
                                url,
                                force,
                                base::Bind(&ContentHashFetcher::JobFinished,
                                           weak_ptr_factory_.GetWeakPtr()));
  jobs_.insert(std::make_pair(key, job));
  job->Start();
}

void ContentHashFetcher::OnExtensionLoaded(
    content::BrowserContext* browser_context,
    const Extension* extension) {
  CHECK(extension);
  DoFetch(extension, false);
}

void ContentHashFetcher::OnExtensionUnloaded(
    content::BrowserContext* browser_context,
    const Extension* extension,
    UnloadedExtensionInfo::Reason reason) {
  CHECK(extension);
  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    found->second->Cancel();
    jobs_.erase(found);
  }
}

void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
  if (!job->IsCancelled()) {
    fetch_callback_.Run(job->extension_id(),
                        job->success(),
                        job->force(),
                        job->hash_mismatch_paths());
  }

  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    if (i->second.get() == job) {
      jobs_.erase(i);
      break;
    }
  }
}

}  // namespace extensions