1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/metrics/file_metrics_provider.h"
6
7 #include <stddef.h>
8
9 #include <memory>
10 #include <vector>
11
12 #include "base/command_line.h"
13 #include "base/containers/flat_map.h"
14 #include "base/feature_list.h"
15 #include "base/files/file.h"
16 #include "base/files/file_enumerator.h"
17 #include "base/files/file_util.h"
18 #include "base/files/memory_mapped_file.h"
19 #include "base/functional/bind.h"
20 #include "base/logging.h"
21 #include "base/metrics/histogram_base.h"
22 #include "base/metrics/histogram_functions.h"
23 #include "base/metrics/histogram_macros.h"
24 #include "base/metrics/persistent_histogram_allocator.h"
25 #include "base/metrics/persistent_memory_allocator.h"
26 #include "base/metrics/ranges_manager.h"
27 #include "base/strings/string_piece.h"
28 #include "base/strings/stringprintf.h"
29 #include "base/task/single_thread_task_runner.h"
30 #include "base/task/task_runner.h"
31 #include "base/task/task_traits.h"
32 #include "base/task/thread_pool.h"
33 #include "base/time/time.h"
34 #include "components/metrics/metrics_pref_names.h"
35 #include "components/metrics/metrics_service.h"
36 #include "components/metrics/persistent_histograms.h"
37 #include "components/metrics/persistent_system_profile.h"
38 #include "components/prefs/pref_registry_simple.h"
39 #include "components/prefs/pref_service.h"
40 #include "components/prefs/scoped_user_pref_update.h"
41
42 namespace metrics {
43 namespace {
44
// Describes how one kind of source file is opened and accessed. Keeping these
// values in a table (see kSourceOptions) obviates the need for multiple
// code-paths within several of the methods.
struct SourceOptions {
  // The base::File flags to be used to open a file on disk.
  int file_open_flags;

  // The access mode to be used when mapping a file into memory.
  base::MemoryMappedFile::Access memory_mapped_access;

  // Indicates if the file is to be accessed read-only.
  bool is_read_only;
};
58
// Opening a file typically requires at least these flags.
constexpr int STD_OPEN = base::File::FLAG_OPEN | base::File::FLAG_READ;

// Access options for each source type. The table is indexed with the numeric
// value of the source type (kSourceOptions[source->type]), so each entry has
// to sit at the position of the type named in the comment above it.
// NOTE(review): presumably this mirrors the declaration order of the source
// type enum in the header -- confirm when adding a new type.
constexpr SourceOptions kSourceOptions[] = {
    // SOURCE_HISTOGRAMS_ATOMIC_FILE
    {
        // Ensure that no other process reads this at the same time.
        STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
        base::MemoryMappedFile::READ_ONLY,
        true,
    },
    // SOURCE_HISTOGRAMS_ATOMIC_DIR
    {
        // Ensure that no other process reads this at the same time.
        STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
        base::MemoryMappedFile::READ_ONLY,
        true,
    },
    // SOURCE_HISTOGRAMS_ACTIVE_FILE
    {
        // Allow writing to the file. This is needed so we can keep track of
        // deltas that have been uploaded (by modifying the file), while the
        // file may still be open by an external process (e.g. Crashpad).
        STD_OPEN | base::File::FLAG_WRITE,
        base::MemoryMappedFile::READ_WRITE,
        false,
    },
};
87
DeleteFileWhenPossible(const base::FilePath & path)88 void DeleteFileWhenPossible(const base::FilePath& path) {
89 // Open (with delete) and then immediately close the file by going out of
90 // scope. This is the only cross-platform safe way to delete a file that may
91 // be open elsewhere, a distinct possibility given the asynchronous nature
92 // of the delete task.
93 base::File file(path, base::File::FLAG_OPEN | base::File::FLAG_READ |
94 base::File::FLAG_DELETE_ON_CLOSE);
95 }
96
// A task runner to use for testing. When non-null, it is handed out by
// CreateBackgroundTaskRunner() instead of a thread-pool runner. This is a raw
// pointer; it is assigned by FileMetricsProvider::SetTaskRunnerForTesting().
base::TaskRunner* g_task_runner_for_testing = nullptr;
99
100 // Returns a task runner appropriate for running background tasks that perform
101 // file I/O.
CreateBackgroundTaskRunner()102 scoped_refptr<base::TaskRunner> CreateBackgroundTaskRunner() {
103 if (g_task_runner_for_testing)
104 return scoped_refptr<base::TaskRunner>(g_task_runner_for_testing);
105
106 return base::ThreadPool::CreateTaskRunner(
107 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
108 base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN});
109 }
110
111 } // namespace
112
113 // This structure stores all the information about the sources being monitored
114 // and their current reporting state.
115 struct FileMetricsProvider::SourceInfo {
SourceInfometrics::FileMetricsProvider::SourceInfo116 SourceInfo(const Params& params)
117 : type(params.type),
118 association(params.association),
119 prefs_key(params.prefs_key),
120 filter(params.filter),
121 max_age(params.max_age),
122 max_dir_kib(params.max_dir_kib),
123 max_dir_files(params.max_dir_files) {
124 switch (type) {
125 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
126 DCHECK(prefs_key.empty());
127 [[fallthrough]];
128 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
129 path = params.path;
130 break;
131 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
132 directory = params.path;
133 break;
134 }
135 }
136
137 SourceInfo(const SourceInfo&) = delete;
138 SourceInfo& operator=(const SourceInfo&) = delete;
139
~SourceInfometrics::FileMetricsProvider::SourceInfo140 ~SourceInfo() {}
141
142 struct FoundFile {
143 base::FilePath path;
144 base::FileEnumerator::FileInfo info;
145 };
146 using FoundFiles = base::flat_map<base::Time, FoundFile>;
147
148 // How to access this source (file/dir, atomic/active).
149 const SourceType type;
150
151 // With what run this source is associated.
152 const SourceAssociation association;
153
154 // Where on disk the directory is located. This will only be populated when
155 // a directory is being monitored.
156 base::FilePath directory;
157
158 // The files found in the above directory, ordered by last-modified.
159 std::unique_ptr<FoundFiles> found_files;
160
161 // Where on disk the file is located. If a directory is being monitored,
162 // this will be updated for whatever file is being read.
163 base::FilePath path;
164
165 // Name used inside prefs to persistent metadata.
166 std::string prefs_key;
167
168 // The filter callback for determining what to do with found files.
169 FilterCallback filter;
170
171 // The maximum allowed age of a file.
172 base::TimeDelta max_age;
173
174 // The maximum allowed bytes in a directory.
175 size_t max_dir_kib;
176
177 // The maximum allowed files in a directory.
178 size_t max_dir_files;
179
180 // The last-seen time of this source to detect change.
181 base::Time last_seen;
182
183 // Indicates if the data has been read out or not.
184 bool read_complete = false;
185
186 // Once a file has been recognized as needing to be read, it is mapped
187 // into memory and assigned to an |allocator| object.
188 std::unique_ptr<base::PersistentHistogramAllocator> allocator;
189 };
190
// Builds the parameters describing one source: where it lives (|path|), how
// it is accessed (|type|), which run it belongs to (|association|) and the
// name under which its state is kept in prefs (|prefs_key|). Only these four
// members are set here.
FileMetricsProvider::Params::Params(const base::FilePath& path,
                                    SourceType type,
                                    SourceAssociation association,
                                    base::StringPiece prefs_key)
    : path(path), type(type), association(association), prefs_key(prefs_key) {}
196
~Params()197 FileMetricsProvider::Params::~Params() {}
198
// Obtains the task runner used for background file I/O, remembers
// |local_state| as the pref service, and registers this object (through a
// weak pointer) with the StatisticsRecorder as a histogram provider.
FileMetricsProvider::FileMetricsProvider(PrefService* local_state)
    : task_runner_(CreateBackgroundTaskRunner()), pref_service_(local_state) {
  base::StatisticsRecorder::RegisterHistogramProvider(
      weak_factory_.GetWeakPtr());
}
204
~FileMetricsProvider()205 FileMetricsProvider::~FileMetricsProvider() {}
206
RegisterSource(const Params & params)207 void FileMetricsProvider::RegisterSource(const Params& params) {
208 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
209
210 // Ensure that kSourceOptions has been filled for this type.
211 DCHECK_GT(std::size(kSourceOptions), static_cast<size_t>(params.type));
212
213 std::unique_ptr<SourceInfo> source(new SourceInfo(params));
214
215 // |prefs_key| may be empty if the caller does not wish to persist the
216 // state across instances of the program.
217 if (pref_service_ && !params.prefs_key.empty()) {
218 source->last_seen = pref_service_->GetTime(
219 metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key);
220 }
221
222 switch (params.association) {
223 case ASSOCIATE_CURRENT_RUN:
224 case ASSOCIATE_INTERNAL_PROFILE:
225 case ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER:
226 sources_to_check_.push_back(std::move(source));
227 break;
228 case ASSOCIATE_PREVIOUS_RUN:
229 case ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN:
230 DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_FILE, source->type);
231 sources_for_previous_run_.push_back(std::move(source));
232 break;
233 }
234 }
235
236 // static
RegisterSourcePrefs(PrefRegistrySimple * prefs,const base::StringPiece prefs_key)237 void FileMetricsProvider::RegisterSourcePrefs(
238 PrefRegistrySimple* prefs,
239 const base::StringPiece prefs_key) {
240 prefs->RegisterInt64Pref(
241 metrics::prefs::kMetricsLastSeenPrefix + std::string(prefs_key), 0);
242 }
243
// static
// Registers the list pref that holds file-metrics metadata (the entries are
// appended by AppendToSamplesCountPref()).
void FileMetricsProvider::RegisterPrefs(PrefRegistrySimple* prefs) {
  prefs->RegisterListPref(metrics::prefs::kMetricsFileMetricsMetadata);
}
248
249 // static
SetTaskRunnerForTesting(const scoped_refptr<base::TaskRunner> & task_runner)250 void FileMetricsProvider::SetTaskRunnerForTesting(
251 const scoped_refptr<base::TaskRunner>& task_runner) {
252 DCHECK(!g_task_runner_for_testing || !task_runner);
253 g_task_runner_for_testing = task_runner.get();
254 }
255
// static
// Logs |result| to the "UMA.FileMetricsProvider.AccessResult" enumeration
// histogram, using ACCESS_RESULT_MAX as the histogram boundary.
void FileMetricsProvider::RecordAccessResult(AccessResult result) {
  UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.AccessResult", result,
                            ACCESS_RESULT_MAX);
}
261
262 // static
LocateNextFileInDirectory(SourceInfo * source)263 bool FileMetricsProvider::LocateNextFileInDirectory(SourceInfo* source) {
264 DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_DIR, source->type);
265 DCHECK(!source->directory.empty());
266
267 // Cumulative directory stats. These will remain zero if the directory isn't
268 // scanned but that's okay since any work they would cause to be done below
269 // would have been done during the first call where the directory was fully
270 // scanned.
271 size_t total_size_kib = 0; // Using KiB allows 4TiB even on 32-bit builds.
272 size_t file_count = 0;
273
274 base::Time now_time = base::Time::Now();
275 if (!source->found_files) {
276 source->found_files = std::make_unique<SourceInfo::FoundFiles>();
277 base::FileEnumerator file_iter(source->directory, /*recursive=*/false,
278 base::FileEnumerator::FILES);
279 SourceInfo::FoundFile found_file;
280
281 // Open the directory and find all the files, remembering the last-modified
282 // time of each.
283 for (found_file.path = file_iter.Next(); !found_file.path.empty();
284 found_file.path = file_iter.Next()) {
285 found_file.info = file_iter.GetInfo();
286
287 // Ignore directories.
288 if (found_file.info.IsDirectory())
289 continue;
290
291 // Ignore temporary files.
292 base::FilePath::CharType first_character =
293 found_file.path.BaseName().value().front();
294 if (first_character == FILE_PATH_LITERAL('.') ||
295 first_character == FILE_PATH_LITERAL('_')) {
296 continue;
297 }
298
299 // Ignore non-PMA (Persistent Memory Allocator) files.
300 if (found_file.path.Extension() !=
301 base::PersistentMemoryAllocator::kFileExtension) {
302 continue;
303 }
304
305 // Process real files.
306 total_size_kib += found_file.info.GetSize() >> 10;
307 base::Time modified = found_file.info.GetLastModifiedTime();
308 if (modified > source->last_seen) {
309 // This file hasn't been read. Remember it (unless from the future).
310 if (modified <= now_time)
311 source->found_files->emplace(modified, std::move(found_file));
312 ++file_count;
313 } else {
314 // This file has been read. Try to delete it. Ignore any errors because
315 // the file may be un-removeable by this process. It could, for example,
316 // have been created by a privileged process like setup.exe. Even if it
317 // is not removed, it will continue to be ignored bacuse of the older
318 // modification time.
319 base::DeleteFile(found_file.path);
320 }
321 }
322 }
323
324 // Filter files from the front until one is found for processing.
325 bool have_file = false;
326 while (!source->found_files->empty()) {
327 SourceInfo::FoundFile found =
328 std::move(source->found_files->begin()->second);
329 source->found_files->erase(source->found_files->begin());
330
331 bool too_many =
332 source->max_dir_files > 0 && file_count > source->max_dir_files;
333 bool too_big =
334 source->max_dir_kib > 0 && total_size_kib > source->max_dir_kib;
335 bool too_old =
336 source->max_age != base::TimeDelta() &&
337 now_time - found.info.GetLastModifiedTime() > source->max_age;
338 if (too_many || too_big || too_old) {
339 base::DeleteFile(found.path);
340 --file_count;
341 total_size_kib -= found.info.GetSize() >> 10;
342 RecordAccessResult(too_many ? ACCESS_RESULT_TOO_MANY_FILES
343 : too_big ? ACCESS_RESULT_TOO_MANY_BYTES
344 : ACCESS_RESULT_TOO_OLD);
345 continue;
346 }
347
348 AccessResult result = HandleFilterSource(source, found.path);
349 if (result == ACCESS_RESULT_SUCCESS) {
350 source->path = std::move(found.path);
351 have_file = true;
352 break;
353 }
354
355 // Record the result. Success will be recorded by the caller.
356 if (result != ACCESS_RESULT_THIS_PID)
357 RecordAccessResult(result);
358 }
359
360 return have_file;
361 }
362
363 // static
FinishedWithSource(SourceInfo * source,AccessResult result)364 void FileMetricsProvider::FinishedWithSource(SourceInfo* source,
365 AccessResult result) {
366 // Different source types require different post-processing.
367 switch (source->type) {
368 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
369 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
370 // Done with this file so delete the allocator and its owned file.
371 source->allocator.reset();
372 // Remove the file if has been recorded. This prevents them from
373 // accumulating or also being recorded by different instances of
374 // the browser.
375 if (result == ACCESS_RESULT_SUCCESS ||
376 result == ACCESS_RESULT_NOT_MODIFIED ||
377 result == ACCESS_RESULT_MEMORY_DELETED ||
378 result == ACCESS_RESULT_TOO_OLD) {
379 DeleteFileWhenPossible(source->path);
380 }
381 break;
382 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
383 // Keep the allocator open so it doesn't have to be re-mapped each
384 // time. This also allows the contents to be merged on-demand.
385 break;
386 }
387 }
388
389 // static
CheckAndMergeMetricSourcesOnTaskRunner(SourceInfoList * sources)390 std::vector<size_t> FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner(
391 SourceInfoList* sources) {
392 // This method has all state information passed in |sources| and is intended
393 // to run on a worker thread rather than the UI thread.
394 std::vector<size_t> samples_counts;
395
396 for (std::unique_ptr<SourceInfo>& source : *sources) {
397 AccessResult result;
398 do {
399 result = CheckAndMapMetricSource(source.get());
400
401 // Some results are not reported in order to keep the dashboard clean.
402 if (result != ACCESS_RESULT_DOESNT_EXIST &&
403 result != ACCESS_RESULT_NOT_MODIFIED &&
404 result != ACCESS_RESULT_THIS_PID) {
405 RecordAccessResult(result);
406 }
407
408 // If there are no files (or no more files) in this source, stop now.
409 if (result == ACCESS_RESULT_DOESNT_EXIST)
410 break;
411
412 // Mapping was successful. Merge it.
413 if (result == ACCESS_RESULT_SUCCESS) {
414 // Metrics associated with internal profiles have to be fetched directly
415 // so just keep the mapping for use by the main thread.
416 if (source->association == ASSOCIATE_INTERNAL_PROFILE)
417 break;
418
419 if (source->association == ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER) {
420 samples_counts.push_back(CollectFileMetadataFromSource(source.get()));
421 } else {
422 size_t histograms_count =
423 MergeHistogramDeltasFromSource(source.get());
424 if (!source->prefs_key.empty()) {
425 base::UmaHistogramCounts1000(
426 base::StringPrintf(
427 "UMA.FileMetricsProvider.%s.MergedHistogramsCount",
428 source->prefs_key.c_str()),
429 histograms_count);
430 }
431 }
432 DCHECK(source->read_complete);
433 }
434
435 // All done with this source.
436 FinishedWithSource(source.get(), result);
437
438 // If it's a directory, keep trying until a file is successfully opened.
439 // When there are no more files, ACCESS_RESULT_DOESNT_EXIST will be
440 // returned and the loop will exit above.
441 } while (result != ACCESS_RESULT_SUCCESS && !source->directory.empty());
442
443 // If the set of known files is empty, clear the object so the next run
444 // will do a fresh scan of the directory.
445 if (source->found_files && source->found_files->empty())
446 source->found_files.reset();
447 }
448
449 return samples_counts;
450 }
451
452 // This method has all state information passed in |source| and is intended
453 // to run on a worker thread rather than the UI thread.
454 // static
CheckAndMapMetricSource(SourceInfo * source)455 FileMetricsProvider::AccessResult FileMetricsProvider::CheckAndMapMetricSource(
456 SourceInfo* source) {
457 // If source was read, clean up after it.
458 if (source->read_complete)
459 FinishedWithSource(source, ACCESS_RESULT_SUCCESS);
460 source->read_complete = false;
461 DCHECK(!source->allocator);
462
463 // If the source is a directory, look for files within it.
464 if (!source->directory.empty() && !LocateNextFileInDirectory(source))
465 return ACCESS_RESULT_DOESNT_EXIST;
466
467 // Do basic validation on the file metadata.
468 base::File::Info info;
469 if (!base::GetFileInfo(source->path, &info))
470 return ACCESS_RESULT_DOESNT_EXIST;
471
472 if (info.is_directory || info.size == 0)
473 return ACCESS_RESULT_INVALID_FILE;
474
475 if (source->last_seen >= info.last_modified)
476 return ACCESS_RESULT_NOT_MODIFIED;
477 if (source->max_age != base::TimeDelta() &&
478 base::Time::Now() - info.last_modified > source->max_age) {
479 return ACCESS_RESULT_TOO_OLD;
480 }
481
482 // Non-directory files still need to be filtered.
483 if (source->directory.empty()) {
484 AccessResult result = HandleFilterSource(source, source->path);
485 if (result != ACCESS_RESULT_SUCCESS)
486 return result;
487 }
488
489 // A new file of metrics has been found.
490 base::File file(source->path, kSourceOptions[source->type].file_open_flags);
491 if (!file.IsValid())
492 return ACCESS_RESULT_NO_OPEN;
493
494 // Check that file is writable if that is expected. If a write is attempted
495 // on an unwritable memory-mapped file, a SIGBUS will cause a crash.
496 const bool read_only = kSourceOptions[source->type].is_read_only;
497 if (!read_only) {
498 constexpr int kTestSize = 16;
499 char header[kTestSize];
500 int amount = file.Read(0, header, kTestSize);
501 if (amount != kTestSize)
502 return ACCESS_RESULT_INVALID_CONTENTS;
503
504 char zeros[kTestSize] = {0};
505 file.Write(0, zeros, kTestSize);
506 file.Flush();
507
508 // A crash here would be unfortunate as the file would be left invalid
509 // and skipped/deleted by later attempts. This is unlikely, however, and
510 // the benefit of avoiding crashes from mapping as read/write a file that
511 // can't be written more than justifies the risk.
512
513 char check[kTestSize];
514 amount = file.Read(0, check, kTestSize);
515 if (amount != kTestSize)
516 return ACCESS_RESULT_INVALID_CONTENTS;
517 if (memcmp(check, zeros, kTestSize) != 0)
518 return ACCESS_RESULT_NOT_WRITABLE;
519
520 file.Write(0, header, kTestSize);
521 file.Flush();
522 amount = file.Read(0, check, kTestSize);
523 if (amount != kTestSize)
524 return ACCESS_RESULT_INVALID_CONTENTS;
525 if (memcmp(check, header, kTestSize) != 0)
526 return ACCESS_RESULT_NOT_WRITABLE;
527 }
528
529 std::unique_ptr<base::MemoryMappedFile> mapped(new base::MemoryMappedFile());
530 if (!mapped->Initialize(std::move(file),
531 kSourceOptions[source->type].memory_mapped_access)) {
532 return ACCESS_RESULT_SYSTEM_MAP_FAILURE;
533 }
534
535 // Ensure any problems below don't occur repeatedly.
536 source->last_seen = info.last_modified;
537
538 // Test the validity of the file contents.
539 if (!base::FilePersistentMemoryAllocator::IsFileAcceptable(*mapped,
540 read_only)) {
541 return ACCESS_RESULT_INVALID_CONTENTS;
542 }
543
544 // Map the file and validate it.
545 std::unique_ptr<base::FilePersistentMemoryAllocator> memory_allocator =
546 std::make_unique<base::FilePersistentMemoryAllocator>(
547 std::move(mapped), 0, 0, base::StringPiece(), read_only);
548 if (memory_allocator->GetMemoryState() ==
549 base::PersistentMemoryAllocator::MEMORY_DELETED) {
550 return ACCESS_RESULT_MEMORY_DELETED;
551 }
552 if (memory_allocator->IsCorrupt())
553 return ACCESS_RESULT_DATA_CORRUPTION;
554
555 // Cache the file data while running in a background thread so that there
556 // shouldn't be any I/O when the data is accessed from the main thread.
557 // Files with an internal profile, those from previous runs that include
558 // a full system profile and are fetched via ProvideIndependentMetrics(),
559 // are loaded on a background task and so there's no need to cache the
560 // data in advance.
561 if (source->association != ASSOCIATE_INTERNAL_PROFILE)
562 memory_allocator->Cache();
563
564 // Create an allocator for the mapped file. Ownership passes to the allocator.
565 source->allocator = std::make_unique<base::PersistentHistogramAllocator>(
566 std::move(memory_allocator));
567
568 // Check that an "independent" file has the necessary information present.
569 if (source->association == ASSOCIATE_INTERNAL_PROFILE &&
570 !PersistentSystemProfile::GetSystemProfile(
571 *source->allocator->memory_allocator(), nullptr)) {
572 return ACCESS_RESULT_NO_PROFILE;
573 }
574
575 return ACCESS_RESULT_SUCCESS;
576 }
577
578 // static
MergeHistogramDeltasFromSource(SourceInfo * source)579 size_t FileMetricsProvider::MergeHistogramDeltasFromSource(SourceInfo* source) {
580 DCHECK(source->allocator);
581 base::PersistentHistogramAllocator::Iterator histogram_iter(
582 source->allocator.get());
583
584 const bool read_only = kSourceOptions[source->type].is_read_only;
585 size_t histogram_count = 0;
586 while (true) {
587 std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
588 if (!histogram)
589 break;
590
591 if (read_only) {
592 source->allocator->MergeHistogramFinalDeltaToStatisticsRecorder(
593 histogram.get());
594 } else {
595 source->allocator->MergeHistogramDeltaToStatisticsRecorder(
596 histogram.get());
597 }
598 ++histogram_count;
599 }
600
601 source->read_complete = true;
602 DVLOG(1) << "Reported " << histogram_count << " histograms from "
603 << source->path.value();
604 return histogram_count;
605 }
606
607 // static
RecordHistogramSnapshotsFromSource(base::HistogramSnapshotManager * snapshot_manager,SourceInfo * source,base::HistogramBase::Flags required_flags)608 void FileMetricsProvider::RecordHistogramSnapshotsFromSource(
609 base::HistogramSnapshotManager* snapshot_manager,
610 SourceInfo* source,
611 base::HistogramBase::Flags required_flags) {
612 DCHECK_NE(SOURCE_HISTOGRAMS_ACTIVE_FILE, source->type);
613
614 base::PersistentHistogramAllocator::Iterator histogram_iter(
615 source->allocator.get());
616
617 int histogram_count = 0;
618 while (true) {
619 std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
620 if (!histogram)
621 break;
622 if (histogram->HasFlags(required_flags)) {
623 snapshot_manager->PrepareFinalDelta(histogram.get());
624 ++histogram_count;
625 }
626 }
627
628 source->read_complete = true;
629 DVLOG(1) << "Reported " << histogram_count << " histograms from "
630 << source->path.value();
631 }
632
HandleFilterSource(SourceInfo * source,const base::FilePath & path)633 FileMetricsProvider::AccessResult FileMetricsProvider::HandleFilterSource(
634 SourceInfo* source,
635 const base::FilePath& path) {
636 if (!source->filter)
637 return ACCESS_RESULT_SUCCESS;
638
639 // Alternatively, pass a Params object to the filter like what was originally
640 // used to configure the source.
641 // Params params(path, source->type, source->association, source->prefs_key);
642 FilterAction action = source->filter.Run(path);
643 switch (action) {
644 case FILTER_PROCESS_FILE:
645 // Process the file.
646 return ACCESS_RESULT_SUCCESS;
647
648 case FILTER_ACTIVE_THIS_PID:
649 // Even the file for the current process has to be touched or its stamp
650 // will be less than "last processed" and thus skipped on future runs,
651 // even those done by new instances of the browser if a pref key is
652 // provided so that the last-uploaded stamp is recorded.
653 case FILTER_TRY_LATER: {
654 // Touch the file with the current timestamp making it (presumably) the
655 // newest file in the directory.
656 base::Time now = base::Time::Now();
657 base::TouchFile(path, /*accessed=*/now, /*modified=*/now);
658 if (action == FILTER_ACTIVE_THIS_PID)
659 return ACCESS_RESULT_THIS_PID;
660 return ACCESS_RESULT_FILTER_TRY_LATER;
661 }
662
663 case FILTER_SKIP_FILE:
664 switch (source->type) {
665 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
666 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
667 // Only "atomic" files are deleted (best-effort).
668 DeleteFileWhenPossible(path);
669 break;
670 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
671 // File will presumably get modified elsewhere and thus tried again.
672 break;
673 }
674 return ACCESS_RESULT_FILTER_SKIP_FILE;
675 }
676
677 // Code never gets here but some compilers don't realize that and so complain
678 // that "not all control paths return a value".
679 NOTREACHED();
680 return ACCESS_RESULT_SUCCESS;
681 }
682
683 /* static */
ProvideIndependentMetricsOnTaskRunner(SourceInfo * source,SystemProfileProto * system_profile_proto,base::HistogramSnapshotManager * snapshot_manager)684 bool FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner(
685 SourceInfo* source,
686 SystemProfileProto* system_profile_proto,
687 base::HistogramSnapshotManager* snapshot_manager) {
688 if (PersistentSystemProfile::GetSystemProfile(
689 *source->allocator->memory_allocator(), system_profile_proto)) {
690 // Pass a custom RangesManager so that we do not register the BucketRanges
691 // with the global statistics recorder. Otherwise, it could add unnecessary
692 // contention, and a low amount of extra memory that will never be released.
693 source->allocator->SetRangesManager(new base::RangesManager());
694 system_profile_proto->mutable_stability()->set_from_previous_run(true);
695 RecordHistogramSnapshotsFromSource(
696 snapshot_manager, source,
697 /*required_flags=*/base::HistogramBase::kUmaTargetedHistogramFlag);
698 return true;
699 }
700
701 return false;
702 }
703
AppendToSamplesCountPref(std::vector<size_t> samples_counts)704 void FileMetricsProvider::AppendToSamplesCountPref(
705 std::vector<size_t> samples_counts) {
706 ScopedListPrefUpdate update(pref_service_,
707 metrics::prefs::kMetricsFileMetricsMetadata);
708 for (size_t samples_count : samples_counts) {
709 update->Append(static_cast<int>(samples_count));
710 }
711 }
712
713 // static
CollectFileMetadataFromSource(SourceInfo * source)714 size_t FileMetricsProvider::CollectFileMetadataFromSource(SourceInfo* source) {
715 base::HistogramBase::Count samples_count = 0;
716 base::PersistentHistogramAllocator::Iterator it{source->allocator.get()};
717 std::unique_ptr<base::HistogramBase> histogram;
718 while ((histogram = it.GetNext()) != nullptr) {
719 samples_count += histogram->SnapshotFinalDelta()->TotalCount();
720 }
721 source->read_complete = true;
722 return samples_count;
723 }
724
ScheduleSourcesCheck()725 void FileMetricsProvider::ScheduleSourcesCheck() {
726 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
727
728 if (sources_to_check_.empty())
729 return;
730
731 // Create an independent list of sources for checking. This will be Owned()
732 // by the reply call given to the task-runner, to be deleted when that call
733 // has returned. It is also passed Unretained() to the task itself, safe
734 // because that must complete before the reply runs.
735 SourceInfoList* check_list = new SourceInfoList();
736 std::swap(sources_to_check_, *check_list);
737 task_runner_->PostTaskAndReplyWithResult(
738 FROM_HERE,
739 base::BindOnce(
740 &FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner,
741 base::Unretained(check_list)),
742 base::BindOnce(&FileMetricsProvider::RecordSourcesChecked,
743 weak_factory_.GetWeakPtr(), base::Owned(check_list)));
744 }
745
RecordSourcesChecked(SourceInfoList * checked,std::vector<size_t> samples_counts)746 void FileMetricsProvider::RecordSourcesChecked(
747 SourceInfoList* checked,
748 std::vector<size_t> samples_counts) {
749 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
750
751 AppendToSamplesCountPref(std::move(samples_counts));
752
753 // Sources that still have an allocator at this point are read/write "active"
754 // files that may need their contents merged on-demand. If there is no
755 // allocator (not a read/write file) but a read was done on the task-runner,
756 // try again immediately to see if more is available (in a directory of
757 // files). Otherwise, remember the source for checking again at a later time.
758 bool did_read = false;
759 for (auto iter = checked->begin(); iter != checked->end();) {
760 auto temp = iter++;
761 SourceInfo* source = temp->get();
762 if (source->read_complete) {
763 RecordSourceAsRead(source);
764 did_read = true;
765 }
766 if (source->allocator) {
767 if (source->association == ASSOCIATE_INTERNAL_PROFILE) {
768 sources_with_profile_.splice(sources_with_profile_.end(), *checked,
769 temp);
770 } else {
771 sources_mapped_.splice(sources_mapped_.end(), *checked, temp);
772 }
773 } else {
774 sources_to_check_.splice(sources_to_check_.end(), *checked, temp);
775 }
776 }
777
778 // If a read was done, schedule another one immediately. In the case of a
779 // directory of files, this ensures that all entries get processed. It's
780 // done here instead of as a loop in CheckAndMergeMetricSourcesOnTaskRunner
781 // so that (a) it gives the disk a rest and (b) testing of individual reads
782 // is possible.
783 if (did_read)
784 ScheduleSourcesCheck();
785 }
786
// Posts a task to |task_runner_| that deletes the file at |path| by way of
// DeleteFileWhenPossible(). The deletion result is not reported back.
void FileMetricsProvider::DeleteFileAsync(const base::FilePath& path) {
  task_runner_->PostTask(FROM_HERE,
                         base::BindOnce(DeleteFileWhenPossible, path));
}
791
RecordSourceAsRead(SourceInfo * source)792 void FileMetricsProvider::RecordSourceAsRead(SourceInfo* source) {
793 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
794
795 // Persistently record the "last seen" timestamp of the source file to
796 // ensure that the file is never read again unless it is modified again.
797 if (pref_service_ && !source->prefs_key.empty()) {
798 pref_service_->SetTime(
799 metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key,
800 source->last_seen);
801 }
802 }
803
OnDidCreateMetricsLog()804 void FileMetricsProvider::OnDidCreateMetricsLog() {
805 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
806
807 // Schedule a check to see if there are new metrics to load. If so, they will
808 // be reported during the next collection run after this one. The check is run
809 // off of a MayBlock() TaskRunner so as to not cause delays on the main UI
810 // thread (which is currently where metric collection is done).
811 ScheduleSourcesCheck();
812
813 // Clear any data for initial metrics since they're always reported
814 // before the first call to this method. It couldn't be released after
815 // being reported in RecordInitialHistogramSnapshots because the data
816 // will continue to be used by the caller after that method returns. Once
817 // here, though, all actions to be done on the data have been completed.
818 for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_)
819 DeleteFileAsync(source->path);
820 sources_for_previous_run_.clear();
821 }
822
HasIndependentMetrics()823 bool FileMetricsProvider::HasIndependentMetrics() {
824 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
825 return !sources_with_profile_.empty() || SimulateIndependentMetrics();
826 }
827
ProvideIndependentMetrics(base::OnceCallback<void (bool)> done_callback,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager)828 void FileMetricsProvider::ProvideIndependentMetrics(
829 base::OnceCallback<void(bool)> done_callback,
830 ChromeUserMetricsExtension* uma_proto,
831 base::HistogramSnapshotManager* snapshot_manager) {
832 SystemProfileProto* system_profile_proto =
833 uma_proto->mutable_system_profile();
834 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
835
836 if (sources_with_profile_.empty()) {
837 std::move(done_callback).Run(false);
838 return;
839 }
840
841 std::unique_ptr<SourceInfo> source =
842 std::move(*sources_with_profile_.begin());
843 sources_with_profile_.pop_front();
844 SourceInfo* source_ptr = source.get();
845 DCHECK(source->allocator);
846
847 // Do the actual work as a background task.
848 task_runner_->PostTaskAndReplyWithResult(
849 FROM_HERE,
850 base::BindOnce(
851 &FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner,
852 source_ptr, system_profile_proto, snapshot_manager),
853 base::BindOnce(&FileMetricsProvider::ProvideIndependentMetricsCleanup,
854 weak_factory_.GetWeakPtr(), std::move(done_callback),
855 std::move(source)));
856 }
857
ProvideIndependentMetricsCleanup(base::OnceCallback<void (bool)> done_callback,std::unique_ptr<SourceInfo> source,bool success)858 void FileMetricsProvider::ProvideIndependentMetricsCleanup(
859 base::OnceCallback<void(bool)> done_callback,
860 std::unique_ptr<SourceInfo> source,
861 bool success) {
862 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
863
864 // Regardless of whether this source was successfully recorded, it is
865 // never read again.
866 source->read_complete = true;
867 RecordSourceAsRead(source.get());
868 sources_to_check_.push_back(std::move(source));
869 ScheduleSourcesCheck();
870
871 // Execute the chained callback.
872 // TODO(crbug/1052796): Remove the UMA timer code, which is currently used to
873 // determine if it is worth to finalize independent logs in the background
874 // by measuring the time it takes to execute the callback
875 // MetricsService::PrepareProviderMetricsLogDone().
876 base::TimeTicks start_time = base::TimeTicks::Now();
877 std::move(done_callback).Run(success);
878 if (success) {
879 // We don't use the SCOPED_UMA_HISTOGRAM_TIMER macro because we want to
880 // measure the time it takes to finalize an independent log, and that only
881 // happens when |success| is true.
882 base::UmaHistogramTimes(
883 "UMA.IndependentLog.FileMetricsProvider.FinalizeTime",
884 base::TimeTicks::Now() - start_time);
885 }
886 }
887
HasPreviousSessionData()888 bool FileMetricsProvider::HasPreviousSessionData() {
889 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
890
891 // Check all sources for previous run to see if they need to be read.
892 for (auto iter = sources_for_previous_run_.begin();
893 iter != sources_for_previous_run_.end();) {
894 auto temp = iter++;
895 SourceInfo* source = temp->get();
896
897 // This would normally be done on a background I/O thread but there
898 // hasn't been a chance to run any at the time this method is called.
899 // Do the check in-line.
900 AccessResult result = CheckAndMapMetricSource(source);
901 UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.InitialAccessResult",
902 result, ACCESS_RESULT_MAX);
903
904 // If it couldn't be accessed, remove it from the list. There is only ever
905 // one chance to record it so no point keeping it around for later. Also
906 // mark it as having been read since uploading it with a future browser
907 // run would associate it with the then-previous run which would no longer
908 // be the run from which it came.
909 if (result != ACCESS_RESULT_SUCCESS) {
910 DCHECK(!source->allocator);
911 RecordSourceAsRead(source);
912 DeleteFileAsync(source->path);
913 sources_for_previous_run_.erase(temp);
914 continue;
915 }
916
917 DCHECK(source->allocator);
918
919 // If the source should be associated with an existing internal profile,
920 // move it to |sources_with_profile_| for later upload.
921 if (source->association == ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN) {
922 if (PersistentSystemProfile::HasSystemProfile(
923 *source->allocator->memory_allocator())) {
924 sources_with_profile_.splice(sources_with_profile_.end(),
925 sources_for_previous_run_, temp);
926 }
927 }
928 }
929
930 return !sources_for_previous_run_.empty();
931 }
932
RecordInitialHistogramSnapshots(base::HistogramSnapshotManager * snapshot_manager)933 void FileMetricsProvider::RecordInitialHistogramSnapshots(
934 base::HistogramSnapshotManager* snapshot_manager) {
935 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
936
937 for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_) {
938 // The source needs to have an allocator attached to it in order to read
939 // histograms out of it.
940 DCHECK(!source->read_complete);
941 DCHECK(source->allocator);
942
943 // Dump all stability histograms contained within the source to the
944 // snapshot-manager.
945 RecordHistogramSnapshotsFromSource(
946 snapshot_manager, source.get(),
947 /*required_flags=*/base::HistogramBase::kUmaStabilityHistogramFlag);
948
949 // Update the last-seen time so it isn't read again unless it changes.
950 RecordSourceAsRead(source.get());
951 }
952 }
953
MergeHistogramDeltas()954 void FileMetricsProvider::MergeHistogramDeltas() {
955 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
956
957 for (std::unique_ptr<SourceInfo>& source : sources_mapped_) {
958 MergeHistogramDeltasFromSource(source.get());
959 }
960 }
961
SimulateIndependentMetrics()962 bool FileMetricsProvider::SimulateIndependentMetrics() {
963 if (!pref_service_->HasPrefPath(
964 metrics::prefs::kMetricsFileMetricsMetadata)) {
965 return false;
966 }
967
968 ScopedListPrefUpdate list_pref(pref_service_,
969 metrics::prefs::kMetricsFileMetricsMetadata);
970 base::Value::List& list_value = list_pref.Get();
971 if (list_value.empty())
972 return false;
973
974 size_t count = pref_service_->GetInteger(
975 metrics::prefs::kStabilityFileMetricsUnsentSamplesCount);
976 pref_service_->SetInteger(
977 metrics::prefs::kStabilityFileMetricsUnsentSamplesCount,
978 list_value[0].GetInt() + count);
979 pref_service_->SetInteger(
980 metrics::prefs::kStabilityFileMetricsUnsentFilesCount,
981 list_value.size() - 1);
982 list_value.erase(list_value.begin());
983
984 return true;
985 }
986
987 } // namespace metrics
988