1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 #include "components/metrics/file_metrics_provider.h"
11
12 #include <stddef.h>
13
14 #include <memory>
15 #include <string_view>
16 #include <vector>
17
18 #include "base/command_line.h"
19 #include "base/containers/flat_map.h"
20 #include "base/debug/crash_logging.h"
21 #include "base/feature_list.h"
22 #include "base/files/file.h"
23 #include "base/files/file_enumerator.h"
24 #include "base/files/file_util.h"
25 #include "base/files/memory_mapped_file.h"
26 #include "base/functional/bind.h"
27 #include "base/logging.h"
28 #include "base/metrics/histogram_base.h"
29 #include "base/metrics/histogram_functions.h"
30 #include "base/metrics/histogram_macros.h"
31 #include "base/metrics/persistent_histogram_allocator.h"
32 #include "base/metrics/persistent_memory_allocator.h"
33 #include "base/metrics/ranges_manager.h"
34 #include "base/strings/stringprintf.h"
35 #include "base/task/task_traits.h"
36 #include "base/task/thread_pool.h"
37 #include "base/time/time.h"
38 #include "components/metrics/metrics_features.h"
39 #include "components/metrics/metrics_log.h"
40 #include "components/metrics/metrics_pref_names.h"
41 #include "components/metrics/metrics_service.h"
42 #include "components/metrics/persistent_histograms.h"
43 #include "components/metrics/persistent_system_profile.h"
44 #include "components/prefs/pref_registry_simple.h"
45 #include "components/prefs/pref_service.h"
46 #include "components/prefs/scoped_user_pref_update.h"
47
48 namespace metrics {
49 namespace {
50
51 // These structures provide values used to define how files are opened and
52 // accessed. It obviates the need for multiple code-paths within several of
53 // the methods.
54 struct SourceOptions {
55 // The flags to be used to open a file on disk.
56 int file_open_flags;
57
58 // The access mode to be used when mapping a file into memory.
59 base::MemoryMappedFile::Access memory_mapped_access;
60
61 // Indicates if the file is to be accessed read-only.
62 bool is_read_only;
63 };
64
65 // Opening a file typically requires at least these flags.
66 constexpr int STD_OPEN = base::File::FLAG_OPEN | base::File::FLAG_READ;
67
68 constexpr SourceOptions kSourceOptions[] = {
69 // SOURCE_HISTOGRAMS_ATOMIC_FILE
70 {
71 // Ensure that no other process reads this at the same time.
72 STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
73 base::MemoryMappedFile::READ_ONLY,
74 true,
75 },
76 // SOURCE_HISTOGRAMS_ATOMIC_DIR
77 {
78 // Ensure that no other process reads this at the same time.
79 STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
80 base::MemoryMappedFile::READ_ONLY,
81 true,
82 },
83 // SOURCE_HISTOGRAMS_ACTIVE_FILE
84 {
85 // Allow writing to the file. This is needed so we can keep track of
86 // deltas that have been uploaded (by modifying the file), while the
87 // file may still be open by an external process (e.g. Crashpad).
88 STD_OPEN | base::File::FLAG_WRITE,
89 base::MemoryMappedFile::READ_WRITE,
90 false,
91 },
92 };
93
DeleteFileWhenPossible(const base::FilePath & path)94 void DeleteFileWhenPossible(const base::FilePath& path) {
95 // Open (with delete) and then immediately close the file by going out of
96 // scope. This is the only cross-platform safe way to delete a file that may
97 // be open elsewhere, a distinct possibility given the asynchronous nature
98 // of the delete task.
99 base::File file(path, base::File::FLAG_OPEN | base::File::FLAG_READ |
100 base::File::FLAG_DELETE_ON_CLOSE);
101 }
102
103 } // namespace
104
105 // This structure stores all the information about the sources being monitored
106 // and their current reporting state.
107 struct FileMetricsProvider::SourceInfo {
SourceInfometrics::FileMetricsProvider::SourceInfo108 explicit SourceInfo(const Params& params)
109 : type(params.type),
110 association(params.association),
111 prefs_key(params.prefs_key),
112 filter(params.filter),
113 max_age(params.max_age),
114 max_dir_kib(params.max_dir_kib),
115 max_dir_files(params.max_dir_files) {
116 switch (type) {
117 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
118 DCHECK(prefs_key.empty());
119 [[fallthrough]];
120 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
121 path = params.path;
122 break;
123 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
124 directory = params.path;
125 break;
126 }
127 }
128
129 SourceInfo(const SourceInfo&) = delete;
130 SourceInfo& operator=(const SourceInfo&) = delete;
131
132 ~SourceInfo() = default;
133
134 struct FoundFile {
135 base::FilePath path;
136 base::FileEnumerator::FileInfo info;
137 };
138 using FoundFiles = base::flat_map<base::Time, FoundFile>;
139
140 // How to access this source (file/dir, atomic/active).
141 const SourceType type;
142
143 // With what run this source is associated.
144 const SourceAssociation association;
145
146 // Where on disk the directory is located. This will only be populated when
147 // a directory is being monitored.
148 base::FilePath directory;
149
150 // The files found in the above directory, ordered by last-modified.
151 std::unique_ptr<FoundFiles> found_files;
152
153 // Where on disk the file is located. If a directory is being monitored,
154 // this will be updated for whatever file is being read.
155 base::FilePath path;
156
157 // Name used inside prefs to persistent metadata.
158 std::string prefs_key;
159
160 // The filter callback for determining what to do with found files.
161 FilterCallback filter;
162
163 // The maximum allowed age of a file.
164 base::TimeDelta max_age;
165
166 // The maximum allowed bytes in a directory.
167 size_t max_dir_kib;
168
169 // The maximum allowed files in a directory.
170 size_t max_dir_files;
171
172 // The last-seen time of this source to detect change.
173 base::Time last_seen;
174
175 // Indicates if the data has been read out or not.
176 bool read_complete = false;
177
178 // Once a file has been recognized as needing to be read, it is mapped
179 // into memory and assigned to an |allocator| object.
180 std::unique_ptr<base::PersistentHistogramAllocator> allocator;
181 };
182
Params(const base::FilePath & path,SourceType type,SourceAssociation association,std::string_view prefs_key)183 FileMetricsProvider::Params::Params(const base::FilePath& path,
184 SourceType type,
185 SourceAssociation association,
186 std::string_view prefs_key)
187 : path(path), type(type), association(association), prefs_key(prefs_key) {}
188
189 FileMetricsProvider::Params::~Params() = default;
190
FileMetricsProvider(PrefService * local_state)191 FileMetricsProvider::FileMetricsProvider(PrefService* local_state)
192 : pref_service_(local_state) {
193 base::StatisticsRecorder::RegisterHistogramProvider(
194 weak_factory_.GetWeakPtr());
195 }
196
197 FileMetricsProvider::~FileMetricsProvider() = default;
198
RegisterSource(const Params & params,bool metrics_reporting_enabled)199 void FileMetricsProvider::RegisterSource(const Params& params,
200 bool metrics_reporting_enabled) {
201 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
202
203 // Ensure that kSourceOptions has been filled for this type.
204 DCHECK_GT(std::size(kSourceOptions), static_cast<size_t>(params.type));
205
206 if (!metrics_reporting_enabled) {
207 // When metrics reporting is not enabled, existing files should be deleted,
208 // since they won't be getting deleted as part of the upload flow.
209 if (params.type == SOURCE_HISTOGRAMS_ATOMIC_DIR ||
210 params.type == SOURCE_HISTOGRAMS_ATOMIC_FILE) {
211 base::ThreadPool::PostTask(
212 FROM_HERE,
213 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
214 base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
215 params.type == SOURCE_HISTOGRAMS_ATOMIC_DIR
216 ? base::GetDeletePathRecursivelyCallback(params.path)
217 : base::GetDeleteFileCallback(params.path));
218 }
219 return;
220 }
221
222 std::unique_ptr<SourceInfo> source(new SourceInfo(params));
223
224 // |prefs_key| may be empty if the caller does not wish to persist the
225 // state across instances of the program.
226 if (pref_service_ && !params.prefs_key.empty()) {
227 source->last_seen = pref_service_->GetTime(
228 metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key);
229 }
230
231 switch (params.association) {
232 case ASSOCIATE_CURRENT_RUN:
233 case ASSOCIATE_INTERNAL_PROFILE:
234 case ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER:
235 sources_to_check_.push_back(std::move(source));
236 break;
237 case ASSOCIATE_PREVIOUS_RUN:
238 case ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN:
239 DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_FILE, source->type);
240 sources_for_previous_run_.push_back(std::move(source));
241 break;
242 }
243 }
244
245 // static
RegisterSourcePrefs(PrefRegistrySimple * prefs,std::string_view prefs_key)246 void FileMetricsProvider::RegisterSourcePrefs(PrefRegistrySimple* prefs,
247 std::string_view prefs_key) {
248 prefs->RegisterInt64Pref(
249 metrics::prefs::kMetricsLastSeenPrefix + std::string(prefs_key), 0);
250 }
251
252 // static
RegisterPrefs(PrefRegistrySimple * prefs)253 void FileMetricsProvider::RegisterPrefs(PrefRegistrySimple* prefs) {
254 prefs->RegisterListPref(metrics::prefs::kMetricsFileMetricsMetadata);
255 }
256
257 // static
RecordAccessResult(AccessResult result)258 void FileMetricsProvider::RecordAccessResult(AccessResult result) {
259 UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.AccessResult", result,
260 ACCESS_RESULT_MAX);
261 }
262
263 // static
LocateNextFileInDirectory(SourceInfo * source)264 bool FileMetricsProvider::LocateNextFileInDirectory(SourceInfo* source) {
265 DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_DIR, source->type);
266 DCHECK(!source->directory.empty());
267
268 // Cumulative directory stats. These will remain zero if the directory isn't
269 // scanned but that's okay since any work they would cause to be done below
270 // would have been done during the first call where the directory was fully
271 // scanned.
272 size_t total_size_kib = 0; // Using KiB allows 4TiB even on 32-bit builds.
273 size_t file_count = 0;
274
275 base::Time now_time = base::Time::Now();
276 if (!source->found_files) {
277 source->found_files = std::make_unique<SourceInfo::FoundFiles>();
278 base::FileEnumerator file_iter(source->directory, /*recursive=*/false,
279 base::FileEnumerator::FILES);
280 SourceInfo::FoundFile found_file;
281
282 // Open the directory and find all the files, remembering the last-modified
283 // time of each.
284 for (found_file.path = file_iter.Next(); !found_file.path.empty();
285 found_file.path = file_iter.Next()) {
286 found_file.info = file_iter.GetInfo();
287
288 // Ignore directories.
289 if (found_file.info.IsDirectory()) {
290 continue;
291 }
292
293 // Ignore temporary files.
294 base::FilePath::CharType first_character =
295 found_file.path.BaseName().value().front();
296 if (first_character == FILE_PATH_LITERAL('.') ||
297 first_character == FILE_PATH_LITERAL('_')) {
298 continue;
299 }
300
301 // Ignore non-PMA (Persistent Memory Allocator) files.
302 if (found_file.path.Extension() !=
303 base::PersistentMemoryAllocator::kFileExtension) {
304 continue;
305 }
306
307 // Process real files.
308 total_size_kib += found_file.info.GetSize() >> 10;
309 base::Time modified = found_file.info.GetLastModifiedTime();
310 if (modified > source->last_seen) {
311 source->found_files->emplace(modified, std::move(found_file));
312 ++file_count;
313 } else {
314 // This file has been read. Try to delete it. Ignore any errors because
315 // the file may be un-removeable by this process. It could, for example,
316 // have been created by a privileged process like setup.exe. Even if it
317 // is not removed, it will continue to be ignored bacuse of the older
318 // modification time.
319 base::DeleteFile(found_file.path);
320 }
321 }
322 }
323
324 // Filter files from the front until one is found for processing.
325 bool have_file = false;
326 while (!source->found_files->empty()) {
327 SourceInfo::FoundFile found =
328 std::move(source->found_files->begin()->second);
329 source->found_files->erase(source->found_files->begin());
330
331 bool too_many =
332 source->max_dir_files > 0 && file_count > source->max_dir_files;
333 bool too_big =
334 source->max_dir_kib > 0 && total_size_kib > source->max_dir_kib;
335 bool too_old =
336 source->max_age != base::TimeDelta() &&
337 now_time - found.info.GetLastModifiedTime() > source->max_age;
338 if (too_many || too_big || too_old) {
339 base::DeleteFile(found.path);
340 --file_count;
341 total_size_kib -= found.info.GetSize() >> 10;
342 RecordAccessResult(too_many ? ACCESS_RESULT_TOO_MANY_FILES
343 : too_big ? ACCESS_RESULT_TOO_MANY_BYTES
344 : ACCESS_RESULT_TOO_OLD);
345 continue;
346 }
347
348 AccessResult result = HandleFilterSource(source, found.path);
349 if (result == ACCESS_RESULT_SUCCESS) {
350 source->path = std::move(found.path);
351 have_file = true;
352 break;
353 }
354
355 // Record the result. Success will be recorded by the caller.
356 if (result != ACCESS_RESULT_THIS_PID) {
357 RecordAccessResult(result);
358 }
359 }
360
361 return have_file;
362 }
363
364 // static
FinishedWithSource(SourceInfo * source,AccessResult result)365 void FileMetricsProvider::FinishedWithSource(SourceInfo* source,
366 AccessResult result) {
367 // Different source types require different post-processing.
368 switch (source->type) {
369 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
370 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
371 // Done with this file so delete the allocator and its owned file.
372 source->allocator.reset();
373 // Remove the file if has been recorded. This prevents them from
374 // accumulating or also being recorded by different instances of
375 // the browser.
376 if (result == ACCESS_RESULT_SUCCESS ||
377 result == ACCESS_RESULT_NOT_MODIFIED ||
378 result == ACCESS_RESULT_MEMORY_DELETED ||
379 result == ACCESS_RESULT_TOO_OLD) {
380 DeleteFileWhenPossible(source->path);
381 }
382 break;
383 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
384 // Keep the allocator open so it doesn't have to be re-mapped each
385 // time. This also allows the contents to be merged on-demand.
386 break;
387 }
388 }
389
390 // static
CheckAndMergeMetricSourcesOnTaskRunner(SourceInfoList * sources)391 std::vector<size_t> FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner(
392 SourceInfoList* sources) {
393 // This method has all state information passed in |sources| and is intended
394 // to run on a worker thread rather than the UI thread.
395 std::vector<size_t> samples_counts;
396
397 for (std::unique_ptr<SourceInfo>& source : *sources) {
398 AccessResult result;
399 do {
400 result = CheckAndMapMetricSource(source.get());
401
402 // Some results are not reported in order to keep the dashboard clean.
403 if (result != ACCESS_RESULT_DOESNT_EXIST &&
404 result != ACCESS_RESULT_NOT_MODIFIED &&
405 result != ACCESS_RESULT_THIS_PID) {
406 RecordAccessResult(result);
407 }
408
409 // If there are no files (or no more files) in this source, stop now.
410 if (result == ACCESS_RESULT_DOESNT_EXIST)
411 break;
412
413 // Mapping was successful. Merge it.
414 if (result == ACCESS_RESULT_SUCCESS) {
415 // Metrics associated with internal profiles have to be fetched directly
416 // so just keep the mapping for use by the main thread.
417 if (source->association == ASSOCIATE_INTERNAL_PROFILE)
418 break;
419
420 if (source->association == ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER) {
421 samples_counts.push_back(CollectFileMetadataFromSource(source.get()));
422 } else {
423 size_t histograms_count =
424 MergeHistogramDeltasFromSource(source.get());
425 if (!source->prefs_key.empty()) {
426 base::UmaHistogramCounts1000(
427 base::StringPrintf(
428 "UMA.FileMetricsProvider.%s.MergedHistogramsCount",
429 source->prefs_key.c_str()),
430 histograms_count);
431 }
432 }
433 DCHECK(source->read_complete);
434 }
435
436 // All done with this source.
437 FinishedWithSource(source.get(), result);
438
439 // If it's a directory, keep trying until a file is successfully opened.
440 // When there are no more files, ACCESS_RESULT_DOESNT_EXIST will be
441 // returned and the loop will exit above.
442 } while (result != ACCESS_RESULT_SUCCESS && !source->directory.empty());
443
444 // If the set of known files is empty, clear the object so the next run
445 // will do a fresh scan of the directory.
446 if (source->found_files && source->found_files->empty())
447 source->found_files.reset();
448 }
449
450 return samples_counts;
451 }
452
453 // This method has all state information passed in |source| and is intended
454 // to run on a worker thread rather than the UI thread.
455 // static
CheckAndMapMetricSource(SourceInfo * source)456 FileMetricsProvider::AccessResult FileMetricsProvider::CheckAndMapMetricSource(
457 SourceInfo* source) {
458 // If source was read, clean up after it.
459 if (source->read_complete)
460 FinishedWithSource(source, ACCESS_RESULT_SUCCESS);
461 source->read_complete = false;
462 DCHECK(!source->allocator);
463
464 // If the source is a directory, look for files within it.
465 if (!source->directory.empty() && !LocateNextFileInDirectory(source))
466 return ACCESS_RESULT_DOESNT_EXIST;
467
468 // Do basic validation on the file metadata.
469 base::File::Info info;
470 if (!base::GetFileInfo(source->path, &info))
471 return ACCESS_RESULT_DOESNT_EXIST;
472
473 if (info.is_directory || info.size == 0)
474 return ACCESS_RESULT_INVALID_FILE;
475
476 if (source->last_seen >= info.last_modified)
477 return ACCESS_RESULT_NOT_MODIFIED;
478 if (source->max_age != base::TimeDelta() &&
479 base::Time::Now() - info.last_modified > source->max_age) {
480 return ACCESS_RESULT_TOO_OLD;
481 }
482
483 // Non-directory files still need to be filtered.
484 if (source->directory.empty()) {
485 AccessResult result = HandleFilterSource(source, source->path);
486 if (result != ACCESS_RESULT_SUCCESS)
487 return result;
488 }
489
490 // A new file of metrics has been found.
491 base::File file(source->path, kSourceOptions[source->type].file_open_flags);
492 if (!file.IsValid())
493 return ACCESS_RESULT_NO_OPEN;
494
495 // Check that file is writable if that is expected. If a write is attempted
496 // on an unwritable memory-mapped file, a SIGBUS will cause a crash.
497 const bool read_only = kSourceOptions[source->type].is_read_only;
498 if (!read_only) {
499 constexpr int kTestSize = 16;
500 char header[kTestSize];
501 int amount = file.Read(0, header, kTestSize);
502 if (amount != kTestSize)
503 return ACCESS_RESULT_INVALID_CONTENTS;
504
505 char zeros[kTestSize] = {};
506 file.Write(0, zeros, kTestSize);
507 file.Flush();
508
509 // A crash here would be unfortunate as the file would be left invalid
510 // and skipped/deleted by later attempts. This is unlikely, however, and
511 // the benefit of avoiding crashes from mapping as read/write a file that
512 // can't be written more than justifies the risk.
513
514 char check[kTestSize];
515 amount = file.Read(0, check, kTestSize);
516 if (amount != kTestSize)
517 return ACCESS_RESULT_INVALID_CONTENTS;
518 if (memcmp(check, zeros, kTestSize) != 0)
519 return ACCESS_RESULT_NOT_WRITABLE;
520
521 file.Write(0, header, kTestSize);
522 file.Flush();
523 amount = file.Read(0, check, kTestSize);
524 if (amount != kTestSize)
525 return ACCESS_RESULT_INVALID_CONTENTS;
526 if (memcmp(check, header, kTestSize) != 0)
527 return ACCESS_RESULT_NOT_WRITABLE;
528 }
529
530 std::unique_ptr<base::MemoryMappedFile> mapped(new base::MemoryMappedFile());
531 if (!mapped->Initialize(std::move(file),
532 kSourceOptions[source->type].memory_mapped_access)) {
533 return ACCESS_RESULT_SYSTEM_MAP_FAILURE;
534 }
535
536 // Ensure any problems below don't occur repeatedly.
537 source->last_seen = info.last_modified;
538
539 // Test the validity of the file contents.
540 if (!base::FilePersistentMemoryAllocator::IsFileAcceptable(*mapped,
541 read_only)) {
542 return ACCESS_RESULT_INVALID_CONTENTS;
543 }
544
545 // Map the file and validate it.
546 std::unique_ptr<base::FilePersistentMemoryAllocator> memory_allocator =
547 std::make_unique<base::FilePersistentMemoryAllocator>(
548 std::move(mapped), 0, 0, std::string_view(),
549 read_only ? base::FilePersistentMemoryAllocator::kReadOnly
550 : base::FilePersistentMemoryAllocator::kReadWriteExisting);
551 if (memory_allocator->GetMemoryState() ==
552 base::PersistentMemoryAllocator::MEMORY_DELETED) {
553 return ACCESS_RESULT_MEMORY_DELETED;
554 }
555 if (memory_allocator->IsCorrupt())
556 return ACCESS_RESULT_DATA_CORRUPTION;
557
558 // Cache the file data while running in a background thread so that there
559 // shouldn't be any I/O when the data is accessed from the main thread.
560 // Files with an internal profile, those from previous runs that include
561 // a full system profile and are fetched via ProvideIndependentMetrics(),
562 // are loaded on a background task and so there's no need to cache the
563 // data in advance.
564 if (source->association != ASSOCIATE_INTERNAL_PROFILE)
565 memory_allocator->Cache();
566
567 // Create an allocator for the mapped file. Ownership passes to the allocator.
568 source->allocator = std::make_unique<base::PersistentHistogramAllocator>(
569 std::move(memory_allocator));
570 // Pass a custom RangesManager so that we do not register the BucketRanges
571 // with the global StatisticsRecorder when creating histogram objects using
572 // the allocator's underlying data. This avoids unnecessary contention on the
573 // global StatisticsRecorder lock.
574 // Note: Since RangesManager is not thread safe, this means that |allocator|
575 // must be iterated over one thread at a time (i.e., not concurrently). This
576 // is the case.
577 source->allocator->SetRangesManager(new base::RangesManager());
578
579 // Check that an "independent" file has the necessary information present.
580 if (source->association == ASSOCIATE_INTERNAL_PROFILE &&
581 !PersistentSystemProfile::GetSystemProfile(
582 *source->allocator->memory_allocator(), nullptr)) {
583 return ACCESS_RESULT_NO_PROFILE;
584 }
585
586 return ACCESS_RESULT_SUCCESS;
587 }
588
589 // static
MergeHistogramDeltasFromSource(SourceInfo * source)590 size_t FileMetricsProvider::MergeHistogramDeltasFromSource(SourceInfo* source) {
591 DCHECK(source->allocator);
592 base::PersistentHistogramAllocator::Iterator histogram_iter(
593 source->allocator.get());
594
595 const bool read_only = kSourceOptions[source->type].is_read_only;
596 size_t histogram_count = 0;
597 while (true) {
598 std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
599 if (!histogram)
600 break;
601
602 if (read_only) {
603 source->allocator->MergeHistogramFinalDeltaToStatisticsRecorder(
604 histogram.get());
605 } else {
606 source->allocator->MergeHistogramDeltaToStatisticsRecorder(
607 histogram.get());
608 }
609 ++histogram_count;
610 }
611
612 source->read_complete = true;
613 DVLOG(1) << "Reported " << histogram_count << " histograms from "
614 << source->path.value();
615 return histogram_count;
616 }
617
618 // static
RecordHistogramSnapshotsFromSource(base::HistogramSnapshotManager * snapshot_manager,SourceInfo * source,base::HistogramBase::Flags required_flags)619 void FileMetricsProvider::RecordHistogramSnapshotsFromSource(
620 base::HistogramSnapshotManager* snapshot_manager,
621 SourceInfo* source,
622 base::HistogramBase::Flags required_flags) {
623 DCHECK_NE(SOURCE_HISTOGRAMS_ACTIVE_FILE, source->type);
624
625 base::PersistentHistogramAllocator::Iterator histogram_iter(
626 source->allocator.get());
627
628 int histogram_count = 0;
629 while (true) {
630 std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
631 if (!histogram)
632 break;
633 if (histogram->HasFlags(required_flags)) {
634 snapshot_manager->PrepareFinalDelta(histogram.get());
635 ++histogram_count;
636 }
637 }
638
639 source->read_complete = true;
640 DVLOG(1) << "Reported " << histogram_count << " histograms from "
641 << source->path.value();
642 }
643
HandleFilterSource(SourceInfo * source,const base::FilePath & path)644 FileMetricsProvider::AccessResult FileMetricsProvider::HandleFilterSource(
645 SourceInfo* source,
646 const base::FilePath& path) {
647 if (!source->filter)
648 return ACCESS_RESULT_SUCCESS;
649
650 // Alternatively, pass a Params object to the filter like what was originally
651 // used to configure the source.
652 // Params params(path, source->type, source->association, source->prefs_key);
653 FilterAction action = source->filter.Run(path);
654 switch (action) {
655 case FILTER_PROCESS_FILE:
656 // Process the file.
657 return ACCESS_RESULT_SUCCESS;
658
659 case FILTER_ACTIVE_THIS_PID:
660 // Even the file for the current process has to be touched or its stamp
661 // will be less than "last processed" and thus skipped on future runs,
662 // even those done by new instances of the browser if a pref key is
663 // provided so that the last-uploaded stamp is recorded.
664 case FILTER_TRY_LATER: {
665 // Touch the file with the current timestamp making it (presumably) the
666 // newest file in the directory.
667 base::Time now = base::Time::Now();
668 base::TouchFile(path, /*accessed=*/now, /*modified=*/now);
669 if (action == FILTER_ACTIVE_THIS_PID)
670 return ACCESS_RESULT_THIS_PID;
671 return ACCESS_RESULT_FILTER_TRY_LATER;
672 }
673
674 case FILTER_SKIP_FILE:
675 switch (source->type) {
676 case SOURCE_HISTOGRAMS_ATOMIC_FILE:
677 case SOURCE_HISTOGRAMS_ATOMIC_DIR:
678 // Only "atomic" files are deleted (best-effort).
679 DeleteFileWhenPossible(path);
680 break;
681 case SOURCE_HISTOGRAMS_ACTIVE_FILE:
682 // File will presumably get modified elsewhere and thus tried again.
683 break;
684 }
685 return ACCESS_RESULT_FILTER_SKIP_FILE;
686 }
687
688 NOTREACHED();
689 }
690
691 /* static */
ProvideIndependentMetricsOnTaskRunner(SourceInfo * source,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager,base::OnceClosure serialize_log_callback)692 bool FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner(
693 SourceInfo* source,
694 ChromeUserMetricsExtension* uma_proto,
695 base::HistogramSnapshotManager* snapshot_manager,
696 base::OnceClosure serialize_log_callback) {
697 // Include various crash keys about the file/allocator being read so that if
698 // there is ever a crash report being dumped while reading its contents, we
699 // have some info about its state.
700 // TODO(crbug.com/40064026): Clean this up.
701
702 // Useful to know the metadata version of the source (e.g. to know if some
703 // fields like memory_state below are up to date).
704 SCOPED_CRASH_KEY_NUMBER("PMA", "version",
705 source->allocator->memory_allocator()->version());
706 // Useful to know whether the source comes from a crashed session.
707 SCOPED_CRASH_KEY_NUMBER(
708 "PMA", "memory_state",
709 source->allocator->memory_allocator()->GetMemoryState());
710 // Useful to know the freeptr as it can help determine if the source comes
711 // from a session that crashed due to failing to allocate an object across
712 // different pages.
713 SCOPED_CRASH_KEY_NUMBER("PMA", "freeptr",
714 source->allocator->memory_allocator()->freeptr());
715 SCOPED_CRASH_KEY_BOOL("PMA", "full",
716 source->allocator->memory_allocator()->IsFull());
717 SCOPED_CRASH_KEY_BOOL("PMA", "corrupt",
718 source->allocator->memory_allocator()->IsCorrupt());
719
720 SystemProfileProto* system_profile_proto =
721 uma_proto->mutable_system_profile();
722
723 if (PersistentSystemProfile::GetSystemProfile(
724 *source->allocator->memory_allocator(), system_profile_proto)) {
725 system_profile_proto->mutable_stability()->set_from_previous_run(true);
726 RecordHistogramSnapshotsFromSource(
727 snapshot_manager, source,
728 /*required_flags=*/base::HistogramBase::kUmaTargetedHistogramFlag);
729
730 // NOTE: If you are adding anything here, consider also changing
731 // MetricsStateMetricsProvider::ProvidePreviousSessionData().
732
733 // Use the client UUID stored in the system profile (if there is one) as the
734 // independent log's client ID. Usually, this has no effect, but there are
735 // scenarios where the log may have come from a session that had a different
736 // client ID than the one currently in use (e.g., client ID was reset due to
737 // being detected as a cloned install), so make sure to associate it with
738 // the proper one.
739 const std::string& client_uuid = system_profile_proto->client_uuid();
740 if (!client_uuid.empty()) {
741 uma_proto->set_client_id(MetricsLog::Hash(client_uuid));
742 }
743
744 // Serialize the log while we are still in the background, instead of on the
745 // callback that runs on the main thread.
746 std::move(serialize_log_callback).Run();
747
748 return true;
749 }
750
751 return false;
752 }
753
AppendToSamplesCountPref(std::vector<size_t> samples_counts)754 void FileMetricsProvider::AppendToSamplesCountPref(
755 std::vector<size_t> samples_counts) {
756 ScopedListPrefUpdate update(pref_service_,
757 metrics::prefs::kMetricsFileMetricsMetadata);
758 for (size_t samples_count : samples_counts) {
759 update->Append(static_cast<int>(samples_count));
760 }
761 }
762
763 // static
CollectFileMetadataFromSource(SourceInfo * source)764 size_t FileMetricsProvider::CollectFileMetadataFromSource(SourceInfo* source) {
765 base::HistogramBase::Count samples_count = 0;
766 base::PersistentHistogramAllocator::Iterator it{source->allocator.get()};
767 std::unique_ptr<base::HistogramBase> histogram;
768 while ((histogram = it.GetNext()) != nullptr) {
769 samples_count += histogram->SnapshotFinalDelta()->TotalCount();
770 }
771 source->read_complete = true;
772 return samples_count;
773 }
774
ScheduleSourcesCheck()775 void FileMetricsProvider::ScheduleSourcesCheck() {
776 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
777
778 if (sources_to_check_.empty())
779 return;
780
781 // Create an independent list of sources for checking. This will be Owned()
782 // by the reply call given to the task-runner, to be deleted when that call
783 // has returned. It is also passed Unretained() to the task itself, safe
784 // because that must complete before the reply runs.
785 SourceInfoList* check_list = new SourceInfoList();
786 std::swap(sources_to_check_, *check_list);
787 base::ThreadPool::PostTaskAndReplyWithResult(
788 FROM_HERE,
789 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
790 // SKIP_ON_SHUTDOWN because the task must be run to completion once
791 // started. Since the task may merge metrics from files on disk, the task
792 // should be completed so that those files are deleted (to prevent
793 // re-merging them in another session, which would cause duplication).
794 base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN},
795 base::BindOnce(
796 &FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner,
797 base::Unretained(check_list)),
798 base::BindOnce(&FileMetricsProvider::RecordSourcesChecked,
799 weak_factory_.GetWeakPtr(), base::Owned(check_list)));
800 }
801
RecordSourcesChecked(SourceInfoList * checked,std::vector<size_t> samples_counts)802 void FileMetricsProvider::RecordSourcesChecked(
803 SourceInfoList* checked,
804 std::vector<size_t> samples_counts) {
805 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
806
807 AppendToSamplesCountPref(std::move(samples_counts));
808
809 // Sources that still have an allocator at this point are read/write "active"
810 // files that may need their contents merged on-demand. If there is no
811 // allocator (not a read/write file) but a read was done on the task-runner,
812 // try again immediately to see if more is available (in a directory of
813 // files). Otherwise, remember the source for checking again at a later time.
814 bool did_read = false;
815 for (auto iter = checked->begin(); iter != checked->end();) {
816 auto temp = iter++;
817 SourceInfo* source = temp->get();
818 if (source->read_complete) {
819 RecordSourceAsRead(source);
820 did_read = true;
821 }
822 if (source->allocator) {
823 if (source->association == ASSOCIATE_INTERNAL_PROFILE) {
824 sources_with_profile_.splice(sources_with_profile_.end(), *checked,
825 temp);
826 } else {
827 sources_mapped_.splice(sources_mapped_.end(), *checked, temp);
828 }
829 } else {
830 sources_to_check_.splice(sources_to_check_.end(), *checked, temp);
831 }
832 }
833
834 // If a read was done, schedule another one immediately. In the case of a
835 // directory of files, this ensures that all entries get processed. It's
836 // done here instead of as a loop in CheckAndMergeMetricSourcesOnTaskRunner
837 // so that (a) it gives the disk a rest and (b) testing of individual reads
838 // is possible.
839 if (did_read)
840 ScheduleSourcesCheck();
841 }
842
DeleteFileAsync(const base::FilePath & path)843 void FileMetricsProvider::DeleteFileAsync(const base::FilePath& path) {
844 base::ThreadPool::PostTask(
845 FROM_HERE,
846 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
847 // CONTINUE_ON_SHUTDOWN because files that are scheduled to be deleted
848 // asynchronously are not guaranteed to be deleted this session anyway,
849 // so no need to block shutdown if the task has already started running.
850 // Further, for such files, there are different ways to ensure they won't
851 // be consumed again (i.e., prefs).
852 base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
853 base::BindOnce(DeleteFileWhenPossible, path));
854 }
855
RecordSourceAsRead(SourceInfo * source)856 void FileMetricsProvider::RecordSourceAsRead(SourceInfo* source) {
857 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
858
859 // Persistently record the "last seen" timestamp of the source file to
860 // ensure that the file is never read again unless it is modified again.
861 if (pref_service_ && !source->prefs_key.empty()) {
862 pref_service_->SetTime(
863 metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key,
864 source->last_seen);
865 }
866 }
867
OnDidCreateMetricsLog()868 void FileMetricsProvider::OnDidCreateMetricsLog() {
869 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
870
871 // Schedule a check to see if there are new metrics to load. If so, they will
872 // be reported during the next collection run after this one. The check is run
873 // off of a MayBlock() TaskRunner so as to not cause delays on the main UI
874 // thread (which is currently where metric collection is done).
875 ScheduleSourcesCheck();
876
877 // Clear any data for initial metrics since they're always reported
878 // before the first call to this method. It couldn't be released after
879 // being reported in RecordInitialHistogramSnapshots because the data
880 // will continue to be used by the caller after that method returns. Once
881 // here, though, all actions to be done on the data have been completed.
882 for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_)
883 DeleteFileAsync(source->path);
884 sources_for_previous_run_.clear();
885 }
886
HasIndependentMetrics()887 bool FileMetricsProvider::HasIndependentMetrics() {
888 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
889 return !sources_with_profile_.empty() || SimulateIndependentMetrics();
890 }
891
ProvideIndependentMetrics(base::OnceClosure serialize_log_callback,base::OnceCallback<void (bool)> done_callback,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager)892 void FileMetricsProvider::ProvideIndependentMetrics(
893 base::OnceClosure serialize_log_callback,
894 base::OnceCallback<void(bool)> done_callback,
895 ChromeUserMetricsExtension* uma_proto,
896 base::HistogramSnapshotManager* snapshot_manager) {
897 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
898
899 if (sources_with_profile_.empty()) {
900 std::move(done_callback).Run(false);
901 return;
902 }
903
904 std::unique_ptr<SourceInfo> source =
905 std::move(*sources_with_profile_.begin());
906 sources_with_profile_.pop_front();
907 SourceInfo* source_ptr = source.get();
908 DCHECK(source->allocator);
909
910 // Do the actual work as a background task.
911 base::ThreadPool::PostTaskAndReplyWithResult(
912 FROM_HERE,
913 {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
914 // CONTINUE_ON_SHUTDOWN because the work done is only useful once the
915 // reply task is run (and there are no side effects). So, no need to
916 // block shutdown since the reply task won't be run anyway.
917 base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
918 base::BindOnce(
919 &FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner,
920 source_ptr, uma_proto, snapshot_manager,
921 std::move(serialize_log_callback)),
922 base::BindOnce(&FileMetricsProvider::ProvideIndependentMetricsCleanup,
923 weak_factory_.GetWeakPtr(), std::move(done_callback),
924 std::move(source)));
925 }
926
ProvideIndependentMetricsCleanup(base::OnceCallback<void (bool)> done_callback,std::unique_ptr<SourceInfo> source,bool success)927 void FileMetricsProvider::ProvideIndependentMetricsCleanup(
928 base::OnceCallback<void(bool)> done_callback,
929 std::unique_ptr<SourceInfo> source,
930 bool success) {
931 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
932
933 // Regardless of whether this source was successfully recorded, it is
934 // never read again.
935 source->read_complete = true;
936 RecordSourceAsRead(source.get());
937 sources_to_check_.push_back(std::move(source));
938 ScheduleSourcesCheck();
939
940 std::move(done_callback).Run(success);
941 }
942
HasPreviousSessionData()943 bool FileMetricsProvider::HasPreviousSessionData() {
944 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
945
946 // Check all sources for previous run to see if they need to be read.
947 for (auto iter = sources_for_previous_run_.begin();
948 iter != sources_for_previous_run_.end();) {
949 auto temp = iter++;
950 SourceInfo* source = temp->get();
951
952 // This would normally be done on a background I/O thread but there
953 // hasn't been a chance to run any at the time this method is called.
954 // Do the check in-line.
955 AccessResult result = CheckAndMapMetricSource(source);
956 UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.InitialAccessResult",
957 result, ACCESS_RESULT_MAX);
958
959 // If it couldn't be accessed, remove it from the list. There is only ever
960 // one chance to record it so no point keeping it around for later. Also
961 // mark it as having been read since uploading it with a future browser
962 // run would associate it with the then-previous run which would no longer
963 // be the run from which it came.
964 if (result != ACCESS_RESULT_SUCCESS) {
965 DCHECK(!source->allocator);
966 RecordSourceAsRead(source);
967 DeleteFileAsync(source->path);
968 sources_for_previous_run_.erase(temp);
969 continue;
970 }
971
972 DCHECK(source->allocator);
973
974 // If the source should be associated with an existing internal profile,
975 // move it to |sources_with_profile_| for later upload.
976 if (source->association == ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN) {
977 if (PersistentSystemProfile::HasSystemProfile(
978 *source->allocator->memory_allocator())) {
979 sources_with_profile_.splice(sources_with_profile_.end(),
980 sources_for_previous_run_, temp);
981 }
982 }
983 }
984
985 return !sources_for_previous_run_.empty();
986 }
987
RecordInitialHistogramSnapshots(base::HistogramSnapshotManager * snapshot_manager)988 void FileMetricsProvider::RecordInitialHistogramSnapshots(
989 base::HistogramSnapshotManager* snapshot_manager) {
990 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
991
992 for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_) {
993 // The source needs to have an allocator attached to it in order to read
994 // histograms out of it.
995 DCHECK(!source->read_complete);
996 DCHECK(source->allocator);
997
998 // Dump all stability histograms contained within the source to the
999 // snapshot-manager.
1000 RecordHistogramSnapshotsFromSource(
1001 snapshot_manager, source.get(),
1002 /*required_flags=*/base::HistogramBase::kUmaStabilityHistogramFlag);
1003
1004 // Update the last-seen time so it isn't read again unless it changes.
1005 RecordSourceAsRead(source.get());
1006 }
1007 }
1008
MergeHistogramDeltas(bool async,base::OnceClosure done_callback)1009 void FileMetricsProvider::MergeHistogramDeltas(
1010 bool async,
1011 base::OnceClosure done_callback) {
1012 DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
1013 // TODO(crbug.com/40213327): Consider if this work can be done asynchronously.
1014 for (std::unique_ptr<SourceInfo>& source : sources_mapped_) {
1015 MergeHistogramDeltasFromSource(source.get());
1016 }
1017 std::move(done_callback).Run();
1018 }
1019
SimulateIndependentMetrics()1020 bool FileMetricsProvider::SimulateIndependentMetrics() {
1021 if (!pref_service_->HasPrefPath(
1022 metrics::prefs::kMetricsFileMetricsMetadata)) {
1023 return false;
1024 }
1025
1026 ScopedListPrefUpdate list_pref(pref_service_,
1027 metrics::prefs::kMetricsFileMetricsMetadata);
1028 base::Value::List& list_value = list_pref.Get();
1029 if (list_value.empty())
1030 return false;
1031
1032 size_t count = pref_service_->GetInteger(
1033 metrics::prefs::kStabilityFileMetricsUnsentSamplesCount);
1034 pref_service_->SetInteger(
1035 metrics::prefs::kStabilityFileMetricsUnsentSamplesCount,
1036 list_value[0].GetInt() + count);
1037 pref_service_->SetInteger(
1038 metrics::prefs::kStabilityFileMetricsUnsentFilesCount,
1039 list_value.size() - 1);
1040 list_value.erase(list_value.begin());
1041
1042 return true;
1043 }
1044
1045 } // namespace metrics
1046