• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #include "components/metrics/file_metrics_provider.h"
11 
12 #include <stddef.h>
13 
14 #include <memory>
15 #include <string_view>
16 #include <vector>
17 
18 #include "base/command_line.h"
19 #include "base/containers/flat_map.h"
20 #include "base/debug/crash_logging.h"
21 #include "base/feature_list.h"
22 #include "base/files/file.h"
23 #include "base/files/file_enumerator.h"
24 #include "base/files/file_util.h"
25 #include "base/files/memory_mapped_file.h"
26 #include "base/functional/bind.h"
27 #include "base/logging.h"
28 #include "base/metrics/histogram_base.h"
29 #include "base/metrics/histogram_functions.h"
30 #include "base/metrics/histogram_macros.h"
31 #include "base/metrics/persistent_histogram_allocator.h"
32 #include "base/metrics/persistent_memory_allocator.h"
33 #include "base/metrics/ranges_manager.h"
34 #include "base/strings/stringprintf.h"
35 #include "base/task/task_traits.h"
36 #include "base/task/thread_pool.h"
37 #include "base/time/time.h"
38 #include "components/metrics/metrics_features.h"
39 #include "components/metrics/metrics_log.h"
40 #include "components/metrics/metrics_pref_names.h"
41 #include "components/metrics/metrics_service.h"
42 #include "components/metrics/persistent_histograms.h"
43 #include "components/metrics/persistent_system_profile.h"
44 #include "components/prefs/pref_registry_simple.h"
45 #include "components/prefs/pref_service.h"
46 #include "components/prefs/scoped_user_pref_update.h"
47 
48 namespace metrics {
49 namespace {
50 
51 // These structures provide values used to define how files are opened and
52 // accessed. It obviates the need for multiple code-paths within several of
53 // the methods.
54 struct SourceOptions {
55   // The flags to be used to open a file on disk.
56   int file_open_flags;
57 
58   // The access mode to be used when mapping a file into memory.
59   base::MemoryMappedFile::Access memory_mapped_access;
60 
61   // Indicates if the file is to be accessed read-only.
62   bool is_read_only;
63 };
64 
65 // Opening a file typically requires at least these flags.
66 constexpr int STD_OPEN = base::File::FLAG_OPEN | base::File::FLAG_READ;
67 
68 constexpr SourceOptions kSourceOptions[] = {
69     // SOURCE_HISTOGRAMS_ATOMIC_FILE
70     {
71         // Ensure that no other process reads this at the same time.
72         STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
73         base::MemoryMappedFile::READ_ONLY,
74         true,
75     },
76     // SOURCE_HISTOGRAMS_ATOMIC_DIR
77     {
78         // Ensure that no other process reads this at the same time.
79         STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
80         base::MemoryMappedFile::READ_ONLY,
81         true,
82     },
83     // SOURCE_HISTOGRAMS_ACTIVE_FILE
84     {
85         // Allow writing to the file. This is needed so we can keep track of
86         // deltas that have been uploaded (by modifying the file), while the
87         // file may still be open by an external process (e.g. Crashpad).
88         STD_OPEN | base::File::FLAG_WRITE,
89         base::MemoryMappedFile::READ_WRITE,
90         false,
91     },
92 };
93 
DeleteFileWhenPossible(const base::FilePath & path)94 void DeleteFileWhenPossible(const base::FilePath& path) {
95   // Open (with delete) and then immediately close the file by going out of
96   // scope. This is the only cross-platform safe way to delete a file that may
97   // be open elsewhere, a distinct possibility given the asynchronous nature
98   // of the delete task.
99   base::File file(path, base::File::FLAG_OPEN | base::File::FLAG_READ |
100                             base::File::FLAG_DELETE_ON_CLOSE);
101 }
102 
103 }  // namespace
104 
105 // This structure stores all the information about the sources being monitored
106 // and their current reporting state.
107 struct FileMetricsProvider::SourceInfo {
SourceInfometrics::FileMetricsProvider::SourceInfo108   explicit SourceInfo(const Params& params)
109       : type(params.type),
110         association(params.association),
111         prefs_key(params.prefs_key),
112         filter(params.filter),
113         max_age(params.max_age),
114         max_dir_kib(params.max_dir_kib),
115         max_dir_files(params.max_dir_files) {
116     switch (type) {
117       case SOURCE_HISTOGRAMS_ACTIVE_FILE:
118         DCHECK(prefs_key.empty());
119         [[fallthrough]];
120       case SOURCE_HISTOGRAMS_ATOMIC_FILE:
121         path = params.path;
122         break;
123       case SOURCE_HISTOGRAMS_ATOMIC_DIR:
124         directory = params.path;
125         break;
126     }
127   }
128 
129   SourceInfo(const SourceInfo&) = delete;
130   SourceInfo& operator=(const SourceInfo&) = delete;
131 
132   ~SourceInfo() = default;
133 
134   struct FoundFile {
135     base::FilePath path;
136     base::FileEnumerator::FileInfo info;
137   };
138   using FoundFiles = base::flat_map<base::Time, FoundFile>;
139 
140   // How to access this source (file/dir, atomic/active).
141   const SourceType type;
142 
143   // With what run this source is associated.
144   const SourceAssociation association;
145 
146   // Where on disk the directory is located. This will only be populated when
147   // a directory is being monitored.
148   base::FilePath directory;
149 
150   // The files found in the above directory, ordered by last-modified.
151   std::unique_ptr<FoundFiles> found_files;
152 
153   // Where on disk the file is located. If a directory is being monitored,
154   // this will be updated for whatever file is being read.
155   base::FilePath path;
156 
157   // Name used inside prefs to persistent metadata.
158   std::string prefs_key;
159 
160   // The filter callback for determining what to do with found files.
161   FilterCallback filter;
162 
163   // The maximum allowed age of a file.
164   base::TimeDelta max_age;
165 
166   // The maximum allowed bytes in a directory.
167   size_t max_dir_kib;
168 
169   // The maximum allowed files in a directory.
170   size_t max_dir_files;
171 
172   // The last-seen time of this source to detect change.
173   base::Time last_seen;
174 
175   // Indicates if the data has been read out or not.
176   bool read_complete = false;
177 
178   // Once a file has been recognized as needing to be read, it is mapped
179   // into memory and assigned to an |allocator| object.
180   std::unique_ptr<base::PersistentHistogramAllocator> allocator;
181 };
182 
Params(const base::FilePath & path,SourceType type,SourceAssociation association,std::string_view prefs_key)183 FileMetricsProvider::Params::Params(const base::FilePath& path,
184                                     SourceType type,
185                                     SourceAssociation association,
186                                     std::string_view prefs_key)
187     : path(path), type(type), association(association), prefs_key(prefs_key) {}
188 
189 FileMetricsProvider::Params::~Params() = default;
190 
FileMetricsProvider(PrefService * local_state)191 FileMetricsProvider::FileMetricsProvider(PrefService* local_state)
192     : pref_service_(local_state) {
193   base::StatisticsRecorder::RegisterHistogramProvider(
194       weak_factory_.GetWeakPtr());
195 }
196 
197 FileMetricsProvider::~FileMetricsProvider() = default;
198 
RegisterSource(const Params & params,bool metrics_reporting_enabled)199 void FileMetricsProvider::RegisterSource(const Params& params,
200                                          bool metrics_reporting_enabled) {
201   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
202 
203   // Ensure that kSourceOptions has been filled for this type.
204   DCHECK_GT(std::size(kSourceOptions), static_cast<size_t>(params.type));
205 
206   if (!metrics_reporting_enabled) {
207     // When metrics reporting is not enabled, existing files should be deleted,
208     // since they won't be getting deleted as part of the upload flow.
209     if (params.type == SOURCE_HISTOGRAMS_ATOMIC_DIR ||
210         params.type == SOURCE_HISTOGRAMS_ATOMIC_FILE) {
211       base::ThreadPool::PostTask(
212           FROM_HERE,
213           {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
214            base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
215           params.type == SOURCE_HISTOGRAMS_ATOMIC_DIR
216               ? base::GetDeletePathRecursivelyCallback(params.path)
217               : base::GetDeleteFileCallback(params.path));
218     }
219     return;
220   }
221 
222   std::unique_ptr<SourceInfo> source(new SourceInfo(params));
223 
224   // |prefs_key| may be empty if the caller does not wish to persist the
225   // state across instances of the program.
226   if (pref_service_ && !params.prefs_key.empty()) {
227     source->last_seen = pref_service_->GetTime(
228         metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key);
229   }
230 
231   switch (params.association) {
232     case ASSOCIATE_CURRENT_RUN:
233     case ASSOCIATE_INTERNAL_PROFILE:
234     case ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER:
235       sources_to_check_.push_back(std::move(source));
236       break;
237     case ASSOCIATE_PREVIOUS_RUN:
238     case ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN:
239       DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_FILE, source->type);
240       sources_for_previous_run_.push_back(std::move(source));
241       break;
242   }
243 }
244 
245 // static
RegisterSourcePrefs(PrefRegistrySimple * prefs,std::string_view prefs_key)246 void FileMetricsProvider::RegisterSourcePrefs(PrefRegistrySimple* prefs,
247                                               std::string_view prefs_key) {
248   prefs->RegisterInt64Pref(
249       metrics::prefs::kMetricsLastSeenPrefix + std::string(prefs_key), 0);
250 }
251 
252 //  static
RegisterPrefs(PrefRegistrySimple * prefs)253 void FileMetricsProvider::RegisterPrefs(PrefRegistrySimple* prefs) {
254   prefs->RegisterListPref(metrics::prefs::kMetricsFileMetricsMetadata);
255 }
256 
257 // static
RecordAccessResult(AccessResult result)258 void FileMetricsProvider::RecordAccessResult(AccessResult result) {
259   UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.AccessResult", result,
260                             ACCESS_RESULT_MAX);
261 }
262 
263 // static
LocateNextFileInDirectory(SourceInfo * source)264 bool FileMetricsProvider::LocateNextFileInDirectory(SourceInfo* source) {
265   DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_DIR, source->type);
266   DCHECK(!source->directory.empty());
267 
268   // Cumulative directory stats. These will remain zero if the directory isn't
269   // scanned but that's okay since any work they would cause to be done below
270   // would have been done during the first call where the directory was fully
271   // scanned.
272   size_t total_size_kib = 0;  // Using KiB allows 4TiB even on 32-bit builds.
273   size_t file_count = 0;
274 
275   base::Time now_time = base::Time::Now();
276   if (!source->found_files) {
277     source->found_files = std::make_unique<SourceInfo::FoundFiles>();
278     base::FileEnumerator file_iter(source->directory, /*recursive=*/false,
279                                    base::FileEnumerator::FILES);
280     SourceInfo::FoundFile found_file;
281 
282     // Open the directory and find all the files, remembering the last-modified
283     // time of each.
284     for (found_file.path = file_iter.Next(); !found_file.path.empty();
285          found_file.path = file_iter.Next()) {
286       found_file.info = file_iter.GetInfo();
287 
288       // Ignore directories.
289       if (found_file.info.IsDirectory()) {
290         continue;
291       }
292 
293       // Ignore temporary files.
294       base::FilePath::CharType first_character =
295           found_file.path.BaseName().value().front();
296       if (first_character == FILE_PATH_LITERAL('.') ||
297           first_character == FILE_PATH_LITERAL('_')) {
298         continue;
299       }
300 
301       // Ignore non-PMA (Persistent Memory Allocator) files.
302       if (found_file.path.Extension() !=
303           base::PersistentMemoryAllocator::kFileExtension) {
304         continue;
305       }
306 
307       // Process real files.
308       total_size_kib += found_file.info.GetSize() >> 10;
309       base::Time modified = found_file.info.GetLastModifiedTime();
310       if (modified > source->last_seen) {
311         source->found_files->emplace(modified, std::move(found_file));
312         ++file_count;
313       } else {
314         // This file has been read. Try to delete it. Ignore any errors because
315         // the file may be un-removeable by this process. It could, for example,
316         // have been created by a privileged process like setup.exe. Even if it
317         // is not removed, it will continue to be ignored bacuse of the older
318         // modification time.
319         base::DeleteFile(found_file.path);
320       }
321     }
322   }
323 
324   // Filter files from the front until one is found for processing.
325   bool have_file = false;
326   while (!source->found_files->empty()) {
327     SourceInfo::FoundFile found =
328         std::move(source->found_files->begin()->second);
329     source->found_files->erase(source->found_files->begin());
330 
331     bool too_many =
332         source->max_dir_files > 0 && file_count > source->max_dir_files;
333     bool too_big =
334         source->max_dir_kib > 0 && total_size_kib > source->max_dir_kib;
335     bool too_old =
336         source->max_age != base::TimeDelta() &&
337         now_time - found.info.GetLastModifiedTime() > source->max_age;
338     if (too_many || too_big || too_old) {
339       base::DeleteFile(found.path);
340       --file_count;
341       total_size_kib -= found.info.GetSize() >> 10;
342       RecordAccessResult(too_many ? ACCESS_RESULT_TOO_MANY_FILES
343                                   : too_big ? ACCESS_RESULT_TOO_MANY_BYTES
344                                             : ACCESS_RESULT_TOO_OLD);
345       continue;
346     }
347 
348     AccessResult result = HandleFilterSource(source, found.path);
349     if (result == ACCESS_RESULT_SUCCESS) {
350       source->path = std::move(found.path);
351       have_file = true;
352       break;
353     }
354 
355     // Record the result. Success will be recorded by the caller.
356     if (result != ACCESS_RESULT_THIS_PID) {
357       RecordAccessResult(result);
358     }
359   }
360 
361   return have_file;
362 }
363 
364 // static
FinishedWithSource(SourceInfo * source,AccessResult result)365 void FileMetricsProvider::FinishedWithSource(SourceInfo* source,
366                                              AccessResult result) {
367   // Different source types require different post-processing.
368   switch (source->type) {
369     case SOURCE_HISTOGRAMS_ATOMIC_FILE:
370     case SOURCE_HISTOGRAMS_ATOMIC_DIR:
371       // Done with this file so delete the allocator and its owned file.
372       source->allocator.reset();
373       // Remove the file if has been recorded. This prevents them from
374       // accumulating or also being recorded by different instances of
375       // the browser.
376       if (result == ACCESS_RESULT_SUCCESS ||
377           result == ACCESS_RESULT_NOT_MODIFIED ||
378           result == ACCESS_RESULT_MEMORY_DELETED ||
379           result == ACCESS_RESULT_TOO_OLD) {
380         DeleteFileWhenPossible(source->path);
381       }
382       break;
383     case SOURCE_HISTOGRAMS_ACTIVE_FILE:
384       // Keep the allocator open so it doesn't have to be re-mapped each
385       // time. This also allows the contents to be merged on-demand.
386       break;
387   }
388 }
389 
390 // static
CheckAndMergeMetricSourcesOnTaskRunner(SourceInfoList * sources)391 std::vector<size_t> FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner(
392     SourceInfoList* sources) {
393   // This method has all state information passed in |sources| and is intended
394   // to run on a worker thread rather than the UI thread.
395   std::vector<size_t> samples_counts;
396 
397   for (std::unique_ptr<SourceInfo>& source : *sources) {
398     AccessResult result;
399     do {
400       result = CheckAndMapMetricSource(source.get());
401 
402       // Some results are not reported in order to keep the dashboard clean.
403       if (result != ACCESS_RESULT_DOESNT_EXIST &&
404           result != ACCESS_RESULT_NOT_MODIFIED &&
405           result != ACCESS_RESULT_THIS_PID) {
406         RecordAccessResult(result);
407       }
408 
409       // If there are no files (or no more files) in this source, stop now.
410       if (result == ACCESS_RESULT_DOESNT_EXIST)
411         break;
412 
413       // Mapping was successful. Merge it.
414       if (result == ACCESS_RESULT_SUCCESS) {
415         // Metrics associated with internal profiles have to be fetched directly
416         // so just keep the mapping for use by the main thread.
417         if (source->association == ASSOCIATE_INTERNAL_PROFILE)
418           break;
419 
420         if (source->association == ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER) {
421           samples_counts.push_back(CollectFileMetadataFromSource(source.get()));
422         } else {
423           size_t histograms_count =
424               MergeHistogramDeltasFromSource(source.get());
425           if (!source->prefs_key.empty()) {
426             base::UmaHistogramCounts1000(
427                 base::StringPrintf(
428                     "UMA.FileMetricsProvider.%s.MergedHistogramsCount",
429                     source->prefs_key.c_str()),
430                 histograms_count);
431           }
432         }
433         DCHECK(source->read_complete);
434       }
435 
436       // All done with this source.
437       FinishedWithSource(source.get(), result);
438 
439       // If it's a directory, keep trying until a file is successfully opened.
440       // When there are no more files, ACCESS_RESULT_DOESNT_EXIST will be
441       // returned and the loop will exit above.
442     } while (result != ACCESS_RESULT_SUCCESS && !source->directory.empty());
443 
444     // If the set of known files is empty, clear the object so the next run
445     // will do a fresh scan of the directory.
446     if (source->found_files && source->found_files->empty())
447       source->found_files.reset();
448   }
449 
450   return samples_counts;
451 }
452 
453 // This method has all state information passed in |source| and is intended
454 // to run on a worker thread rather than the UI thread.
455 // static
CheckAndMapMetricSource(SourceInfo * source)456 FileMetricsProvider::AccessResult FileMetricsProvider::CheckAndMapMetricSource(
457     SourceInfo* source) {
458   // If source was read, clean up after it.
459   if (source->read_complete)
460     FinishedWithSource(source, ACCESS_RESULT_SUCCESS);
461   source->read_complete = false;
462   DCHECK(!source->allocator);
463 
464   // If the source is a directory, look for files within it.
465   if (!source->directory.empty() && !LocateNextFileInDirectory(source))
466     return ACCESS_RESULT_DOESNT_EXIST;
467 
468   // Do basic validation on the file metadata.
469   base::File::Info info;
470   if (!base::GetFileInfo(source->path, &info))
471     return ACCESS_RESULT_DOESNT_EXIST;
472 
473   if (info.is_directory || info.size == 0)
474     return ACCESS_RESULT_INVALID_FILE;
475 
476   if (source->last_seen >= info.last_modified)
477     return ACCESS_RESULT_NOT_MODIFIED;
478   if (source->max_age != base::TimeDelta() &&
479       base::Time::Now() - info.last_modified > source->max_age) {
480     return ACCESS_RESULT_TOO_OLD;
481   }
482 
483   // Non-directory files still need to be filtered.
484   if (source->directory.empty()) {
485     AccessResult result = HandleFilterSource(source, source->path);
486     if (result != ACCESS_RESULT_SUCCESS)
487       return result;
488   }
489 
490   // A new file of metrics has been found.
491   base::File file(source->path, kSourceOptions[source->type].file_open_flags);
492   if (!file.IsValid())
493     return ACCESS_RESULT_NO_OPEN;
494 
495   // Check that file is writable if that is expected. If a write is attempted
496   // on an unwritable memory-mapped file, a SIGBUS will cause a crash.
497   const bool read_only = kSourceOptions[source->type].is_read_only;
498   if (!read_only) {
499     constexpr int kTestSize = 16;
500     char header[kTestSize];
501     int amount = file.Read(0, header, kTestSize);
502     if (amount != kTestSize)
503       return ACCESS_RESULT_INVALID_CONTENTS;
504 
505     char zeros[kTestSize] = {};
506     file.Write(0, zeros, kTestSize);
507     file.Flush();
508 
509     // A crash here would be unfortunate as the file would be left invalid
510     // and skipped/deleted by later attempts. This is unlikely, however, and
511     // the benefit of avoiding crashes from mapping as read/write a file that
512     // can't be written more than justifies the risk.
513 
514     char check[kTestSize];
515     amount = file.Read(0, check, kTestSize);
516     if (amount != kTestSize)
517       return ACCESS_RESULT_INVALID_CONTENTS;
518     if (memcmp(check, zeros, kTestSize) != 0)
519       return ACCESS_RESULT_NOT_WRITABLE;
520 
521     file.Write(0, header, kTestSize);
522     file.Flush();
523     amount = file.Read(0, check, kTestSize);
524     if (amount != kTestSize)
525       return ACCESS_RESULT_INVALID_CONTENTS;
526     if (memcmp(check, header, kTestSize) != 0)
527       return ACCESS_RESULT_NOT_WRITABLE;
528   }
529 
530   std::unique_ptr<base::MemoryMappedFile> mapped(new base::MemoryMappedFile());
531   if (!mapped->Initialize(std::move(file),
532                           kSourceOptions[source->type].memory_mapped_access)) {
533     return ACCESS_RESULT_SYSTEM_MAP_FAILURE;
534   }
535 
536   // Ensure any problems below don't occur repeatedly.
537   source->last_seen = info.last_modified;
538 
539   // Test the validity of the file contents.
540   if (!base::FilePersistentMemoryAllocator::IsFileAcceptable(*mapped,
541                                                              read_only)) {
542     return ACCESS_RESULT_INVALID_CONTENTS;
543   }
544 
545   // Map the file and validate it.
546   std::unique_ptr<base::FilePersistentMemoryAllocator> memory_allocator =
547       std::make_unique<base::FilePersistentMemoryAllocator>(
548           std::move(mapped), 0, 0, std::string_view(),
549           read_only ? base::FilePersistentMemoryAllocator::kReadOnly
550                     : base::FilePersistentMemoryAllocator::kReadWriteExisting);
551   if (memory_allocator->GetMemoryState() ==
552       base::PersistentMemoryAllocator::MEMORY_DELETED) {
553     return ACCESS_RESULT_MEMORY_DELETED;
554   }
555   if (memory_allocator->IsCorrupt())
556     return ACCESS_RESULT_DATA_CORRUPTION;
557 
558   // Cache the file data while running in a background thread so that there
559   // shouldn't be any I/O when the data is accessed from the main thread.
560   // Files with an internal profile, those from previous runs that include
561   // a full system profile and are fetched via ProvideIndependentMetrics(),
562   // are loaded on a background task and so there's no need to cache the
563   // data in advance.
564   if (source->association != ASSOCIATE_INTERNAL_PROFILE)
565     memory_allocator->Cache();
566 
567   // Create an allocator for the mapped file. Ownership passes to the allocator.
568   source->allocator = std::make_unique<base::PersistentHistogramAllocator>(
569       std::move(memory_allocator));
570   // Pass a custom RangesManager so that we do not register the BucketRanges
571   // with the global StatisticsRecorder when creating histogram objects using
572   // the allocator's underlying data. This avoids unnecessary contention on the
573   // global StatisticsRecorder lock.
574   // Note: Since RangesManager is not thread safe, this means that |allocator|
575   // must be iterated over one thread at a time (i.e., not concurrently). This
576   // is the case.
577   source->allocator->SetRangesManager(new base::RangesManager());
578 
579   // Check that an "independent" file has the necessary information present.
580   if (source->association == ASSOCIATE_INTERNAL_PROFILE &&
581       !PersistentSystemProfile::GetSystemProfile(
582           *source->allocator->memory_allocator(), nullptr)) {
583     return ACCESS_RESULT_NO_PROFILE;
584   }
585 
586   return ACCESS_RESULT_SUCCESS;
587 }
588 
589 // static
MergeHistogramDeltasFromSource(SourceInfo * source)590 size_t FileMetricsProvider::MergeHistogramDeltasFromSource(SourceInfo* source) {
591   DCHECK(source->allocator);
592   base::PersistentHistogramAllocator::Iterator histogram_iter(
593       source->allocator.get());
594 
595   const bool read_only = kSourceOptions[source->type].is_read_only;
596   size_t histogram_count = 0;
597   while (true) {
598     std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
599     if (!histogram)
600       break;
601 
602     if (read_only) {
603       source->allocator->MergeHistogramFinalDeltaToStatisticsRecorder(
604           histogram.get());
605     } else {
606       source->allocator->MergeHistogramDeltaToStatisticsRecorder(
607           histogram.get());
608     }
609     ++histogram_count;
610   }
611 
612   source->read_complete = true;
613   DVLOG(1) << "Reported " << histogram_count << " histograms from "
614            << source->path.value();
615   return histogram_count;
616 }
617 
618 // static
RecordHistogramSnapshotsFromSource(base::HistogramSnapshotManager * snapshot_manager,SourceInfo * source,base::HistogramBase::Flags required_flags)619 void FileMetricsProvider::RecordHistogramSnapshotsFromSource(
620     base::HistogramSnapshotManager* snapshot_manager,
621     SourceInfo* source,
622     base::HistogramBase::Flags required_flags) {
623   DCHECK_NE(SOURCE_HISTOGRAMS_ACTIVE_FILE, source->type);
624 
625   base::PersistentHistogramAllocator::Iterator histogram_iter(
626       source->allocator.get());
627 
628   int histogram_count = 0;
629   while (true) {
630     std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
631     if (!histogram)
632       break;
633     if (histogram->HasFlags(required_flags)) {
634       snapshot_manager->PrepareFinalDelta(histogram.get());
635       ++histogram_count;
636     }
637   }
638 
639   source->read_complete = true;
640   DVLOG(1) << "Reported " << histogram_count << " histograms from "
641            << source->path.value();
642 }
643 
HandleFilterSource(SourceInfo * source,const base::FilePath & path)644 FileMetricsProvider::AccessResult FileMetricsProvider::HandleFilterSource(
645     SourceInfo* source,
646     const base::FilePath& path) {
647   if (!source->filter)
648     return ACCESS_RESULT_SUCCESS;
649 
650   // Alternatively, pass a Params object to the filter like what was originally
651   // used to configure the source.
652   // Params params(path, source->type, source->association, source->prefs_key);
653   FilterAction action = source->filter.Run(path);
654   switch (action) {
655     case FILTER_PROCESS_FILE:
656       // Process the file.
657       return ACCESS_RESULT_SUCCESS;
658 
659     case FILTER_ACTIVE_THIS_PID:
660     // Even the file for the current process has to be touched or its stamp
661     // will be less than "last processed" and thus skipped on future runs,
662     // even those done by new instances of the browser if a pref key is
663     // provided so that the last-uploaded stamp is recorded.
664     case FILTER_TRY_LATER: {
665       // Touch the file with the current timestamp making it (presumably) the
666       // newest file in the directory.
667       base::Time now = base::Time::Now();
668       base::TouchFile(path, /*accessed=*/now, /*modified=*/now);
669       if (action == FILTER_ACTIVE_THIS_PID)
670         return ACCESS_RESULT_THIS_PID;
671       return ACCESS_RESULT_FILTER_TRY_LATER;
672     }
673 
674     case FILTER_SKIP_FILE:
675       switch (source->type) {
676         case SOURCE_HISTOGRAMS_ATOMIC_FILE:
677         case SOURCE_HISTOGRAMS_ATOMIC_DIR:
678           // Only "atomic" files are deleted (best-effort).
679           DeleteFileWhenPossible(path);
680           break;
681         case SOURCE_HISTOGRAMS_ACTIVE_FILE:
682           // File will presumably get modified elsewhere and thus tried again.
683           break;
684       }
685       return ACCESS_RESULT_FILTER_SKIP_FILE;
686   }
687 
688   NOTREACHED();
689 }
690 
691 /* static */
ProvideIndependentMetricsOnTaskRunner(SourceInfo * source,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager,base::OnceClosure serialize_log_callback)692 bool FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner(
693     SourceInfo* source,
694     ChromeUserMetricsExtension* uma_proto,
695     base::HistogramSnapshotManager* snapshot_manager,
696     base::OnceClosure serialize_log_callback) {
697   // Include various crash keys about the file/allocator being read so that if
698   // there is ever a crash report being dumped while reading its contents, we
699   // have some info about its state.
700   // TODO(crbug.com/40064026): Clean this up.
701 
702   // Useful to know the metadata version of the source (e.g. to know if some
703   // fields like memory_state below are up to date).
704   SCOPED_CRASH_KEY_NUMBER("PMA", "version",
705                           source->allocator->memory_allocator()->version());
706   // Useful to know whether the source comes from a crashed session.
707   SCOPED_CRASH_KEY_NUMBER(
708       "PMA", "memory_state",
709       source->allocator->memory_allocator()->GetMemoryState());
710   // Useful to know the freeptr as it can help determine if the source comes
711   // from a session that crashed due to failing to allocate an object across
712   // different pages.
713   SCOPED_CRASH_KEY_NUMBER("PMA", "freeptr",
714                           source->allocator->memory_allocator()->freeptr());
715   SCOPED_CRASH_KEY_BOOL("PMA", "full",
716                         source->allocator->memory_allocator()->IsFull());
717   SCOPED_CRASH_KEY_BOOL("PMA", "corrupt",
718                         source->allocator->memory_allocator()->IsCorrupt());
719 
720   SystemProfileProto* system_profile_proto =
721       uma_proto->mutable_system_profile();
722 
723   if (PersistentSystemProfile::GetSystemProfile(
724           *source->allocator->memory_allocator(), system_profile_proto)) {
725     system_profile_proto->mutable_stability()->set_from_previous_run(true);
726     RecordHistogramSnapshotsFromSource(
727         snapshot_manager, source,
728         /*required_flags=*/base::HistogramBase::kUmaTargetedHistogramFlag);
729 
730     // NOTE: If you are adding anything here, consider also changing
731     // MetricsStateMetricsProvider::ProvidePreviousSessionData().
732 
733     // Use the client UUID stored in the system profile (if there is one) as the
734     // independent log's client ID. Usually, this has no effect, but there are
735     // scenarios where the log may have come from a session that had a different
736     // client ID than the one currently in use (e.g., client ID was reset due to
737     // being detected as a cloned install), so make sure to associate it with
738     // the proper one.
739     const std::string& client_uuid = system_profile_proto->client_uuid();
740     if (!client_uuid.empty()) {
741       uma_proto->set_client_id(MetricsLog::Hash(client_uuid));
742     }
743 
744     // Serialize the log while we are still in the background, instead of on the
745     // callback that runs on the main thread.
746     std::move(serialize_log_callback).Run();
747 
748     return true;
749   }
750 
751   return false;
752 }
753 
AppendToSamplesCountPref(std::vector<size_t> samples_counts)754 void FileMetricsProvider::AppendToSamplesCountPref(
755     std::vector<size_t> samples_counts) {
756   ScopedListPrefUpdate update(pref_service_,
757                               metrics::prefs::kMetricsFileMetricsMetadata);
758   for (size_t samples_count : samples_counts) {
759     update->Append(static_cast<int>(samples_count));
760   }
761 }
762 
763 // static
CollectFileMetadataFromSource(SourceInfo * source)764 size_t FileMetricsProvider::CollectFileMetadataFromSource(SourceInfo* source) {
765   base::HistogramBase::Count samples_count = 0;
766   base::PersistentHistogramAllocator::Iterator it{source->allocator.get()};
767   std::unique_ptr<base::HistogramBase> histogram;
768   while ((histogram = it.GetNext()) != nullptr) {
769     samples_count += histogram->SnapshotFinalDelta()->TotalCount();
770   }
771   source->read_complete = true;
772   return samples_count;
773 }
774 
ScheduleSourcesCheck()775 void FileMetricsProvider::ScheduleSourcesCheck() {
776   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
777 
778   if (sources_to_check_.empty())
779     return;
780 
781   // Create an independent list of sources for checking. This will be Owned()
782   // by the reply call given to the task-runner, to be deleted when that call
783   // has returned. It is also passed Unretained() to the task itself, safe
784   // because that must complete before the reply runs.
785   SourceInfoList* check_list = new SourceInfoList();
786   std::swap(sources_to_check_, *check_list);
787   base::ThreadPool::PostTaskAndReplyWithResult(
788       FROM_HERE,
789       {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
790        // SKIP_ON_SHUTDOWN because the task must be run to completion once
791        // started. Since the task may merge metrics from files on disk, the task
792        // should be completed so that those files are deleted (to prevent
793        // re-merging them in another session, which would cause duplication).
794        base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN},
795       base::BindOnce(
796           &FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner,
797           base::Unretained(check_list)),
798       base::BindOnce(&FileMetricsProvider::RecordSourcesChecked,
799                      weak_factory_.GetWeakPtr(), base::Owned(check_list)));
800 }
801 
RecordSourcesChecked(SourceInfoList * checked,std::vector<size_t> samples_counts)802 void FileMetricsProvider::RecordSourcesChecked(
803     SourceInfoList* checked,
804     std::vector<size_t> samples_counts) {
805   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
806 
807   AppendToSamplesCountPref(std::move(samples_counts));
808 
809   // Sources that still have an allocator at this point are read/write "active"
810   // files that may need their contents merged on-demand. If there is no
811   // allocator (not a read/write file) but a read was done on the task-runner,
812   // try again immediately to see if more is available (in a directory of
813   // files). Otherwise, remember the source for checking again at a later time.
814   bool did_read = false;
815   for (auto iter = checked->begin(); iter != checked->end();) {
816     auto temp = iter++;
817     SourceInfo* source = temp->get();
818     if (source->read_complete) {
819       RecordSourceAsRead(source);
820       did_read = true;
821     }
822     if (source->allocator) {
823       if (source->association == ASSOCIATE_INTERNAL_PROFILE) {
824         sources_with_profile_.splice(sources_with_profile_.end(), *checked,
825                                      temp);
826       } else {
827         sources_mapped_.splice(sources_mapped_.end(), *checked, temp);
828       }
829     } else {
830       sources_to_check_.splice(sources_to_check_.end(), *checked, temp);
831     }
832   }
833 
834   // If a read was done, schedule another one immediately. In the case of a
835   // directory of files, this ensures that all entries get processed. It's
836   // done here instead of as a loop in CheckAndMergeMetricSourcesOnTaskRunner
837   // so that (a) it gives the disk a rest and (b) testing of individual reads
838   // is possible.
839   if (did_read)
840     ScheduleSourcesCheck();
841 }
842 
DeleteFileAsync(const base::FilePath & path)843 void FileMetricsProvider::DeleteFileAsync(const base::FilePath& path) {
844   base::ThreadPool::PostTask(
845       FROM_HERE,
846       {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
847        // CONTINUE_ON_SHUTDOWN because files that are scheduled to be deleted
848        // asynchronously are not guaranteed to be deleted this session anyway,
849        // so no need to block shutdown if the task has already started running.
850        // Further, for such files, there are different ways to ensure they won't
851        // be consumed again (i.e., prefs).
852        base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
853       base::BindOnce(DeleteFileWhenPossible, path));
854 }
855 
RecordSourceAsRead(SourceInfo * source)856 void FileMetricsProvider::RecordSourceAsRead(SourceInfo* source) {
857   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
858 
859   // Persistently record the "last seen" timestamp of the source file to
860   // ensure that the file is never read again unless it is modified again.
861   if (pref_service_ && !source->prefs_key.empty()) {
862     pref_service_->SetTime(
863         metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key,
864         source->last_seen);
865   }
866 }
867 
OnDidCreateMetricsLog()868 void FileMetricsProvider::OnDidCreateMetricsLog() {
869   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
870 
871   // Schedule a check to see if there are new metrics to load. If so, they will
872   // be reported during the next collection run after this one. The check is run
873   // off of a MayBlock() TaskRunner so as to not cause delays on the main UI
874   // thread (which is currently where metric collection is done).
875   ScheduleSourcesCheck();
876 
877   // Clear any data for initial metrics since they're always reported
878   // before the first call to this method. It couldn't be released after
879   // being reported in RecordInitialHistogramSnapshots because the data
880   // will continue to be used by the caller after that method returns. Once
881   // here, though, all actions to be done on the data have been completed.
882   for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_)
883     DeleteFileAsync(source->path);
884   sources_for_previous_run_.clear();
885 }
886 
HasIndependentMetrics()887 bool FileMetricsProvider::HasIndependentMetrics() {
888   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
889   return !sources_with_profile_.empty() || SimulateIndependentMetrics();
890 }
891 
ProvideIndependentMetrics(base::OnceClosure serialize_log_callback,base::OnceCallback<void (bool)> done_callback,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager)892 void FileMetricsProvider::ProvideIndependentMetrics(
893     base::OnceClosure serialize_log_callback,
894     base::OnceCallback<void(bool)> done_callback,
895     ChromeUserMetricsExtension* uma_proto,
896     base::HistogramSnapshotManager* snapshot_manager) {
897   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
898 
899   if (sources_with_profile_.empty()) {
900     std::move(done_callback).Run(false);
901     return;
902   }
903 
904   std::unique_ptr<SourceInfo> source =
905       std::move(*sources_with_profile_.begin());
906   sources_with_profile_.pop_front();
907   SourceInfo* source_ptr = source.get();
908   DCHECK(source->allocator);
909 
910   // Do the actual work as a background task.
911   base::ThreadPool::PostTaskAndReplyWithResult(
912       FROM_HERE,
913       {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
914        // CONTINUE_ON_SHUTDOWN because the work done is only useful once the
915        // reply task is run (and there are no side effects). So, no need to
916        // block shutdown since the reply task won't be run anyway.
917        base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN},
918       base::BindOnce(
919           &FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner,
920           source_ptr, uma_proto, snapshot_manager,
921           std::move(serialize_log_callback)),
922       base::BindOnce(&FileMetricsProvider::ProvideIndependentMetricsCleanup,
923                      weak_factory_.GetWeakPtr(), std::move(done_callback),
924                      std::move(source)));
925 }
926 
ProvideIndependentMetricsCleanup(base::OnceCallback<void (bool)> done_callback,std::unique_ptr<SourceInfo> source,bool success)927 void FileMetricsProvider::ProvideIndependentMetricsCleanup(
928     base::OnceCallback<void(bool)> done_callback,
929     std::unique_ptr<SourceInfo> source,
930     bool success) {
931   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
932 
933   // Regardless of whether this source was successfully recorded, it is
934   // never read again.
935   source->read_complete = true;
936   RecordSourceAsRead(source.get());
937   sources_to_check_.push_back(std::move(source));
938   ScheduleSourcesCheck();
939 
940   std::move(done_callback).Run(success);
941 }
942 
HasPreviousSessionData()943 bool FileMetricsProvider::HasPreviousSessionData() {
944   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
945 
946   // Check all sources for previous run to see if they need to be read.
947   for (auto iter = sources_for_previous_run_.begin();
948        iter != sources_for_previous_run_.end();) {
949     auto temp = iter++;
950     SourceInfo* source = temp->get();
951 
952     // This would normally be done on a background I/O thread but there
953     // hasn't been a chance to run any at the time this method is called.
954     // Do the check in-line.
955     AccessResult result = CheckAndMapMetricSource(source);
956     UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.InitialAccessResult",
957                               result, ACCESS_RESULT_MAX);
958 
959     // If it couldn't be accessed, remove it from the list. There is only ever
960     // one chance to record it so no point keeping it around for later. Also
961     // mark it as having been read since uploading it with a future browser
962     // run would associate it with the then-previous run which would no longer
963     // be the run from which it came.
964     if (result != ACCESS_RESULT_SUCCESS) {
965       DCHECK(!source->allocator);
966       RecordSourceAsRead(source);
967       DeleteFileAsync(source->path);
968       sources_for_previous_run_.erase(temp);
969       continue;
970     }
971 
972     DCHECK(source->allocator);
973 
974     // If the source should be associated with an existing internal profile,
975     // move it to |sources_with_profile_| for later upload.
976     if (source->association == ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN) {
977       if (PersistentSystemProfile::HasSystemProfile(
978               *source->allocator->memory_allocator())) {
979         sources_with_profile_.splice(sources_with_profile_.end(),
980                                      sources_for_previous_run_, temp);
981       }
982     }
983   }
984 
985   return !sources_for_previous_run_.empty();
986 }
987 
RecordInitialHistogramSnapshots(base::HistogramSnapshotManager * snapshot_manager)988 void FileMetricsProvider::RecordInitialHistogramSnapshots(
989     base::HistogramSnapshotManager* snapshot_manager) {
990   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
991 
992   for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_) {
993     // The source needs to have an allocator attached to it in order to read
994     // histograms out of it.
995     DCHECK(!source->read_complete);
996     DCHECK(source->allocator);
997 
998     // Dump all stability histograms contained within the source to the
999     // snapshot-manager.
1000     RecordHistogramSnapshotsFromSource(
1001         snapshot_manager, source.get(),
1002         /*required_flags=*/base::HistogramBase::kUmaStabilityHistogramFlag);
1003 
1004     // Update the last-seen time so it isn't read again unless it changes.
1005     RecordSourceAsRead(source.get());
1006   }
1007 }
1008 
MergeHistogramDeltas(bool async,base::OnceClosure done_callback)1009 void FileMetricsProvider::MergeHistogramDeltas(
1010     bool async,
1011     base::OnceClosure done_callback) {
1012   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
1013   // TODO(crbug.com/40213327): Consider if this work can be done asynchronously.
1014   for (std::unique_ptr<SourceInfo>& source : sources_mapped_) {
1015     MergeHistogramDeltasFromSource(source.get());
1016   }
1017   std::move(done_callback).Run();
1018 }
1019 
SimulateIndependentMetrics()1020 bool FileMetricsProvider::SimulateIndependentMetrics() {
1021   if (!pref_service_->HasPrefPath(
1022           metrics::prefs::kMetricsFileMetricsMetadata)) {
1023     return false;
1024   }
1025 
1026   ScopedListPrefUpdate list_pref(pref_service_,
1027                                  metrics::prefs::kMetricsFileMetricsMetadata);
1028   base::Value::List& list_value = list_pref.Get();
1029   if (list_value.empty())
1030     return false;
1031 
1032   size_t count = pref_service_->GetInteger(
1033       metrics::prefs::kStabilityFileMetricsUnsentSamplesCount);
1034   pref_service_->SetInteger(
1035       metrics::prefs::kStabilityFileMetricsUnsentSamplesCount,
1036       list_value[0].GetInt() + count);
1037   pref_service_->SetInteger(
1038       metrics::prefs::kStabilityFileMetricsUnsentFilesCount,
1039       list_value.size() - 1);
1040   list_value.erase(list_value.begin());
1041 
1042   return true;
1043 }
1044 
1045 }  // namespace metrics
1046