1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/metrics/file_metrics_provider.h"
6 
7 #include <stddef.h>
8 
9 #include <memory>
10 #include <vector>
11 
12 #include "base/command_line.h"
13 #include "base/containers/flat_map.h"
14 #include "base/feature_list.h"
15 #include "base/files/file.h"
16 #include "base/files/file_enumerator.h"
17 #include "base/files/file_util.h"
18 #include "base/files/memory_mapped_file.h"
19 #include "base/functional/bind.h"
20 #include "base/logging.h"
21 #include "base/metrics/histogram_base.h"
22 #include "base/metrics/histogram_functions.h"
23 #include "base/metrics/histogram_macros.h"
24 #include "base/metrics/persistent_histogram_allocator.h"
25 #include "base/metrics/persistent_memory_allocator.h"
26 #include "base/metrics/ranges_manager.h"
27 #include "base/strings/string_piece.h"
28 #include "base/strings/stringprintf.h"
29 #include "base/task/single_thread_task_runner.h"
30 #include "base/task/task_runner.h"
31 #include "base/task/task_traits.h"
32 #include "base/task/thread_pool.h"
33 #include "base/time/time.h"
34 #include "components/metrics/metrics_pref_names.h"
35 #include "components/metrics/metrics_service.h"
36 #include "components/metrics/persistent_histograms.h"
37 #include "components/metrics/persistent_system_profile.h"
38 #include "components/prefs/pref_registry_simple.h"
39 #include "components/prefs/pref_service.h"
40 #include "components/prefs/scoped_user_pref_update.h"
41 
42 namespace metrics {
43 namespace {
44 
// Describes how files belonging to one kind of source are opened and
// accessed. Keeping these values in a table (see |kSourceOptions|) obviates
// the need for multiple code-paths within several of the methods.
struct SourceOptions {
  // The flags to be used to open a file on disk. Passed to the base::File
  // constructor.
  int file_open_flags;

  // The access mode to be used when mapping a file into memory.
  base::MemoryMappedFile::Access memory_mapped_access;

  // Indicates if the file is to be accessed read-only. When false, the file
  // is probed for writability before being mapped.
  bool is_read_only;
};
58 
// Opening a file typically requires at least these flags.
constexpr int STD_OPEN = base::File::FLAG_OPEN | base::File::FLAG_READ;

// Per-source-type access options. The table is indexed directly by a
// SourceType value (kSourceOptions[source->type]), so each entry must stay at
// the position corresponding to the enumerator named in its comment.
constexpr SourceOptions kSourceOptions[] = {
    // SOURCE_HISTOGRAMS_ATOMIC_FILE
    {
        // Ensure that no other process reads this at the same time.
        STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
        base::MemoryMappedFile::READ_ONLY,
        true,
    },
    // SOURCE_HISTOGRAMS_ATOMIC_DIR
    {
        // Ensure that no other process reads this at the same time.
        STD_OPEN | base::File::FLAG_WIN_EXCLUSIVE_READ,
        base::MemoryMappedFile::READ_ONLY,
        true,
    },
    // SOURCE_HISTOGRAMS_ACTIVE_FILE
    {
        // Allow writing to the file. This is needed so we can keep track of
        // deltas that have been uploaded (by modifying the file), while the
        // file may still be open by an external process (e.g. Crashpad).
        STD_OPEN | base::File::FLAG_WRITE,
        base::MemoryMappedFile::READ_WRITE,
        false,
    },
};
87 
DeleteFileWhenPossible(const base::FilePath & path)88 void DeleteFileWhenPossible(const base::FilePath& path) {
89   // Open (with delete) and then immediately close the file by going out of
90   // scope. This is the only cross-platform safe way to delete a file that may
91   // be open elsewhere, a distinct possibility given the asynchronous nature
92   // of the delete task.
93   base::File file(path, base::File::FLAG_OPEN | base::File::FLAG_READ |
94                             base::File::FLAG_DELETE_ON_CLOSE);
95 }
96 
// A task runner to use for testing. When non-null it is returned by
// CreateBackgroundTaskRunner() in place of a thread-pool runner. Only a raw
// pointer is kept here, so this variable does not itself hold a reference.
base::TaskRunner* g_task_runner_for_testing = nullptr;
99 
100 // Returns a task runner appropriate for running background tasks that perform
101 // file I/O.
CreateBackgroundTaskRunner()102 scoped_refptr<base::TaskRunner> CreateBackgroundTaskRunner() {
103   if (g_task_runner_for_testing)
104     return scoped_refptr<base::TaskRunner>(g_task_runner_for_testing);
105 
106   return base::ThreadPool::CreateTaskRunner(
107       {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
108        base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN});
109 }
110 
111 }  // namespace
112 
113 // This structure stores all the information about the sources being monitored
114 // and their current reporting state.
115 struct FileMetricsProvider::SourceInfo {
SourceInfometrics::FileMetricsProvider::SourceInfo116   SourceInfo(const Params& params)
117       : type(params.type),
118         association(params.association),
119         prefs_key(params.prefs_key),
120         filter(params.filter),
121         max_age(params.max_age),
122         max_dir_kib(params.max_dir_kib),
123         max_dir_files(params.max_dir_files) {
124     switch (type) {
125       case SOURCE_HISTOGRAMS_ACTIVE_FILE:
126         DCHECK(prefs_key.empty());
127         [[fallthrough]];
128       case SOURCE_HISTOGRAMS_ATOMIC_FILE:
129         path = params.path;
130         break;
131       case SOURCE_HISTOGRAMS_ATOMIC_DIR:
132         directory = params.path;
133         break;
134     }
135   }
136 
137   SourceInfo(const SourceInfo&) = delete;
138   SourceInfo& operator=(const SourceInfo&) = delete;
139 
~SourceInfometrics::FileMetricsProvider::SourceInfo140   ~SourceInfo() {}
141 
142   struct FoundFile {
143     base::FilePath path;
144     base::FileEnumerator::FileInfo info;
145   };
146   using FoundFiles = base::flat_map<base::Time, FoundFile>;
147 
148   // How to access this source (file/dir, atomic/active).
149   const SourceType type;
150 
151   // With what run this source is associated.
152   const SourceAssociation association;
153 
154   // Where on disk the directory is located. This will only be populated when
155   // a directory is being monitored.
156   base::FilePath directory;
157 
158   // The files found in the above directory, ordered by last-modified.
159   std::unique_ptr<FoundFiles> found_files;
160 
161   // Where on disk the file is located. If a directory is being monitored,
162   // this will be updated for whatever file is being read.
163   base::FilePath path;
164 
165   // Name used inside prefs to persistent metadata.
166   std::string prefs_key;
167 
168   // The filter callback for determining what to do with found files.
169   FilterCallback filter;
170 
171   // The maximum allowed age of a file.
172   base::TimeDelta max_age;
173 
174   // The maximum allowed bytes in a directory.
175   size_t max_dir_kib;
176 
177   // The maximum allowed files in a directory.
178   size_t max_dir_files;
179 
180   // The last-seen time of this source to detect change.
181   base::Time last_seen;
182 
183   // Indicates if the data has been read out or not.
184   bool read_complete = false;
185 
186   // Once a file has been recognized as needing to be read, it is mapped
187   // into memory and assigned to an |allocator| object.
188   std::unique_ptr<base::PersistentHistogramAllocator> allocator;
189 };
190 
Params(const base::FilePath & path,SourceType type,SourceAssociation association,base::StringPiece prefs_key)191 FileMetricsProvider::Params::Params(const base::FilePath& path,
192                                     SourceType type,
193                                     SourceAssociation association,
194                                     base::StringPiece prefs_key)
195     : path(path), type(type), association(association), prefs_key(prefs_key) {}
196 
~Params()197 FileMetricsProvider::Params::~Params() {}
198 
FileMetricsProvider(PrefService * local_state)199 FileMetricsProvider::FileMetricsProvider(PrefService* local_state)
200     : task_runner_(CreateBackgroundTaskRunner()), pref_service_(local_state) {
201   base::StatisticsRecorder::RegisterHistogramProvider(
202       weak_factory_.GetWeakPtr());
203 }
204 
~FileMetricsProvider()205 FileMetricsProvider::~FileMetricsProvider() {}
206 
RegisterSource(const Params & params)207 void FileMetricsProvider::RegisterSource(const Params& params) {
208   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
209 
210   // Ensure that kSourceOptions has been filled for this type.
211   DCHECK_GT(std::size(kSourceOptions), static_cast<size_t>(params.type));
212 
213   std::unique_ptr<SourceInfo> source(new SourceInfo(params));
214 
215   // |prefs_key| may be empty if the caller does not wish to persist the
216   // state across instances of the program.
217   if (pref_service_ && !params.prefs_key.empty()) {
218     source->last_seen = pref_service_->GetTime(
219         metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key);
220   }
221 
222   switch (params.association) {
223     case ASSOCIATE_CURRENT_RUN:
224     case ASSOCIATE_INTERNAL_PROFILE:
225     case ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER:
226       sources_to_check_.push_back(std::move(source));
227       break;
228     case ASSOCIATE_PREVIOUS_RUN:
229     case ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN:
230       DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_FILE, source->type);
231       sources_for_previous_run_.push_back(std::move(source));
232       break;
233   }
234 }
235 
236 // static
RegisterSourcePrefs(PrefRegistrySimple * prefs,const base::StringPiece prefs_key)237 void FileMetricsProvider::RegisterSourcePrefs(
238     PrefRegistrySimple* prefs,
239     const base::StringPiece prefs_key) {
240   prefs->RegisterInt64Pref(
241       metrics::prefs::kMetricsLastSeenPrefix + std::string(prefs_key), 0);
242 }
243 
// static
// Registers the list pref to which AppendToSamplesCountPref() appends the
// samples counts collected from file sources.
void FileMetricsProvider::RegisterPrefs(PrefRegistrySimple* prefs) {
  prefs->RegisterListPref(metrics::prefs::kMetricsFileMetricsMetadata);
}
248 
249 // static
SetTaskRunnerForTesting(const scoped_refptr<base::TaskRunner> & task_runner)250 void FileMetricsProvider::SetTaskRunnerForTesting(
251     const scoped_refptr<base::TaskRunner>& task_runner) {
252   DCHECK(!g_task_runner_for_testing || !task_runner);
253   g_task_runner_for_testing = task_runner.get();
254 }
255 
// static
// Reports |result| to the "UMA.FileMetricsProvider.AccessResult" enumeration
// histogram, using ACCESS_RESULT_MAX as the enumeration boundary.
void FileMetricsProvider::RecordAccessResult(AccessResult result) {
  UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.AccessResult", result,
                            ACCESS_RESULT_MAX);
}
261 
262 // static
LocateNextFileInDirectory(SourceInfo * source)263 bool FileMetricsProvider::LocateNextFileInDirectory(SourceInfo* source) {
264   DCHECK_EQ(SOURCE_HISTOGRAMS_ATOMIC_DIR, source->type);
265   DCHECK(!source->directory.empty());
266 
267   // Cumulative directory stats. These will remain zero if the directory isn't
268   // scanned but that's okay since any work they would cause to be done below
269   // would have been done during the first call where the directory was fully
270   // scanned.
271   size_t total_size_kib = 0;  // Using KiB allows 4TiB even on 32-bit builds.
272   size_t file_count = 0;
273 
274   base::Time now_time = base::Time::Now();
275   if (!source->found_files) {
276     source->found_files = std::make_unique<SourceInfo::FoundFiles>();
277     base::FileEnumerator file_iter(source->directory, /*recursive=*/false,
278                                    base::FileEnumerator::FILES);
279     SourceInfo::FoundFile found_file;
280 
281     // Open the directory and find all the files, remembering the last-modified
282     // time of each.
283     for (found_file.path = file_iter.Next(); !found_file.path.empty();
284          found_file.path = file_iter.Next()) {
285       found_file.info = file_iter.GetInfo();
286 
287       // Ignore directories.
288       if (found_file.info.IsDirectory())
289         continue;
290 
291       // Ignore temporary files.
292       base::FilePath::CharType first_character =
293           found_file.path.BaseName().value().front();
294       if (first_character == FILE_PATH_LITERAL('.') ||
295           first_character == FILE_PATH_LITERAL('_')) {
296         continue;
297       }
298 
299       // Ignore non-PMA (Persistent Memory Allocator) files.
300       if (found_file.path.Extension() !=
301           base::PersistentMemoryAllocator::kFileExtension) {
302         continue;
303       }
304 
305       // Process real files.
306       total_size_kib += found_file.info.GetSize() >> 10;
307       base::Time modified = found_file.info.GetLastModifiedTime();
308       if (modified > source->last_seen) {
309         // This file hasn't been read. Remember it (unless from the future).
310         if (modified <= now_time)
311           source->found_files->emplace(modified, std::move(found_file));
312         ++file_count;
313       } else {
314         // This file has been read. Try to delete it. Ignore any errors because
315         // the file may be un-removeable by this process. It could, for example,
316         // have been created by a privileged process like setup.exe. Even if it
317         // is not removed, it will continue to be ignored bacuse of the older
318         // modification time.
319         base::DeleteFile(found_file.path);
320       }
321     }
322   }
323 
324   // Filter files from the front until one is found for processing.
325   bool have_file = false;
326   while (!source->found_files->empty()) {
327     SourceInfo::FoundFile found =
328         std::move(source->found_files->begin()->second);
329     source->found_files->erase(source->found_files->begin());
330 
331     bool too_many =
332         source->max_dir_files > 0 && file_count > source->max_dir_files;
333     bool too_big =
334         source->max_dir_kib > 0 && total_size_kib > source->max_dir_kib;
335     bool too_old =
336         source->max_age != base::TimeDelta() &&
337         now_time - found.info.GetLastModifiedTime() > source->max_age;
338     if (too_many || too_big || too_old) {
339       base::DeleteFile(found.path);
340       --file_count;
341       total_size_kib -= found.info.GetSize() >> 10;
342       RecordAccessResult(too_many ? ACCESS_RESULT_TOO_MANY_FILES
343                                   : too_big ? ACCESS_RESULT_TOO_MANY_BYTES
344                                             : ACCESS_RESULT_TOO_OLD);
345       continue;
346     }
347 
348     AccessResult result = HandleFilterSource(source, found.path);
349     if (result == ACCESS_RESULT_SUCCESS) {
350       source->path = std::move(found.path);
351       have_file = true;
352       break;
353     }
354 
355     // Record the result. Success will be recorded by the caller.
356     if (result != ACCESS_RESULT_THIS_PID)
357       RecordAccessResult(result);
358   }
359 
360   return have_file;
361 }
362 
363 // static
FinishedWithSource(SourceInfo * source,AccessResult result)364 void FileMetricsProvider::FinishedWithSource(SourceInfo* source,
365                                              AccessResult result) {
366   // Different source types require different post-processing.
367   switch (source->type) {
368     case SOURCE_HISTOGRAMS_ATOMIC_FILE:
369     case SOURCE_HISTOGRAMS_ATOMIC_DIR:
370       // Done with this file so delete the allocator and its owned file.
371       source->allocator.reset();
372       // Remove the file if has been recorded. This prevents them from
373       // accumulating or also being recorded by different instances of
374       // the browser.
375       if (result == ACCESS_RESULT_SUCCESS ||
376           result == ACCESS_RESULT_NOT_MODIFIED ||
377           result == ACCESS_RESULT_MEMORY_DELETED ||
378           result == ACCESS_RESULT_TOO_OLD) {
379         DeleteFileWhenPossible(source->path);
380       }
381       break;
382     case SOURCE_HISTOGRAMS_ACTIVE_FILE:
383       // Keep the allocator open so it doesn't have to be re-mapped each
384       // time. This also allows the contents to be merged on-demand.
385       break;
386   }
387 }
388 
389 // static
CheckAndMergeMetricSourcesOnTaskRunner(SourceInfoList * sources)390 std::vector<size_t> FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner(
391     SourceInfoList* sources) {
392   // This method has all state information passed in |sources| and is intended
393   // to run on a worker thread rather than the UI thread.
394   std::vector<size_t> samples_counts;
395 
396   for (std::unique_ptr<SourceInfo>& source : *sources) {
397     AccessResult result;
398     do {
399       result = CheckAndMapMetricSource(source.get());
400 
401       // Some results are not reported in order to keep the dashboard clean.
402       if (result != ACCESS_RESULT_DOESNT_EXIST &&
403           result != ACCESS_RESULT_NOT_MODIFIED &&
404           result != ACCESS_RESULT_THIS_PID) {
405         RecordAccessResult(result);
406       }
407 
408       // If there are no files (or no more files) in this source, stop now.
409       if (result == ACCESS_RESULT_DOESNT_EXIST)
410         break;
411 
412       // Mapping was successful. Merge it.
413       if (result == ACCESS_RESULT_SUCCESS) {
414         // Metrics associated with internal profiles have to be fetched directly
415         // so just keep the mapping for use by the main thread.
416         if (source->association == ASSOCIATE_INTERNAL_PROFILE)
417           break;
418 
419         if (source->association == ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER) {
420           samples_counts.push_back(CollectFileMetadataFromSource(source.get()));
421         } else {
422           size_t histograms_count =
423               MergeHistogramDeltasFromSource(source.get());
424           if (!source->prefs_key.empty()) {
425             base::UmaHistogramCounts1000(
426                 base::StringPrintf(
427                     "UMA.FileMetricsProvider.%s.MergedHistogramsCount",
428                     source->prefs_key.c_str()),
429                 histograms_count);
430           }
431         }
432         DCHECK(source->read_complete);
433       }
434 
435       // All done with this source.
436       FinishedWithSource(source.get(), result);
437 
438       // If it's a directory, keep trying until a file is successfully opened.
439       // When there are no more files, ACCESS_RESULT_DOESNT_EXIST will be
440       // returned and the loop will exit above.
441     } while (result != ACCESS_RESULT_SUCCESS && !source->directory.empty());
442 
443     // If the set of known files is empty, clear the object so the next run
444     // will do a fresh scan of the directory.
445     if (source->found_files && source->found_files->empty())
446       source->found_files.reset();
447   }
448 
449   return samples_counts;
450 }
451 
// Validates the file currently referenced by |source| and, if acceptable,
// maps it into memory and attaches a histogram allocator to |source|. For
// directory sources the next candidate file is located first. Returns
// ACCESS_RESULT_SUCCESS when |source->allocator| has been populated, or the
// result code describing why the file was not used.
//
// This method has all state information passed in |source| and is intended
// to run on a worker thread rather than the UI thread.
// static
FileMetricsProvider::AccessResult FileMetricsProvider::CheckAndMapMetricSource(
    SourceInfo* source) {
  // If source was read, clean up after it.
  if (source->read_complete)
    FinishedWithSource(source, ACCESS_RESULT_SUCCESS);
  source->read_complete = false;
  DCHECK(!source->allocator);

  // If the source is a directory, look for files within it.
  if (!source->directory.empty() && !LocateNextFileInDirectory(source))
    return ACCESS_RESULT_DOESNT_EXIST;

  // Do basic validation on the file metadata.
  base::File::Info info;
  if (!base::GetFileInfo(source->path, &info))
    return ACCESS_RESULT_DOESNT_EXIST;

  if (info.is_directory || info.size == 0)
    return ACCESS_RESULT_INVALID_FILE;

  // Skip files that have not changed since they were last seen and, when an
  // age limit is configured, files older than that limit.
  if (source->last_seen >= info.last_modified)
    return ACCESS_RESULT_NOT_MODIFIED;
  if (source->max_age != base::TimeDelta() &&
      base::Time::Now() - info.last_modified > source->max_age) {
    return ACCESS_RESULT_TOO_OLD;
  }

  // Non-directory files still need to be filtered. (Files from a directory
  // were already filtered by LocateNextFileInDirectory().)
  if (source->directory.empty()) {
    AccessResult result = HandleFilterSource(source, source->path);
    if (result != ACCESS_RESULT_SUCCESS)
      return result;
  }

  // A new file of metrics has been found.
  base::File file(source->path, kSourceOptions[source->type].file_open_flags);
  if (!file.IsValid())
    return ACCESS_RESULT_NO_OPEN;

  // Check that file is writable if that is expected. If a write is attempted
  // on an unwritable memory-mapped file, a SIGBUS will cause a crash.
  const bool read_only = kSourceOptions[source->type].is_read_only;
  if (!read_only) {
    // Save the first bytes of the file so they can be restored afterwards.
    constexpr int kTestSize = 16;
    char header[kTestSize];
    int amount = file.Read(0, header, kTestSize);
    if (amount != kTestSize)
      return ACCESS_RESULT_INVALID_CONTENTS;

    // Overwrite them with zeros...
    char zeros[kTestSize] = {0};
    file.Write(0, zeros, kTestSize);
    file.Flush();

    // A crash here would be unfortunate as the file would be left invalid
    // and skipped/deleted by later attempts. This is unlikely, however, and
    // the benefit of avoiding crashes from mapping as read/write a file that
    // can't be written more than justifies the risk.

    // ...and read them back to confirm the write took effect.
    char check[kTestSize];
    amount = file.Read(0, check, kTestSize);
    if (amount != kTestSize)
      return ACCESS_RESULT_INVALID_CONTENTS;
    if (memcmp(check, zeros, kTestSize) != 0)
      return ACCESS_RESULT_NOT_WRITABLE;

    // Restore the original bytes and verify that they were written.
    file.Write(0, header, kTestSize);
    file.Flush();
    amount = file.Read(0, check, kTestSize);
    if (amount != kTestSize)
      return ACCESS_RESULT_INVALID_CONTENTS;
    if (memcmp(check, header, kTestSize) != 0)
      return ACCESS_RESULT_NOT_WRITABLE;
  }

  std::unique_ptr<base::MemoryMappedFile> mapped(new base::MemoryMappedFile());
  if (!mapped->Initialize(std::move(file),
                          kSourceOptions[source->type].memory_mapped_access)) {
    return ACCESS_RESULT_SYSTEM_MAP_FAILURE;
  }

  // Ensure any problems below don't occur repeatedly. With |last_seen|
  // advanced, an unchanged file is reported as "not modified" on the next
  // check.
  source->last_seen = info.last_modified;

  // Test the validity of the file contents.
  if (!base::FilePersistentMemoryAllocator::IsFileAcceptable(*mapped,
                                                             read_only)) {
    return ACCESS_RESULT_INVALID_CONTENTS;
  }

  // Map the file and validate it.
  std::unique_ptr<base::FilePersistentMemoryAllocator> memory_allocator =
      std::make_unique<base::FilePersistentMemoryAllocator>(
          std::move(mapped), 0, 0, base::StringPiece(), read_only);
  if (memory_allocator->GetMemoryState() ==
      base::PersistentMemoryAllocator::MEMORY_DELETED) {
    return ACCESS_RESULT_MEMORY_DELETED;
  }
  if (memory_allocator->IsCorrupt())
    return ACCESS_RESULT_DATA_CORRUPTION;

  // Cache the file data while running in a background thread so that there
  // shouldn't be any I/O when the data is accessed from the main thread.
  // Files with an internal profile, those from previous runs that include
  // a full system profile and are fetched via ProvideIndependentMetrics(),
  // are loaded on a background task and so there's no need to cache the
  // data in advance.
  if (source->association != ASSOCIATE_INTERNAL_PROFILE)
    memory_allocator->Cache();

  // Create an allocator for the mapped file. Ownership passes to the allocator.
  source->allocator = std::make_unique<base::PersistentHistogramAllocator>(
      std::move(memory_allocator));

  // Check that an "independent" file has the necessary information present.
  // Note that |source->allocator| remains set when this check fails.
  if (source->association == ASSOCIATE_INTERNAL_PROFILE &&
      !PersistentSystemProfile::GetSystemProfile(
          *source->allocator->memory_allocator(), nullptr)) {
    return ACCESS_RESULT_NO_PROFILE;
  }

  return ACCESS_RESULT_SUCCESS;
}
577 
578 // static
MergeHistogramDeltasFromSource(SourceInfo * source)579 size_t FileMetricsProvider::MergeHistogramDeltasFromSource(SourceInfo* source) {
580   DCHECK(source->allocator);
581   base::PersistentHistogramAllocator::Iterator histogram_iter(
582       source->allocator.get());
583 
584   const bool read_only = kSourceOptions[source->type].is_read_only;
585   size_t histogram_count = 0;
586   while (true) {
587     std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
588     if (!histogram)
589       break;
590 
591     if (read_only) {
592       source->allocator->MergeHistogramFinalDeltaToStatisticsRecorder(
593           histogram.get());
594     } else {
595       source->allocator->MergeHistogramDeltaToStatisticsRecorder(
596           histogram.get());
597     }
598     ++histogram_count;
599   }
600 
601   source->read_complete = true;
602   DVLOG(1) << "Reported " << histogram_count << " histograms from "
603            << source->path.value();
604   return histogram_count;
605 }
606 
607 // static
RecordHistogramSnapshotsFromSource(base::HistogramSnapshotManager * snapshot_manager,SourceInfo * source,base::HistogramBase::Flags required_flags)608 void FileMetricsProvider::RecordHistogramSnapshotsFromSource(
609     base::HistogramSnapshotManager* snapshot_manager,
610     SourceInfo* source,
611     base::HistogramBase::Flags required_flags) {
612   DCHECK_NE(SOURCE_HISTOGRAMS_ACTIVE_FILE, source->type);
613 
614   base::PersistentHistogramAllocator::Iterator histogram_iter(
615       source->allocator.get());
616 
617   int histogram_count = 0;
618   while (true) {
619     std::unique_ptr<base::HistogramBase> histogram = histogram_iter.GetNext();
620     if (!histogram)
621       break;
622     if (histogram->HasFlags(required_flags)) {
623       snapshot_manager->PrepareFinalDelta(histogram.get());
624       ++histogram_count;
625     }
626   }
627 
628   source->read_complete = true;
629   DVLOG(1) << "Reported " << histogram_count << " histograms from "
630            << source->path.value();
631 }
632 
HandleFilterSource(SourceInfo * source,const base::FilePath & path)633 FileMetricsProvider::AccessResult FileMetricsProvider::HandleFilterSource(
634     SourceInfo* source,
635     const base::FilePath& path) {
636   if (!source->filter)
637     return ACCESS_RESULT_SUCCESS;
638 
639   // Alternatively, pass a Params object to the filter like what was originally
640   // used to configure the source.
641   // Params params(path, source->type, source->association, source->prefs_key);
642   FilterAction action = source->filter.Run(path);
643   switch (action) {
644     case FILTER_PROCESS_FILE:
645       // Process the file.
646       return ACCESS_RESULT_SUCCESS;
647 
648     case FILTER_ACTIVE_THIS_PID:
649     // Even the file for the current process has to be touched or its stamp
650     // will be less than "last processed" and thus skipped on future runs,
651     // even those done by new instances of the browser if a pref key is
652     // provided so that the last-uploaded stamp is recorded.
653     case FILTER_TRY_LATER: {
654       // Touch the file with the current timestamp making it (presumably) the
655       // newest file in the directory.
656       base::Time now = base::Time::Now();
657       base::TouchFile(path, /*accessed=*/now, /*modified=*/now);
658       if (action == FILTER_ACTIVE_THIS_PID)
659         return ACCESS_RESULT_THIS_PID;
660       return ACCESS_RESULT_FILTER_TRY_LATER;
661     }
662 
663     case FILTER_SKIP_FILE:
664       switch (source->type) {
665         case SOURCE_HISTOGRAMS_ATOMIC_FILE:
666         case SOURCE_HISTOGRAMS_ATOMIC_DIR:
667           // Only "atomic" files are deleted (best-effort).
668           DeleteFileWhenPossible(path);
669           break;
670         case SOURCE_HISTOGRAMS_ACTIVE_FILE:
671           // File will presumably get modified elsewhere and thus tried again.
672           break;
673       }
674       return ACCESS_RESULT_FILTER_SKIP_FILE;
675   }
676 
677   // Code never gets here but some compilers don't realize that and so complain
678   // that "not all control paths return a value".
679   NOTREACHED();
680   return ACCESS_RESULT_SUCCESS;
681 }
682 
683 /* static */
ProvideIndependentMetricsOnTaskRunner(SourceInfo * source,SystemProfileProto * system_profile_proto,base::HistogramSnapshotManager * snapshot_manager)684 bool FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner(
685     SourceInfo* source,
686     SystemProfileProto* system_profile_proto,
687     base::HistogramSnapshotManager* snapshot_manager) {
688   if (PersistentSystemProfile::GetSystemProfile(
689           *source->allocator->memory_allocator(), system_profile_proto)) {
690     // Pass a custom RangesManager so that we do not register the BucketRanges
691     // with the global statistics recorder. Otherwise, it could add unnecessary
692     // contention, and a low amount of extra memory that will never be released.
693     source->allocator->SetRangesManager(new base::RangesManager());
694     system_profile_proto->mutable_stability()->set_from_previous_run(true);
695     RecordHistogramSnapshotsFromSource(
696         snapshot_manager, source,
697         /*required_flags=*/base::HistogramBase::kUmaTargetedHistogramFlag);
698     return true;
699   }
700 
701   return false;
702 }
703 
AppendToSamplesCountPref(std::vector<size_t> samples_counts)704 void FileMetricsProvider::AppendToSamplesCountPref(
705     std::vector<size_t> samples_counts) {
706   ScopedListPrefUpdate update(pref_service_,
707                               metrics::prefs::kMetricsFileMetricsMetadata);
708   for (size_t samples_count : samples_counts) {
709     update->Append(static_cast<int>(samples_count));
710   }
711 }
712 
713 // static
CollectFileMetadataFromSource(SourceInfo * source)714 size_t FileMetricsProvider::CollectFileMetadataFromSource(SourceInfo* source) {
715   base::HistogramBase::Count samples_count = 0;
716   base::PersistentHistogramAllocator::Iterator it{source->allocator.get()};
717   std::unique_ptr<base::HistogramBase> histogram;
718   while ((histogram = it.GetNext()) != nullptr) {
719     samples_count += histogram->SnapshotFinalDelta()->TotalCount();
720   }
721   source->read_complete = true;
722   return samples_count;
723 }
724 
ScheduleSourcesCheck()725 void FileMetricsProvider::ScheduleSourcesCheck() {
726   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
727 
728   if (sources_to_check_.empty())
729     return;
730 
731   // Create an independent list of sources for checking. This will be Owned()
732   // by the reply call given to the task-runner, to be deleted when that call
733   // has returned. It is also passed Unretained() to the task itself, safe
734   // because that must complete before the reply runs.
735   SourceInfoList* check_list = new SourceInfoList();
736   std::swap(sources_to_check_, *check_list);
737   task_runner_->PostTaskAndReplyWithResult(
738       FROM_HERE,
739       base::BindOnce(
740           &FileMetricsProvider::CheckAndMergeMetricSourcesOnTaskRunner,
741           base::Unretained(check_list)),
742       base::BindOnce(&FileMetricsProvider::RecordSourcesChecked,
743                      weak_factory_.GetWeakPtr(), base::Owned(check_list)));
744 }
745 
RecordSourcesChecked(SourceInfoList * checked,std::vector<size_t> samples_counts)746 void FileMetricsProvider::RecordSourcesChecked(
747     SourceInfoList* checked,
748     std::vector<size_t> samples_counts) {
749   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
750 
751   AppendToSamplesCountPref(std::move(samples_counts));
752 
753   // Sources that still have an allocator at this point are read/write "active"
754   // files that may need their contents merged on-demand. If there is no
755   // allocator (not a read/write file) but a read was done on the task-runner,
756   // try again immediately to see if more is available (in a directory of
757   // files). Otherwise, remember the source for checking again at a later time.
758   bool did_read = false;
759   for (auto iter = checked->begin(); iter != checked->end();) {
760     auto temp = iter++;
761     SourceInfo* source = temp->get();
762     if (source->read_complete) {
763       RecordSourceAsRead(source);
764       did_read = true;
765     }
766     if (source->allocator) {
767       if (source->association == ASSOCIATE_INTERNAL_PROFILE) {
768         sources_with_profile_.splice(sources_with_profile_.end(), *checked,
769                                      temp);
770       } else {
771         sources_mapped_.splice(sources_mapped_.end(), *checked, temp);
772       }
773     } else {
774       sources_to_check_.splice(sources_to_check_.end(), *checked, temp);
775     }
776   }
777 
778   // If a read was done, schedule another one immediately. In the case of a
779   // directory of files, this ensures that all entries get processed. It's
780   // done here instead of as a loop in CheckAndMergeMetricSourcesOnTaskRunner
781   // so that (a) it gives the disk a rest and (b) testing of individual reads
782   // is possible.
783   if (did_read)
784     ScheduleSourcesCheck();
785 }
786 
DeleteFileAsync(const base::FilePath & path)787 void FileMetricsProvider::DeleteFileAsync(const base::FilePath& path) {
788   task_runner_->PostTask(FROM_HERE,
789                          base::BindOnce(DeleteFileWhenPossible, path));
790 }
791 
RecordSourceAsRead(SourceInfo * source)792 void FileMetricsProvider::RecordSourceAsRead(SourceInfo* source) {
793   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
794 
795   // Persistently record the "last seen" timestamp of the source file to
796   // ensure that the file is never read again unless it is modified again.
797   if (pref_service_ && !source->prefs_key.empty()) {
798     pref_service_->SetTime(
799         metrics::prefs::kMetricsLastSeenPrefix + source->prefs_key,
800         source->last_seen);
801   }
802 }
803 
OnDidCreateMetricsLog()804 void FileMetricsProvider::OnDidCreateMetricsLog() {
805   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
806 
807   // Schedule a check to see if there are new metrics to load. If so, they will
808   // be reported during the next collection run after this one. The check is run
809   // off of a MayBlock() TaskRunner so as to not cause delays on the main UI
810   // thread (which is currently where metric collection is done).
811   ScheduleSourcesCheck();
812 
813   // Clear any data for initial metrics since they're always reported
814   // before the first call to this method. It couldn't be released after
815   // being reported in RecordInitialHistogramSnapshots because the data
816   // will continue to be used by the caller after that method returns. Once
817   // here, though, all actions to be done on the data have been completed.
818   for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_)
819     DeleteFileAsync(source->path);
820   sources_for_previous_run_.clear();
821 }
822 
HasIndependentMetrics()823 bool FileMetricsProvider::HasIndependentMetrics() {
824   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
825   return !sources_with_profile_.empty() || SimulateIndependentMetrics();
826 }
827 
ProvideIndependentMetrics(base::OnceCallback<void (bool)> done_callback,ChromeUserMetricsExtension * uma_proto,base::HistogramSnapshotManager * snapshot_manager)828 void FileMetricsProvider::ProvideIndependentMetrics(
829     base::OnceCallback<void(bool)> done_callback,
830     ChromeUserMetricsExtension* uma_proto,
831     base::HistogramSnapshotManager* snapshot_manager) {
832   SystemProfileProto* system_profile_proto =
833       uma_proto->mutable_system_profile();
834   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
835 
836   if (sources_with_profile_.empty()) {
837     std::move(done_callback).Run(false);
838     return;
839   }
840 
841   std::unique_ptr<SourceInfo> source =
842       std::move(*sources_with_profile_.begin());
843   sources_with_profile_.pop_front();
844   SourceInfo* source_ptr = source.get();
845   DCHECK(source->allocator);
846 
847   // Do the actual work as a background task.
848   task_runner_->PostTaskAndReplyWithResult(
849       FROM_HERE,
850       base::BindOnce(
851           &FileMetricsProvider::ProvideIndependentMetricsOnTaskRunner,
852           source_ptr, system_profile_proto, snapshot_manager),
853       base::BindOnce(&FileMetricsProvider::ProvideIndependentMetricsCleanup,
854                      weak_factory_.GetWeakPtr(), std::move(done_callback),
855                      std::move(source)));
856 }
857 
ProvideIndependentMetricsCleanup(base::OnceCallback<void (bool)> done_callback,std::unique_ptr<SourceInfo> source,bool success)858 void FileMetricsProvider::ProvideIndependentMetricsCleanup(
859     base::OnceCallback<void(bool)> done_callback,
860     std::unique_ptr<SourceInfo> source,
861     bool success) {
862   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
863 
864   // Regardless of whether this source was successfully recorded, it is
865   // never read again.
866   source->read_complete = true;
867   RecordSourceAsRead(source.get());
868   sources_to_check_.push_back(std::move(source));
869   ScheduleSourcesCheck();
870 
871   // Execute the chained callback.
872   // TODO(crbug/1052796): Remove the UMA timer code, which is currently used to
873   // determine if it is worth to finalize independent logs in the background
874   // by measuring the time it takes to execute the callback
875   // MetricsService::PrepareProviderMetricsLogDone().
876   base::TimeTicks start_time = base::TimeTicks::Now();
877   std::move(done_callback).Run(success);
878   if (success) {
879     // We don't use the SCOPED_UMA_HISTOGRAM_TIMER macro because we want to
880     // measure the time it takes to finalize an independent log, and that only
881     // happens when |success| is true.
882     base::UmaHistogramTimes(
883         "UMA.IndependentLog.FileMetricsProvider.FinalizeTime",
884         base::TimeTicks::Now() - start_time);
885   }
886 }
887 
HasPreviousSessionData()888 bool FileMetricsProvider::HasPreviousSessionData() {
889   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
890 
891   // Check all sources for previous run to see if they need to be read.
892   for (auto iter = sources_for_previous_run_.begin();
893        iter != sources_for_previous_run_.end();) {
894     auto temp = iter++;
895     SourceInfo* source = temp->get();
896 
897     // This would normally be done on a background I/O thread but there
898     // hasn't been a chance to run any at the time this method is called.
899     // Do the check in-line.
900     AccessResult result = CheckAndMapMetricSource(source);
901     UMA_HISTOGRAM_ENUMERATION("UMA.FileMetricsProvider.InitialAccessResult",
902                               result, ACCESS_RESULT_MAX);
903 
904     // If it couldn't be accessed, remove it from the list. There is only ever
905     // one chance to record it so no point keeping it around for later. Also
906     // mark it as having been read since uploading it with a future browser
907     // run would associate it with the then-previous run which would no longer
908     // be the run from which it came.
909     if (result != ACCESS_RESULT_SUCCESS) {
910       DCHECK(!source->allocator);
911       RecordSourceAsRead(source);
912       DeleteFileAsync(source->path);
913       sources_for_previous_run_.erase(temp);
914       continue;
915     }
916 
917     DCHECK(source->allocator);
918 
919     // If the source should be associated with an existing internal profile,
920     // move it to |sources_with_profile_| for later upload.
921     if (source->association == ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN) {
922       if (PersistentSystemProfile::HasSystemProfile(
923               *source->allocator->memory_allocator())) {
924         sources_with_profile_.splice(sources_with_profile_.end(),
925                                      sources_for_previous_run_, temp);
926       }
927     }
928   }
929 
930   return !sources_for_previous_run_.empty();
931 }
932 
RecordInitialHistogramSnapshots(base::HistogramSnapshotManager * snapshot_manager)933 void FileMetricsProvider::RecordInitialHistogramSnapshots(
934     base::HistogramSnapshotManager* snapshot_manager) {
935   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
936 
937   for (const std::unique_ptr<SourceInfo>& source : sources_for_previous_run_) {
938     // The source needs to have an allocator attached to it in order to read
939     // histograms out of it.
940     DCHECK(!source->read_complete);
941     DCHECK(source->allocator);
942 
943     // Dump all stability histograms contained within the source to the
944     // snapshot-manager.
945     RecordHistogramSnapshotsFromSource(
946         snapshot_manager, source.get(),
947         /*required_flags=*/base::HistogramBase::kUmaStabilityHistogramFlag);
948 
949     // Update the last-seen time so it isn't read again unless it changes.
950     RecordSourceAsRead(source.get());
951   }
952 }
953 
MergeHistogramDeltas()954 void FileMetricsProvider::MergeHistogramDeltas() {
955   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
956 
957   for (std::unique_ptr<SourceInfo>& source : sources_mapped_) {
958     MergeHistogramDeltasFromSource(source.get());
959   }
960 }
961 
SimulateIndependentMetrics()962 bool FileMetricsProvider::SimulateIndependentMetrics() {
963   if (!pref_service_->HasPrefPath(
964           metrics::prefs::kMetricsFileMetricsMetadata)) {
965     return false;
966   }
967 
968   ScopedListPrefUpdate list_pref(pref_service_,
969                                  metrics::prefs::kMetricsFileMetricsMetadata);
970   base::Value::List& list_value = list_pref.Get();
971   if (list_value.empty())
972     return false;
973 
974   size_t count = pref_service_->GetInteger(
975       metrics::prefs::kStabilityFileMetricsUnsentSamplesCount);
976   pref_service_->SetInteger(
977       metrics::prefs::kStabilityFileMetricsUnsentSamplesCount,
978       list_value[0].GetInt() + count);
979   pref_service_->SetInteger(
980       metrics::prefs::kStabilityFileMetricsUnsentFilesCount,
981       list_value.size() - 1);
982   list_value.erase(list_value.begin());
983 
984   return true;
985 }
986 
987 }  // namespace metrics
988