• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
6 #define COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
7 
8 #include <stddef.h>
9 
10 #include <list>
11 #include <memory>
12 #include <string_view>
13 #include <vector>
14 
15 #include "base/files/file_path.h"
16 #include "base/functional/callback_forward.h"
17 #include "base/gtest_prod_util.h"
18 #include "base/memory/raw_ptr.h"
19 #include "base/memory/scoped_refptr.h"
20 #include "base/memory/weak_ptr.h"
21 #include "base/metrics/statistics_recorder.h"
22 #include "base/sequence_checker.h"
23 #include "base/time/time.h"
24 #include "components/metrics/metrics_provider.h"
25 
26 class PrefRegistrySimple;
27 class PrefService;
28 
29 namespace metrics {
30 
31 // FileMetricsProvider gathers and logs histograms written to files on disk.
32 // Any number of files can be registered and will be polled once per upload
33 // cycle (at startup and periodically thereafter -- about every 30 minutes
34 // for desktop) for data to send.
35 class FileMetricsProvider : public MetricsProvider,
36                             public base::StatisticsRecorder::HistogramProvider {
37  public:
38   struct Params;
39 
40   enum SourceType {
41     // "Atomic" files are a collection of histograms that are written
42     // completely in a single atomic operation (typically a write followed
43     // by an atomic rename) and the file is never updated again except to
44     // be replaced by a completely new set of histograms. This is the only
45     // option that can be used if the file is not writeable by *this*
46     // process. Once the file has been read, an attempt will be made to
47     // delete it thus providing some measure of safety should different
48     // instantiations (such as by different users of a system-level install)
49     // try to read it. In case the delete operation fails, this class
50     // persistently tracks the last-modified time of the file so it will
51     // not be read a second time.
52     SOURCE_HISTOGRAMS_ATOMIC_FILE,
53 
54     // A directory of atomic PMA files. This handles a directory in which
55     // files of metrics are atomically added. Only files ending with ".pma"
56     // will be read. They are read according to their last-modified time and
57     // never read more than once (unless they change). Only one file will
58     // be read per reporting cycle. Filenames that start with a dot (.) or
59     // an underscore (_) are ignored so temporary files (perhaps created by
60     // the ImportantFileWriter) will not get read. Files that have been
61     // read will be attempted to be deleted; should those files not be
62     // deletable by this process, it is the responsibility of the producer
63     // to keep the directory pruned in some manner. Added files must have a
64     // timestamp later (not the same or earlier) than the newest file that
65     // already exists or they may be assumed to have been already uploaded.
66     SOURCE_HISTOGRAMS_ATOMIC_DIR,
67 
68     // "Active" files may be open by one or more other processes and updated
69     // at any time with new samples or new histograms. Such files may also be
70     // inactive for any period of time only to be opened again and have new
71     // data written to them. The file should probably never be deleted because
72     // there would be no guarantee that the data has been reported.
73     SOURCE_HISTOGRAMS_ACTIVE_FILE,
74   };
75 
76   enum SourceAssociation {
77     // Associates the metrics in the file with the current run of the browser.
78     // The reporting will take place as part of the normal logging of
79     // histograms.
80     ASSOCIATE_CURRENT_RUN,
81 
82     // Associates the metrics in the file with the previous run of the browser.
83     // The reporting will take place as part of the "stability" histograms.
84     // This is important when metrics are dumped as part of a crash of the
85     // previous run. This can only be used with SOURCE_HISTOGRAMS_ATOMIC_FILE.
86     ASSOCIATE_PREVIOUS_RUN,
87 
88     // Associates the metrics in the file with a profile embedded in the
89     // same file. The reporting will take place at a convenient time after
90     // startup when the browser is otherwise idle. If there is no embedded
91     // system profile, these metrics will be lost.
92     ASSOCIATE_INTERNAL_PROFILE,
93 
94     // Like above but fall back to ASSOCIATE_PREVIOUS_RUN if there is no
95     // embedded profile. This has a small cost during startup as that is
96     // when previous-run metrics are sent so the file has to be checked at
97     // that time even though actual transfer will be delayed if an
98     // embedded profile is found.
99     ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN,
100 
101     // Used to only record the metadata of |ASSOCIATE_INTERNAL_PROFILE| but not
102     // merge the metrics. Instead, write metadata such as the samples count etc,
103     // to prefs then delete file. To precisely simulate the
104     // |ASSOCIATE_INTERNAL_PROFILE| behavior, one file record will be read out
105     // and added to the stability prefs each time the metrics service requests
106     // the |ASSOCIATE_INTERNAL_PROFILE| source metrics. Finally, the results
107     // will be recorded as stability metrics in the next run.
108     ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER,
109   };
110 
111   enum FilterAction {
112     // Process this file normally.
113     FILTER_PROCESS_FILE,
114 
115     // This file is the active metrics file for the current process.  Don't
116     // do anything with it. This is effectively "try later" but isn't
117     // added to the results histogram because the file has to be ignored
118     // throughout the life of the browser and that skews the distribution.
119     FILTER_ACTIVE_THIS_PID,
120 
121     // Try again. This could happen within milliseconds or minutes but no other
122     // files from the same source will get processed in between. The process
123     // must have permission to "touch" the file and alter its last-modified
124     // time because files are always processed in order of those stamps.
125     FILTER_TRY_LATER,
126 
127     // Skip this file. This file will not be processed until it has changed
128     // (i.e. had its last-modified time updated). If it is "atomic", an
129     // attempt will be made to delete it.
130     FILTER_SKIP_FILE,
131   };
132 
133   // A "filter" can be defined to determine what to do on a per-file basis.
134   // This is called only after a file has been found to be the next one to
135   // be processed so it's okay if filter calls are relatively expensive.
136   // Calls are made on a background thread of low-priority and capable of
137   // doing I/O.
138   using FilterCallback =
139       base::RepeatingCallback<FilterAction(const base::FilePath& path)>;
140 
141   // Parameters for RegisterSource, defined as a structure so that new ones
142   // can be added (with default values) without requiring changes to all
143   // call sites.
144   struct Params {
145     Params(const base::FilePath& path,
146            SourceType type,
147            SourceAssociation association,
148            std::string_view prefs_key = std::string_view());
149 
150     ~Params();
151 
152     // The standard parameters, set during construction.
153     const base::FilePath path;
154     const SourceType type;
155     const SourceAssociation association;
156     const std::string_view prefs_key;  // Non-owning; referent must outlive this.
157 
158     // Other parameters that can be set after construction.
159     FilterCallback filter;       // Run-time check for what to do with file.
160     base::TimeDelta max_age;     // Maximum age of a file (0=unlimited).
161     size_t max_dir_kib = 0;      // Maximum KiB in a directory (0=inf).
162     size_t max_dir_files = 100;  // Maximum files in a directory (0=inf).
163   };
164 
165   explicit FileMetricsProvider(PrefService* local_state);
166 
167   FileMetricsProvider(const FileMetricsProvider&) = delete;
168   FileMetricsProvider& operator=(const FileMetricsProvider&) = delete;
169 
170   ~FileMetricsProvider() override;
171 
172   // Indicates a file or directory to be monitored and how the file or files
173   // within that directory are used. Because some metadata may need to persist
174   // across process restarts, preferences entries are used based on the
175   // |prefs_key| name. Call RegisterSourcePrefs() with the same name to create
176   // the necessary keys in advance. Set |prefs_key| empty (the default) if
177   // no persistence is required. ACTIVE files shouldn't have a pref key as
178   // they update internal state about what has been previously sent.
179   // If `metrics_reporting_enabled` is false, the associated file or directory
180   // is deleted (except for ACTIVE files).
181   void RegisterSource(const Params& params, bool metrics_reporting_enabled);
182 
183   // Registers all necessary preferences for maintaining persistent state
184   // about a monitored file across process restarts. The |prefs_key| is
185   // typically the filename.
186   static void RegisterSourcePrefs(PrefRegistrySimple* prefs,
187                                   std::string_view prefs_key);
188 
189   static void RegisterPrefs(PrefRegistrySimple* prefs);
190 
191  private:
192   friend class FileMetricsProviderTest;
193   friend class TestFileMetricsProvider;
194 
195   // The different results that can occur accessing a file.
196   enum AccessResult {
197     // File was successfully mapped.
198     ACCESS_RESULT_SUCCESS,
199 
200     // File does not exist.
201     ACCESS_RESULT_DOESNT_EXIST,
202 
203     // File exists but not modified since last read.
204     ACCESS_RESULT_NOT_MODIFIED,
205 
206     // File is not valid: is a directory or zero-size.
207     ACCESS_RESULT_INVALID_FILE,
208 
209     // System could not map file into memory.
210     ACCESS_RESULT_SYSTEM_MAP_FAILURE,
211 
212     // File had invalid contents.
213     ACCESS_RESULT_INVALID_CONTENTS,
214 
215     // File could not be opened.
216     ACCESS_RESULT_NO_OPEN,
217 
218     // File contents were internally deleted.
219     ACCESS_RESULT_MEMORY_DELETED,
220 
221     // File is scheduled to be tried again later.
222     ACCESS_RESULT_FILTER_TRY_LATER,
223 
224     // File was skipped according to filtering rules.
225     ACCESS_RESULT_FILTER_SKIP_FILE,
226 
227     // File was skipped because it exceeds the maximum age.
228     ACCESS_RESULT_TOO_OLD,
229 
230     // File was skipped because too many files in directory.
231     ACCESS_RESULT_TOO_MANY_FILES,
232 
233     // File was skipped because too many bytes in directory.
234     ACCESS_RESULT_TOO_MANY_BYTES,
235 
236     // The file was skipped because it's being written by this process.
237     ACCESS_RESULT_THIS_PID,
238 
239     // The file had no embedded system profile.
240     ACCESS_RESULT_NO_PROFILE,
241 
242     // The file had internal data corruption.
243     ACCESS_RESULT_DATA_CORRUPTION,
244 
245     // The file is not writable when it should be.
246     ACCESS_RESULT_NOT_WRITABLE,
247 
248     ACCESS_RESULT_MAX
249   };
250 
251   // Information about sources being monitored; defined and used exclusively
252   // inside the .cc file.
253   struct SourceInfo;
254   using SourceInfoList = std::list<std::unique_ptr<SourceInfo>>;
255 
256   // Records an access result in a histogram.
257   static void RecordAccessResult(AccessResult result);
258 
259   // Looks for the next file to read within a directory. Returns true if a
260   // file was found. This is part of CheckAndMergeMetricSourcesOnTaskRunner
261   // and so runs on a thread capable of I/O. The |source| structure will
262   // be internally updated to indicate the next file to be read.
263   static bool LocateNextFileInDirectory(SourceInfo* source);
264 
265   // Handles the completion of a source.
266   static void FinishedWithSource(SourceInfo* source, AccessResult result);
267 
268   // Checks a list of sources (on a task-runner allowed to do I/O) and merges
269   // any data found within them.
270   // Returns a list of histogram sample counts for sources of type
271   // ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER that were processed.
272   static std::vector<size_t> CheckAndMergeMetricSourcesOnTaskRunner(
273       SourceInfoList* sources);
274 
275   // Checks a single source and maps it into memory.
276   static AccessResult CheckAndMapMetricSource(SourceInfo* source);
277 
278   // Merges all of the histograms from a |source| to the StatisticsRecorder.
279   // Returns the number of histograms merged.
280   static size_t MergeHistogramDeltasFromSource(SourceInfo* source);
281 
282   // Records all histograms from a given source via a snapshot-manager. Only the
283   // histograms that have |required_flags| will be recorded.
284   static void RecordHistogramSnapshotsFromSource(
285       base::HistogramSnapshotManager* snapshot_manager,
286       SourceInfo* source,
287       base::HistogramBase::Flags required_flags);
288 
289   // Calls source filter (if any) and returns the desired action.
290   static AccessResult HandleFilterSource(SourceInfo* source,
291                                          const base::FilePath& path);
292 
293   // The part of ProvideIndependentMetrics that runs as a background task.
294   static bool ProvideIndependentMetricsOnTaskRunner(
295       SourceInfo* source,
296       ChromeUserMetricsExtension* uma_proto,
297       base::HistogramSnapshotManager* snapshot_manager,
298       base::OnceClosure serialize_log_callback);
299 
300   // Collects the metadata of the |source|.
301   // Returns the number of histogram samples from that source.
302   static size_t CollectFileMetadataFromSource(SourceInfo* source);
303 
304   // Appends the samples count to pref on UI thread.
305   void AppendToSamplesCountPref(std::vector<size_t> samples_count);
306 
307   // Creates a task to check all monitored sources for updates.
308   void ScheduleSourcesCheck();
309 
310   // Takes a list of sources checked by an external task and determines what
311   // to do with each. Virtual for testing.
312   virtual void RecordSourcesChecked(SourceInfoList* checked,
313                                     std::vector<size_t> samples_counts);
314 
315   // Schedules the deletion of a file in the background using the task-runner.
316   void DeleteFileAsync(const base::FilePath& path);
317 
318   // Updates the persistent state information to show a source as being read.
319   void RecordSourceAsRead(SourceInfo* source);
320 
321   // metrics::MetricsProvider:
322   void OnDidCreateMetricsLog() override;
323   bool HasIndependentMetrics() override;
324   void ProvideIndependentMetrics(
325       base::OnceClosure serialize_log_callback,
326       base::OnceCallback<void(bool)> done_callback,
327       ChromeUserMetricsExtension* uma_proto,
328       base::HistogramSnapshotManager* snapshot_manager) override;
329   bool HasPreviousSessionData() override;
330   void RecordInitialHistogramSnapshots(
331       base::HistogramSnapshotManager* snapshot_manager) override;
332 
333   // base::StatisticsRecorder::HistogramProvider:
334   void MergeHistogramDeltas(bool async,
335                             base::OnceClosure done_callback) override;
336 
337   // The part of ProvideIndependentMetrics that runs after background task.
338   void ProvideIndependentMetricsCleanup(
339       base::OnceCallback<void(bool)> done_callback,
340       std::unique_ptr<SourceInfo> source,
341       bool success);
342 
343   // Simulates the independent metrics to read the first item from
344   // kMetricsBrowserMetricsMetadata and updates the stability prefs accordingly.
345   // Returns true if the pref isn't empty.
346   bool SimulateIndependentMetrics();
347 
348   // A list of sources not currently active that need to be checked for changes.
349   SourceInfoList sources_to_check_;
350 
351   // A list of currently active sources to be merged when required.
352   SourceInfoList sources_mapped_;
353 
354   // A list of sources that carry their own embedded system profile.
355   SourceInfoList sources_with_profile_;
356 
357   // A list of sources for a previous run. These are held separately because
358   // they are not subject to the periodic background checking that handles
359   // metrics for the current run.
360   SourceInfoList sources_for_previous_run_;
361 
362   // The preferences-service used to store persistent state about sources.
363   raw_ptr<PrefService> pref_service_;
364 
365   SEQUENCE_CHECKER(sequence_checker_);
366   base::WeakPtrFactory<FileMetricsProvider> weak_factory_{this};
367 };
368 
369 }  // namespace metrics
370 
371 #endif  // COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
372