• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
6 #define COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
7 
8 #include <stddef.h>
9 
10 #include <list>
11 #include <memory>
12 #include <vector>
13 
14 #include "base/files/file_path.h"
15 #include "base/functional/callback_forward.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/memory/raw_ptr.h"
18 #include "base/memory/scoped_refptr.h"
19 #include "base/memory/weak_ptr.h"
20 #include "base/metrics/statistics_recorder.h"
21 #include "base/sequence_checker.h"
22 #include "base/time/time.h"
23 #include "components/metrics/metrics_provider.h"
24 
25 class PrefRegistrySimple;
26 class PrefService;
27 
28 namespace base {
29 class TaskRunner;
30 }
31 
32 namespace metrics {
33 
34 // FileMetricsProvider gathers and logs histograms written to files on disk.
35 // Any number of files can be registered and will be polled once per upload
36 // cycle (at startup and periodically thereafter -- about every 30 minutes
37 // for desktop) for data to send.
38 class FileMetricsProvider : public MetricsProvider,
39                             public base::StatisticsRecorder::HistogramProvider {
40  public:
41   struct Params;
42 
43   enum SourceType {
44     // "Atomic" files are a collection of histograms that are written
45     // completely in a single atomic operation (typically a write followed
46     // by an atomic rename) and the file is never updated again except to
47     // be replaced by a completely new set of histograms. This is the only
48     // option that can be used if the file is not writeable by *this*
49     // process. Once the file has been read, an attempt will be made to
50     // delete it thus providing some measure of safety should different
51     // instantiations (such as by different users of a system-level install)
52     // try to read it. In case the delete operation fails, this class
53     // persistently tracks the last-modified time of the file so it will
54     // not be read a second time.
55     SOURCE_HISTOGRAMS_ATOMIC_FILE,
56 
57     // A directory of atomic PMA files. This handles a directory in which
58     // files of metrics are atomically added. Only files ending with ".pma"
59     // will be read. They are read according to their last-modified time and
60     // never read more than once (unless they change). Only one file will
61     // be read per reporting cycle. Filenames that start with a dot (.) or
62     // an underscore (_) are ignored so temporary files (perhaps created by
63     // the ImportantFileWriter) will not get read. Files that have been
64     // read will be attempted to be deleted; should those files not be
65     // deletable by this process, it is the responsibility of the producer
66     // to keep the directory pruned in some manner. Added files must have a
67     // timestamp later (not the same or earlier) than the newest file that
68     // already exists or it may be assumed to have been already uploaded.
69     SOURCE_HISTOGRAMS_ATOMIC_DIR,
70 
71     // "Active" files may be open by one or more other processes and updated
72     // at any time with new samples or new histograms. Such files may also be
73     // inactive for any period of time only to be opened again and have new
74     // data written to them. The file should probably never be deleted because
75     // there would be no guarantee that the data has been reported.
76     SOURCE_HISTOGRAMS_ACTIVE_FILE,
77   };
78 
79   enum SourceAssociation {
80     // Associates the metrics in the file with the current run of the browser.
81     // The reporting will take place as part of the normal logging of
82     // histograms.
83     ASSOCIATE_CURRENT_RUN,
84 
85     // Associates the metrics in the file with the previous run of the browser.
86     // The reporting will take place as part of the "stability" histograms.
87     // This is important when metrics are dumped as part of a crash of the
88     // previous run. This can only be used with SOURCE_HISTOGRAMS_ATOMIC_FILE.
89     ASSOCIATE_PREVIOUS_RUN,
90 
91     // Associates the metrics in the file with a profile embedded in the
92     // same file. The reporting will take place at a convenient time after
93     // startup when the browser is otherwise idle. If there is no embedded
94     // system profile, these metrics will be lost.
95     ASSOCIATE_INTERNAL_PROFILE,
96 
97     // Like above but fall back to ASSOCIATE_PREVIOUS_RUN if there is no
98     // embedded profile. This has a small cost during startup as that is
99     // when previous-run metrics are sent so the file has to be checked at
100     // that time even though actual transfer will be delayed if an
101     // embedded profile is found.
102     ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN,
103 
104     // Used to only record the metadata of |ASSOCIATE_INTERNAL_PROFILE| but not
105     // merge the metrics. Instead, write metadata such as the samples count etc,
106     // to prefs then delete file. To precisely simulate the
107     // |ASSOCIATE_INTERNAL_PROFILE| behavior, one file record will be read out
108     // and added to the stability prefs each time the metrics service requests
109     // the |ASSOCIATE_INTERNAL_PROFILE| source metrics. Finally, the results
110     // will be recorded as stability metrics in the next run.
111     ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER,
112   };
113 
114   enum FilterAction {
115     // Process this file normally.
116     FILTER_PROCESS_FILE,
117 
118     // This file is the active metrics file for the current process.  Don't
119     // do anything with it. This is effectively "try later" but isn't
120     // added to the results histogram because the file has to be ignored
121     // throughout the life of the browser and that skews the distribution.
122     FILTER_ACTIVE_THIS_PID,
123 
124     // Try again. This could happen within milliseconds or minutes but no other
125     // files from the same source will get processed in between. The process
126     // must have permission to "touch" the file and alter its last-modified
127     // time because files are always processed in order of those stamps.
128     FILTER_TRY_LATER,
129 
130     // Skip this file. This file will not be processed until it has changed
131     // (i.e. had its last-modified time updated). If it is "atomic", an
132     // attempt will be made to delete it.
133     FILTER_SKIP_FILE,
134   };
135 
136   // A "filter" can be defined to determine what to do on a per-file basis.
137   // This is called only after a file has been found to be the next one to
138   // be processed so it's okay if filter calls are relatively expensive.
139   // Calls are made on a background thread of low-priority and capable of
140   // doing I/O.
141   using FilterCallback =
142       base::RepeatingCallback<FilterAction(const base::FilePath& path)>;
143 
144   // Parameters for RegisterSource, defined as a structure to allow new
145   // ones to be added (with default values) without requiring changes
146   // to all call sites.
147   struct Params {
148     Params(const base::FilePath& path,
149            SourceType type,
150            SourceAssociation association,
151            base::StringPiece prefs_key = base::StringPiece());
152 
153     ~Params();
154 
155     // The standard parameters, set during construction.
156     const base::FilePath path;
157     const SourceType type;
158     const SourceAssociation association;
159     const base::StringPiece prefs_key;
160 
161     // Other parameters that can be set after construction.
162     FilterCallback filter;       // Run-time check for what to do with file.
163     base::TimeDelta max_age;     // Maximum age of a file (0=unlimited).
164     size_t max_dir_kib = 0;      // Maximum KiB in a directory (0=inf).
165     size_t max_dir_files = 100;  // Maximum files in a directory (0=inf).
166   };
167 
168   explicit FileMetricsProvider(PrefService* local_state);
169 
170   FileMetricsProvider(const FileMetricsProvider&) = delete;
171   FileMetricsProvider& operator=(const FileMetricsProvider&) = delete;
172 
173   ~FileMetricsProvider() override;
174 
175   // Indicates a file or directory to be monitored and how the file or files
176   // within that directory are used. Because some metadata may need to persist
177   // across process restarts, preferences entries are used based on the
178   // |prefs_key| name. Call RegisterSourcePrefs() with the same name to create
179   // the necessary keys in advance. Set |prefs_key| empty (nullptr will work) if
180   // no persistence is required. ACTIVE files shouldn't have a pref key as
181   // they update internal state about what has been previously sent.
182   void RegisterSource(const Params& params);
183 
184   // Registers all necessary preferences for maintaining persistent state
185   // about a monitored file across process restarts. The |prefs_key| is
186   // typically the filename.
187   static void RegisterSourcePrefs(PrefRegistrySimple* prefs,
188                                   const base::StringPiece prefs_key);
189 
190   static void RegisterPrefs(PrefRegistrySimple* prefs);
191 
192   // Sets the task runner to use for testing.
193   static void SetTaskRunnerForTesting(
194       const scoped_refptr<base::TaskRunner>& task_runner);
195 
196  private:
197   friend class FileMetricsProviderTest;
198 
199   // The different results that can occur accessing a file.
200   enum AccessResult {
201     // File was successfully mapped.
202     ACCESS_RESULT_SUCCESS,
203 
204     // File does not exist.
205     ACCESS_RESULT_DOESNT_EXIST,
206 
207     // File exists but not modified since last read.
208     ACCESS_RESULT_NOT_MODIFIED,
209 
210     // File is not valid: is a directory or zero-size.
211     ACCESS_RESULT_INVALID_FILE,
212 
213     // System could not map file into memory.
214     ACCESS_RESULT_SYSTEM_MAP_FAILURE,
215 
216     // File had invalid contents.
217     ACCESS_RESULT_INVALID_CONTENTS,
218 
219     // File could not be opened.
220     ACCESS_RESULT_NO_OPEN,
221 
222     // File contents were internally deleted.
223     ACCESS_RESULT_MEMORY_DELETED,
224 
225     // File is scheduled to be tried again later.
226     ACCESS_RESULT_FILTER_TRY_LATER,
227 
228     // File was skipped according to filtering rules.
229     ACCESS_RESULT_FILTER_SKIP_FILE,
230 
231     // File was skipped because it exceeds the maximum age.
232     ACCESS_RESULT_TOO_OLD,
233 
234     // File was skipped because too many files in directory.
235     ACCESS_RESULT_TOO_MANY_FILES,
236 
237     // File was skipped because too many bytes in directory.
238     ACCESS_RESULT_TOO_MANY_BYTES,
239 
240     // The file was skipped because it's being written by this process.
241     ACCESS_RESULT_THIS_PID,
242 
243     // The file had no embedded system profile.
244     ACCESS_RESULT_NO_PROFILE,
245 
246     // The file had internal data corruption.
247     ACCESS_RESULT_DATA_CORRUPTION,
248 
249     // The file is not writable when it should be.
250     ACCESS_RESULT_NOT_WRITABLE,
251 
252     ACCESS_RESULT_MAX  // Sentinel: one past the last valid result; keep last.
253   };
254 
255   // Information about sources being monitored; defined and used exclusively
256   // inside the .cc file.
257   struct SourceInfo;
258   using SourceInfoList = std::list<std::unique_ptr<SourceInfo>>;
259 
260   // Records an access result in a histogram.
261   static void RecordAccessResult(AccessResult result);
262 
263   // Looks for the next file to read within a directory. Returns true if a
264   // file was found. This is part of checking sources on the task runner
265   // and so runs on a thread capable of I/O. The |source| structure will
266   // be internally updated to indicate the next file to be read.
267   static bool LocateNextFileInDirectory(SourceInfo* source);
268 
269   // Handles the completion of a source.
270   static void FinishedWithSource(SourceInfo* source, AccessResult result);
271 
272   // Checks a list of sources (on a task-runner allowed to do I/O) and merge
273   // any data found within them.
274   // Returns a list of histogram sample counts for sources of type
275   // ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER that were processed.
276   static std::vector<size_t> CheckAndMergeMetricSourcesOnTaskRunner(
277       SourceInfoList* sources);
278 
279   // Checks a single source and maps it into memory.
280   static AccessResult CheckAndMapMetricSource(SourceInfo* source);
281 
282   // Merges all of the histograms from a |source| to the StatisticsRecorder.
283   // Returns the number of histograms merged.
284   static size_t MergeHistogramDeltasFromSource(SourceInfo* source);
285 
286   // Records all histograms from a given source via a snapshot-manager. Only the
287   // histograms that have |required_flags| will be recorded.
288   static void RecordHistogramSnapshotsFromSource(
289       base::HistogramSnapshotManager* snapshot_manager,
290       SourceInfo* source,
291       base::HistogramBase::Flags required_flags);
292 
293   // Calls source filter (if any) and returns the desired action.
294   static AccessResult HandleFilterSource(SourceInfo* source,
295                                          const base::FilePath& path);
296 
297   // The part of ProvideIndependentMetrics that runs as a background task.
298   static bool ProvideIndependentMetricsOnTaskRunner(
299       SourceInfo* source,
300       SystemProfileProto* system_profile_proto,
301       base::HistogramSnapshotManager* snapshot_manager);
302 
303   // Collects the metadata of the |source|.
304   // Returns the number of histogram samples from that source.
305   static size_t CollectFileMetadataFromSource(SourceInfo* source);
306 
307   // Appends the samples count to pref on UI thread.
308   void AppendToSamplesCountPref(std::vector<size_t> samples_count);
309 
310   // Creates a task to check all monitored sources for updates.
311   void ScheduleSourcesCheck();
312 
313   // Takes a list of sources checked by an external task and determines what
314   // to do with each.
315   void RecordSourcesChecked(SourceInfoList* checked,
316                             std::vector<size_t> samples_counts);
317 
318   // Schedules the deletion of a file in the background using the task-runner.
319   void DeleteFileAsync(const base::FilePath& path);
320 
321   // Updates the persistent state information to show a source as being read.
322   void RecordSourceAsRead(SourceInfo* source);
323 
324   // metrics::MetricsProvider:
325   void OnDidCreateMetricsLog() override;
326   bool HasIndependentMetrics() override;
327   void ProvideIndependentMetrics(
328       base::OnceCallback<void(bool)> done_callback,
329       ChromeUserMetricsExtension* uma_proto,
330       base::HistogramSnapshotManager* snapshot_manager) override;
331   bool HasPreviousSessionData() override;
332   void RecordInitialHistogramSnapshots(
333       base::HistogramSnapshotManager* snapshot_manager) override;
334 
335   // base::StatisticsRecorder::HistogramProvider:
336   void MergeHistogramDeltas() override;
337 
338   // The part of ProvideIndependentMetrics that runs after background task.
339   void ProvideIndependentMetricsCleanup(
340       base::OnceCallback<void(bool)> done_callback,
341       std::unique_ptr<SourceInfo> source,
342       bool success);
343 
344   // Simulates the independent metrics to read the first item from
345   // kMetricsBrowserMetricsMetadata and updates the stability prefs accordingly.
346   // Returns true if the pref isn't empty.
347   bool SimulateIndependentMetrics();
348 
349   // A task-runner capable of performing I/O.
350   scoped_refptr<base::TaskRunner> task_runner_;
351 
352   // A list of sources not currently active that need to be checked for changes.
353   SourceInfoList sources_to_check_;
354 
355   // A list of currently active sources to be merged when required.
356   SourceInfoList sources_mapped_;
357 
358   // Sources presumably holding an embedded profile (per name); TODO confirm.
359   SourceInfoList sources_with_profile_;
360 
361   // A list of sources for a previous run. These are held separately because
362   // they are not subject to the periodic background checking that handles
363   // metrics for the current run.
364   SourceInfoList sources_for_previous_run_;
365 
366   // The preferences-service used to store persistent state about sources.
367   raw_ptr<PrefService> pref_service_;
368 
369   SEQUENCE_CHECKER(sequence_checker_);
370   base::WeakPtrFactory<FileMetricsProvider> weak_factory_{this};
371 };
372 
373 }  // namespace metrics
374 
375 #endif  // COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
376