// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_
#define COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_

#include <stddef.h>

#include <list>
#include <memory>
#include <string_view>
#include <vector>

#include "base/files/file_path.h"
#include "base/functional/callback_forward.h"
#include "base/gtest_prod_util.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/scoped_refptr.h"
#include "base/memory/weak_ptr.h"
#include "base/metrics/statistics_recorder.h"
#include "base/sequence_checker.h"
#include "base/time/time.h"
#include "components/metrics/metrics_provider.h"

class PrefRegistrySimple;
class PrefService;

namespace metrics {

// FileMetricsProvider gathers and logs histograms written to files on disk.
// Any number of files can be registered and will be polled once per upload
// cycle (at startup and periodically thereafter -- about every 30 minutes
// for desktop) for data to send.
//
// The class implements two interfaces: MetricsProvider (the overrides grouped
// under "metrics::MetricsProvider:" below) and
// base::StatisticsRecorder::HistogramProvider (MergeHistogramDeltas()).
class FileMetricsProvider : public MetricsProvider,
                            public base::StatisticsRecorder::HistogramProvider {
 public:
  struct Params;

  // How the file (or directory of files) at a registered path is produced
  // and, consequently, how it may be read.
  enum SourceType {
    // "Atomic" files are a collection of histograms that are written
    // completely in a single atomic operation (typically a write followed
    // by an atomic rename) and the file is never updated again except to
    // be replaced by a completely new set of histograms. This is the only
    // option that can be used if the file is not writeable by *this*
    // process. Once the file has been read, an attempt will be made to
    // delete it thus providing some measure of safety should different
    // instantiations (such as by different users of a system-level install)
    // try to read it. In case the delete operation fails, this class
    // persistently tracks the last-modified time of the file so it will
    // not be read a second time.
    SOURCE_HISTOGRAMS_ATOMIC_FILE,

    // A directory of atomic PMA files. This handles a directory in which
    // files of metrics are atomically added. Only files ending with ".pma"
    // will be read. They are read according to their last-modified time and
    // never read more than once (unless they change). Only one file will
    // be read per reporting cycle. Filenames that start with a dot (.) or
    // an underscore (_) are ignored so temporary files (perhaps created by
    // the ImportantFileWriter) will not get read. An attempt will be made to
    // delete files that have been read; should those files not be
    // deletable by this process, it is the responsibility of the producer
    // to keep the directory pruned in some manner. Added files must have a
    // timestamp later (not the same or earlier) than the newest file that
    // already exists or it may be assumed to have been already uploaded.
    SOURCE_HISTOGRAMS_ATOMIC_DIR,

    // "Active" files may be open by one or more other processes and updated
    // at any time with new samples or new histograms. Such files may also be
    // inactive for any period of time only to be opened again and have new
    // data written to them. The file should probably never be deleted because
    // there would be no guarantee that the data has been reported.
    SOURCE_HISTOGRAMS_ACTIVE_FILE,
  };

  // Which browser run / system profile the metrics found in a source are
  // attributed to, and therefore when and how they get reported.
  enum SourceAssociation {
    // Associates the metrics in the file with the current run of the browser.
    // The reporting will take place as part of the normal logging of
    // histograms.
    ASSOCIATE_CURRENT_RUN,

    // Associates the metrics in the file with the previous run of the browser.
    // The reporting will take place as part of the "stability" histograms.
    // This is important when metrics are dumped as part of a crash of the
    // previous run. This can only be used with atomic sources.
    // NOTE(review): this comment used to name "FILE_HISTOGRAMS_ATOMIC", which
    // is not an enumerator of SourceType; presumably it means
    // SOURCE_HISTOGRAMS_ATOMIC_FILE -- confirm against the .cc file.
    ASSOCIATE_PREVIOUS_RUN,

    // Associates the metrics in the file with a profile embedded in the
    // same file. The reporting will take place at a convenient time after
    // startup when the browser is otherwise idle. If there is no embedded
    // system profile, these metrics will be lost.
    ASSOCIATE_INTERNAL_PROFILE,

    // Like above but fall back to ASSOCIATE_PREVIOUS_RUN if there is no
    // embedded profile. This has a small cost during startup as that is
    // when previous-run metrics are sent so the file has to be checked at
    // that time even though actual transfer will be delayed if an
    // embedded profile is found.
    ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN,

    // Used to only record the metadata of |ASSOCIATE_INTERNAL_PROFILE| but not
    // merge the metrics. Instead, write metadata such as the samples count etc,
    // to prefs then delete file. To precisely simulate the
    // |ASSOCIATE_INTERNAL_PROFILE| behavior, one file record will be read out
    // and added to the stability prefs each time the metrics service requests
    // the |ASSOCIATE_INTERNAL_PROFILE| source metrics. Finally, the results
    // will be recorded as stability metrics in the next run.
    ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER,
  };

  // The verdict returned by a FilterCallback for a single file.
  enum FilterAction {
    // Process this file normally.
    FILTER_PROCESS_FILE,

    // This file is the active metrics file for the current process. Don't
    // do anything with it. This is effectively "try later" but isn't
    // added to the results histogram because the file has to be ignored
    // throughout the life of the browser and that skews the distribution.
    FILTER_ACTIVE_THIS_PID,

    // Try again. This could happen within milliseconds or minutes but no other
    // files from the same source will get processed in between. The process
    // must have permission to "touch" the file and alter its last-modified
    // time because files are always processed in order of those stamps.
    FILTER_TRY_LATER,

    // Skip this file. This file will not be processed until it has changed
    // (i.e. had its last-modified time updated). If it is "atomic", an
    // attempt will be made to delete it.
    FILTER_SKIP_FILE,
  };

  // A "filter" can be defined to determine what to do on a per-file basis.
  // This is called only after a file has been found to be the next one to
  // be processed so it's okay if filter calls are relatively expensive.
  // Calls are made on a background thread of low-priority and capable of
  // doing I/O.
  using FilterCallback =
      base::RepeatingCallback<FilterAction(const base::FilePath& path)>;

  // Parameters for RegisterSource, defined as a structure so that new ones
  // can be added (with default values) without requiring changes to all
  // call sites.
  struct Params {
    Params(const base::FilePath& path,
           SourceType type,
           SourceAssociation association,
           std::string_view prefs_key = std::string_view());

    ~Params();

    // The standard parameters, set during construction.
    const base::FilePath path;
    const SourceType type;
    const SourceAssociation association;
    // Non-owning view: the characters it refers to must outlive this Params
    // object, since std::string_view does not copy or own them.
    const std::string_view prefs_key;

    // Other parameters that can be set after construction.
    FilterCallback filter;       // Run-time check for what to do with file.
    base::TimeDelta max_age;     // Maximum age of a file (0=unlimited).
    // Maximum size of a directory (0=inf). NOTE(review): the name implies the
    // unit is KiB although this was previously described as "bytes" --
    // confirm against the .cc file.
    size_t max_dir_kib = 0;
    size_t max_dir_files = 100;  // Maximum files in a directory (0=inf).
  };

  // |local_state| is the preferences service; presumably it is what ends up
  // in |pref_service_| (the constructor body is not in this header).
  explicit FileMetricsProvider(PrefService* local_state);

  // Not copyable or copy-assignable.
  FileMetricsProvider(const FileMetricsProvider&) = delete;
  FileMetricsProvider& operator=(const FileMetricsProvider&) = delete;

  ~FileMetricsProvider() override;

  // Indicates a file or directory to be monitored and how the file or files
  // within that directory are used. Because some metadata may need to persist
  // across process restarts, preferences entries are used based on the
  // |prefs_key| name. Call RegisterSourcePrefs() with the same name to create
  // the necessary keys in advance. Leave |prefs_key| empty if no persistence
  // is required. ACTIVE files shouldn't have a pref key as they update
  // internal state about what has been previously sent.
  // NOTE(review): this comment used to say "nullptr will work" for
  // |prefs_key|. The key is a std::string_view, and constructing one from a
  // null pointer is not valid; pass an empty view instead.
  // If `metrics_reporting_enabled` is false, the associated file or directory
  // is deleted (except for ACTIVE files).
  void RegisterSource(const Params& params, bool metrics_reporting_enabled);

  // Registers all necessary preferences for maintaining persistent state
  // about a monitored file across process restarts. The |prefs_key| is
  // typically the filename.
  static void RegisterSourcePrefs(PrefRegistrySimple* prefs,
                                  std::string_view prefs_key);

  // Registers preferences with |prefs|. Unlike RegisterSourcePrefs() this
  // takes no per-source key; which prefs are registered is not visible in
  // this header.
  static void RegisterPrefs(PrefRegistrySimple* prefs);

 private:
  // Test classes that are granted access to the private members below.
  friend class FileMetricsProviderTest;
  friend class TestFileMetricsProvider;

  // The different results that can occur accessing a file.
  enum AccessResult {
    // File was successfully mapped.
    ACCESS_RESULT_SUCCESS,

    // File does not exist.
    ACCESS_RESULT_DOESNT_EXIST,

    // File exists but not modified since last read.
    ACCESS_RESULT_NOT_MODIFIED,

    // File is not valid: is a directory or zero-size.
    ACCESS_RESULT_INVALID_FILE,

    // System could not map file into memory.
    ACCESS_RESULT_SYSTEM_MAP_FAILURE,

    // File had invalid contents.
    ACCESS_RESULT_INVALID_CONTENTS,

    // File could not be opened.
    ACCESS_RESULT_NO_OPEN,

    // File contents were internally deleted.
    ACCESS_RESULT_MEMORY_DELETED,

    // File is scheduled to be tried again later.
    ACCESS_RESULT_FILTER_TRY_LATER,

    // File was skipped according to filtering rules.
    ACCESS_RESULT_FILTER_SKIP_FILE,

    // File was skipped because it exceeds the maximum age.
    ACCESS_RESULT_TOO_OLD,

    // File was skipped because too many files in directory.
    ACCESS_RESULT_TOO_MANY_FILES,

    // File was skipped because too many bytes in directory.
    ACCESS_RESULT_TOO_MANY_BYTES,

    // The file was skipped because it's being written by this process.
    ACCESS_RESULT_THIS_PID,

    // The file had no embedded system profile.
    ACCESS_RESULT_NO_PROFILE,

    // The file had internal data corruption.
    ACCESS_RESULT_DATA_CORRUPTION,

    // The file is not writable when it should be.
    ACCESS_RESULT_NOT_WRITABLE,

    // Count of the values above. NOTE(review): presumably used as the
    // exclusive bound when RecordAccessResult() logs to a histogram, in which
    // case it must stay last -- confirm against the .cc file.
    ACCESS_RESULT_MAX
  };

  // Information about sources being monitored; defined and used exclusively
  // inside the .cc file.
  struct SourceInfo;
  using SourceInfoList = std::list<std::unique_ptr<SourceInfo>>;

  // Records an access result in a histogram.
  static void RecordAccessResult(AccessResult result);

  // Looks for the next file to read within a directory. Returns true if a
  // file was found. This is part of the background source check and so runs
  // on a thread capable of I/O. The |source| structure will be internally
  // updated to indicate the next file to be read.
  // NOTE(review): this comment used to name
  // "CheckAndMapNewMetricSourcesOnTaskRunner", which is not declared in this
  // header; presumably it means CheckAndMergeMetricSourcesOnTaskRunner().
  static bool LocateNextFileInDirectory(SourceInfo* source);

  // Handles the completion of a source.
  static void FinishedWithSource(SourceInfo* source, AccessResult result);

  // Checks a list of sources (on a task-runner allowed to do I/O) and merges
  // any data found within them.
  // Returns a list of histogram sample counts for sources of type
  // ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER that were processed.
  static std::vector<size_t> CheckAndMergeMetricSourcesOnTaskRunner(
      SourceInfoList* sources);

  // Checks a single source and maps it into memory.
  static AccessResult CheckAndMapMetricSource(SourceInfo* source);

  // Merges all of the histograms from a |source| to the StatisticsRecorder.
  // Returns the number of histograms merged.
  static size_t MergeHistogramDeltasFromSource(SourceInfo* source);

  // Records all histograms from a given source via a snapshot-manager. Only the
  // histograms that have |required_flags| will be recorded.
  static void RecordHistogramSnapshotsFromSource(
      base::HistogramSnapshotManager* snapshot_manager,
      SourceInfo* source,
      base::HistogramBase::Flags required_flags);

  // Calls source filter (if any) and returns the desired action.
  static AccessResult HandleFilterSource(SourceInfo* source,
                                         const base::FilePath& path);

  // The part of ProvideIndependentMetrics that runs as a background task.
  static bool ProvideIndependentMetricsOnTaskRunner(
      SourceInfo* source,
      ChromeUserMetricsExtension* uma_proto,
      base::HistogramSnapshotManager* snapshot_manager,
      base::OnceClosure serialize_log_callback);

  // Collects the metadata of the |source|.
  // Returns the number of histogram samples from that source.
  static size_t CollectFileMetadataFromSource(SourceInfo* source);

  // Appends the samples count to pref on UI thread.
  void AppendToSamplesCountPref(std::vector<size_t> samples_count);

  // Creates a task to check all monitored sources for updates.
  void ScheduleSourcesCheck();

  // Takes a list of sources checked by an external task and determines what
  // to do with each. Virtual for testing.
  virtual void RecordSourcesChecked(SourceInfoList* checked,
                                    std::vector<size_t> samples_counts);

  // Schedules the deletion of a file in the background using the task-runner.
  void DeleteFileAsync(const base::FilePath& path);

  // Updates the persistent state information to show a source as being read.
  void RecordSourceAsRead(SourceInfo* source);

  // metrics::MetricsProvider:
  void OnDidCreateMetricsLog() override;
  bool HasIndependentMetrics() override;
  void ProvideIndependentMetrics(
      base::OnceClosure serialize_log_callback,
      base::OnceCallback<void(bool)> done_callback,
      ChromeUserMetricsExtension* uma_proto,
      base::HistogramSnapshotManager* snapshot_manager) override;
  bool HasPreviousSessionData() override;
  void RecordInitialHistogramSnapshots(
      base::HistogramSnapshotManager* snapshot_manager) override;

  // base::StatisticsRecorder::HistogramProvider:
  void MergeHistogramDeltas(bool async,
                            base::OnceClosure done_callback) override;

  // The part of ProvideIndependentMetrics that runs after background task.
  void ProvideIndependentMetricsCleanup(
      base::OnceCallback<void(bool)> done_callback,
      std::unique_ptr<SourceInfo> source,
      bool success);

  // Simulates the independent metrics by reading the first item from
  // kMetricsBrowserMetricsMetadata and updating the stability prefs
  // accordingly. Returns true if the pref isn't empty.
  bool SimulateIndependentMetrics();

  // A list of sources not currently active that need to be checked for changes.
  SourceInfoList sources_to_check_;

  // A list of currently active sources to be merged when required.
  SourceInfoList sources_mapped_;

  // A list of currently active sources to be merged when required.
  // NOTE(review): this text duplicates the comment on |sources_mapped_|.
  // Judging by the name, these are presumably sources carrying an embedded
  // system profile (see ASSOCIATE_INTERNAL_PROFILE) -- confirm against the
  // .cc file and reword.
  SourceInfoList sources_with_profile_;

  // A list of sources for a previous run. These are held separately because
  // they are not subject to the periodic background checking that handles
  // metrics for the current run.
  SourceInfoList sources_for_previous_run_;

  // The preferences-service used to store persistent state about sources.
  raw_ptr<PrefService> pref_service_;

  // Declares |sequence_checker_|; how it is used is not visible in this
  // header.
  SEQUENCE_CHECKER(sequence_checker_);

  // Factory for weak pointers to this object. NOTE(review): it is the last
  // data member, which presumably follows the usual WeakPtrFactory placement
  // convention -- keep it last.
  base::WeakPtrFactory<FileMetricsProvider> weak_factory_{this};
};

}  // namespace metrics

#endif  // COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_