1 // Copyright 2016 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_ 6 #define COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_ 7 8 #include <stddef.h> 9 10 #include <list> 11 #include <memory> 12 #include <vector> 13 14 #include "base/files/file_path.h" 15 #include "base/functional/callback_forward.h" 16 #include "base/gtest_prod_util.h" 17 #include "base/memory/raw_ptr.h" 18 #include "base/memory/scoped_refptr.h" 19 #include "base/memory/weak_ptr.h" 20 #include "base/metrics/statistics_recorder.h" 21 #include "base/sequence_checker.h" 22 #include "base/time/time.h" 23 #include "components/metrics/metrics_provider.h" 24 25 class PrefRegistrySimple; 26 class PrefService; 27 28 namespace base { 29 class TaskRunner; 30 } 31 32 namespace metrics { 33 34 // FileMetricsProvider gathers and logs histograms written to files on disk. 35 // Any number of files can be registered and will be polled once per upload 36 // cycle (at startup and periodically thereafter -- about every 30 minutes 37 // for desktop) for data to send. 38 class FileMetricsProvider : public MetricsProvider, 39 public base::StatisticsRecorder::HistogramProvider { 40 public: 41 struct Params; 42 43 enum SourceType { 44 // "Atomic" files are a collection of histograms that are written 45 // completely in a single atomic operation (typically a write followed 46 // by an atomic rename) and the file is never updated again except to 47 // be replaced by a completely new set of histograms. This is the only 48 // option that can be used if the file is not writeable by *this* 49 // process. Once the file has been read, an attempt will be made to 50 // delete it thus providing some measure of safety should different 51 // instantiations (such as by different users of a system-level install) 52 // try to read it. In case the delete operation fails, this class 53 // persistently tracks the last-modified time of the file so it will 54 // not be read a second time. 55 SOURCE_HISTOGRAMS_ATOMIC_FILE, 56 57 // A directory of atomic PMA files. This handles a directory in which 58 // files of metrics are atomically added. Only files ending with ".pma" 59 // will be read. They are read according to their last-modified time and 60 // never read more that once (unless they change). Only one file will 61 // be read per reporting cycle. Filenames that start with a dot (.) or 62 // an underscore (_) are ignored so temporary files (perhaps created by 63 // the ImportantFileWriter) will not get read. Files that have been 64 // read will be attempted to be deleted; should those files not be 65 // deletable by this process, it is the reponsibility of the producer 66 // to keep the directory pruned in some manner. Added files must have a 67 // timestamp later (not the same or earlier) than the newest file that 68 // already exists or it may be assumed to have been already uploaded. 69 SOURCE_HISTOGRAMS_ATOMIC_DIR, 70 71 // "Active" files may be open by one or more other processes and updated 72 // at any time with new samples or new histograms. Such files may also be 73 // inactive for any period of time only to be opened again and have new 74 // data written to them. The file should probably never be deleted because 75 // there would be no guarantee that the data has been reported. 76 SOURCE_HISTOGRAMS_ACTIVE_FILE, 77 }; 78 79 enum SourceAssociation { 80 // Associates the metrics in the file with the current run of the browser. 81 // The reporting will take place as part of the normal logging of 82 // histograms. 83 ASSOCIATE_CURRENT_RUN, 84 85 // Associates the metrics in the file with the previous run of the browesr. 86 // The reporting will take place as part of the "stability" histograms. 87 // This is important when metrics are dumped as part of a crash of the 88 // previous run. This can only be used with FILE_HISTOGRAMS_ATOMIC. 89 ASSOCIATE_PREVIOUS_RUN, 90 91 // Associates the metrics in the file with the a profile embedded in the 92 // same file. The reporting will take place at a convenient time after 93 // startup when the browser is otherwise idle. If there is no embedded 94 // system profile, these metrics will be lost. 95 ASSOCIATE_INTERNAL_PROFILE, 96 97 // Like above but fall back to ASSOCIATE_PREVIOUS_RUN if there is no 98 // embedded profile. This has a small cost during startup as that is 99 // when previous-run metrics are sent so the file has be checked at 100 // that time even though actual transfer will be delayed if an 101 // embedded profile is found. 102 ASSOCIATE_INTERNAL_PROFILE_OR_PREVIOUS_RUN, 103 104 // Used to only record the metadata of |ASSOCIATE_INTERNAL_PROFILE| but not 105 // merge the metrics. Instead, write metadata such as the samples count etc, 106 // to prefs then delete file. To precisely simulate the 107 // |ASSOCIATE_INTERNAL_PROFILE| behavior, one file record will be read out 108 // and added to the stability prefs each time the metrics service requests 109 // the |ASSOCIATE_INTERNAL_PROFILE| source metrics. Finally, the results 110 // will be recoreded as stability metrics in the next run. 111 ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER, 112 }; 113 114 enum FilterAction { 115 // Process this file normally. 116 FILTER_PROCESS_FILE, 117 118 // This file is the active metrics file for the current process. Don't 119 // do anything with it. This is effectively "try later" but isn't 120 // added to the results histogram because the file has to be ignored 121 // throughout the life of the browser and that skews the distribution. 122 FILTER_ACTIVE_THIS_PID, 123 124 // Try again. This could happen within milliseconds or minutes but no other 125 // files from the same source will get processed in between. The process 126 // must have permission to "touch" the file and alter its last-modified 127 // time because files are always processed in order of those stamps. 128 FILTER_TRY_LATER, 129 130 // Skip this file. This file will not be processed until it has changed 131 // (i.e. had its last-modifided time updated). If it is "atomic", an 132 // attempt will be made to delete it. 133 FILTER_SKIP_FILE, 134 }; 135 136 // A "filter" can be defined to determine what to do on a per-file basis. 137 // This is called only after a file has been found to be the next one to 138 // be processed so it's okay if filter calls are relatively expensive. 139 // Calls are made on a background thread of low-priority and capable of 140 // doing I/O. 141 using FilterCallback = 142 base::RepeatingCallback<FilterAction(const base::FilePath& path)>; 143 144 // Parameters for RegisterSource, defined as a structure to allow new 145 // ones to be added (with default values) that doesn't require changes 146 // to all call sites. 147 struct Params { 148 Params(const base::FilePath& path, 149 SourceType type, 150 SourceAssociation association, 151 base::StringPiece prefs_key = base::StringPiece()); 152 153 ~Params(); 154 155 // The standard parameters, set during construction. 156 const base::FilePath path; 157 const SourceType type; 158 const SourceAssociation association; 159 const base::StringPiece prefs_key; 160 161 // Other parameters that can be set after construction. 162 FilterCallback filter; // Run-time check for what to do with file. 163 base::TimeDelta max_age; // Maximum age of a file (0=unlimited). 164 size_t max_dir_kib = 0; // Maximum bytes in a directory (0=inf). 165 size_t max_dir_files = 100; // Maximum files in a directory (0=inf). 166 }; 167 168 explicit FileMetricsProvider(PrefService* local_state); 169 170 FileMetricsProvider(const FileMetricsProvider&) = delete; 171 FileMetricsProvider& operator=(const FileMetricsProvider&) = delete; 172 173 ~FileMetricsProvider() override; 174 175 // Indicates a file or directory to be monitored and how the file or files 176 // within that directory are used. Because some metadata may need to persist 177 // across process restarts, preferences entries are used based on the 178 // |prefs_key| name. Call RegisterSourcePrefs() with the same name to create 179 // the necessary keys in advance. Set |prefs_key| empty (nullptr will work) if 180 // no persistence is required. ACTIVE files shouldn't have a pref key as 181 // they update internal state about what has been previously sent. 182 void RegisterSource(const Params& params); 183 184 // Registers all necessary preferences for maintaining persistent state 185 // about a monitored file across process restarts. The |prefs_key| is 186 // typically the filename. 187 static void RegisterSourcePrefs(PrefRegistrySimple* prefs, 188 const base::StringPiece prefs_key); 189 190 static void RegisterPrefs(PrefRegistrySimple* prefs); 191 192 // Sets the task runner to use for testing. 193 static void SetTaskRunnerForTesting( 194 const scoped_refptr<base::TaskRunner>& task_runner); 195 196 private: 197 friend class FileMetricsProviderTest; 198 199 // The different results that can occur accessing a file. 200 enum AccessResult { 201 // File was successfully mapped. 202 ACCESS_RESULT_SUCCESS, 203 204 // File does not exist. 205 ACCESS_RESULT_DOESNT_EXIST, 206 207 // File exists but not modified since last read. 208 ACCESS_RESULT_NOT_MODIFIED, 209 210 // File is not valid: is a directory or zero-size. 211 ACCESS_RESULT_INVALID_FILE, 212 213 // System could not map file into memory. 214 ACCESS_RESULT_SYSTEM_MAP_FAILURE, 215 216 // File had invalid contents. 217 ACCESS_RESULT_INVALID_CONTENTS, 218 219 // File could not be opened. 220 ACCESS_RESULT_NO_OPEN, 221 222 // File contents were internally deleted. 223 ACCESS_RESULT_MEMORY_DELETED, 224 225 // File is scheduled to be tried again later. 226 ACCESS_RESULT_FILTER_TRY_LATER, 227 228 // File was skipped according to filtering rules. 229 ACCESS_RESULT_FILTER_SKIP_FILE, 230 231 // File was skipped because it exceeds the maximum age. 232 ACCESS_RESULT_TOO_OLD, 233 234 // File was skipped because too many files in directory. 235 ACCESS_RESULT_TOO_MANY_FILES, 236 237 // File was skipped because too many bytes in directory. 238 ACCESS_RESULT_TOO_MANY_BYTES, 239 240 // The file was skipped because it's being written by this process. 241 ACCESS_RESULT_THIS_PID, 242 243 // The file had no embedded system profile. 244 ACCESS_RESULT_NO_PROFILE, 245 246 // The file had internal data corruption. 247 ACCESS_RESULT_DATA_CORRUPTION, 248 249 // The file is not writable when it should be. 250 ACCESS_RESULT_NOT_WRITABLE, 251 252 ACCESS_RESULT_MAX 253 }; 254 255 // Information about sources being monitored; defined and used exclusively 256 // inside the .cc file. 257 struct SourceInfo; 258 using SourceInfoList = std::list<std::unique_ptr<SourceInfo>>; 259 260 // Records an access result in a histogram. 261 static void RecordAccessResult(AccessResult result); 262 263 // Looks for the next file to read within a directory. Returns true if a 264 // file was found. This is part of CheckAndMapNewMetricSourcesOnTaskRunner 265 // and so runs on an thread capable of I/O. The |source| structure will 266 // be internally updated to indicate the next file to be read. 267 static bool LocateNextFileInDirectory(SourceInfo* source); 268 269 // Handles the completion of a source. 270 static void FinishedWithSource(SourceInfo* source, AccessResult result); 271 272 // Checks a list of sources (on a task-runner allowed to do I/O) and merge 273 // any data found within them. 274 // Returns a list of histogram sample counts for sources of type 275 // ASSOCIATE_INTERNAL_PROFILE_SAMPLES_COUNTER that were processed. 276 static std::vector<size_t> CheckAndMergeMetricSourcesOnTaskRunner( 277 SourceInfoList* sources); 278 279 // Checks a single source and maps it into memory. 280 static AccessResult CheckAndMapMetricSource(SourceInfo* source); 281 282 // Merges all of the histograms from a |source| to the StatisticsRecorder. 283 // Returns the number of histograms merged. 284 static size_t MergeHistogramDeltasFromSource(SourceInfo* source); 285 286 // Records all histograms from a given source via a snapshot-manager. Only the 287 // histograms that have |required_flags| will be recorded. 288 static void RecordHistogramSnapshotsFromSource( 289 base::HistogramSnapshotManager* snapshot_manager, 290 SourceInfo* source, 291 base::HistogramBase::Flags required_flags); 292 293 // Calls source filter (if any) and returns the desired action. 294 static AccessResult HandleFilterSource(SourceInfo* source, 295 const base::FilePath& path); 296 297 // The part of ProvideIndependentMetrics that runs as a background task. 298 static bool ProvideIndependentMetricsOnTaskRunner( 299 SourceInfo* source, 300 SystemProfileProto* system_profile_proto, 301 base::HistogramSnapshotManager* snapshot_manager); 302 303 // Collects the metadata of the |source|. 304 // Returns the number of histogram samples from that source. 305 static size_t CollectFileMetadataFromSource(SourceInfo* source); 306 307 // Appends the samples count to pref on UI thread. 308 void AppendToSamplesCountPref(std::vector<size_t> samples_count); 309 310 // Creates a task to check all monitored sources for updates. 311 void ScheduleSourcesCheck(); 312 313 // Takes a list of sources checked by an external task and determines what 314 // to do with each. 315 void RecordSourcesChecked(SourceInfoList* checked, 316 std::vector<size_t> samples_counts); 317 318 // Schedules the deletion of a file in the background using the task-runner. 319 void DeleteFileAsync(const base::FilePath& path); 320 321 // Updates the persistent state information to show a source as being read. 322 void RecordSourceAsRead(SourceInfo* source); 323 324 // metrics::MetricsProvider: 325 void OnDidCreateMetricsLog() override; 326 bool HasIndependentMetrics() override; 327 void ProvideIndependentMetrics( 328 base::OnceCallback<void(bool)> done_callback, 329 ChromeUserMetricsExtension* uma_proto, 330 base::HistogramSnapshotManager* snapshot_manager) override; 331 bool HasPreviousSessionData() override; 332 void RecordInitialHistogramSnapshots( 333 base::HistogramSnapshotManager* snapshot_manager) override; 334 335 // base::StatisticsRecorder::HistogramProvider: 336 void MergeHistogramDeltas() override; 337 338 // The part of ProvideIndependentMetrics that runs after background task. 339 void ProvideIndependentMetricsCleanup( 340 base::OnceCallback<void(bool)> done_callback, 341 std::unique_ptr<SourceInfo> source, 342 bool success); 343 344 // Simulates the independent metrics to read the first item from 345 // kMetricsBrowserMetricsMetadata and updates the stability prefs accordingly, 346 // return true if the pref isn't empty. 347 bool SimulateIndependentMetrics(); 348 349 // A task-runner capable of performing I/O. 350 scoped_refptr<base::TaskRunner> task_runner_; 351 352 // A list of sources not currently active that need to be checked for changes. 353 SourceInfoList sources_to_check_; 354 355 // A list of currently active sources to be merged when required. 356 SourceInfoList sources_mapped_; 357 358 // A list of currently active sources to be merged when required. 359 SourceInfoList sources_with_profile_; 360 361 // A list of sources for a previous run. These are held separately because 362 // they are not subject to the periodic background checking that handles 363 // metrics for the current run. 364 SourceInfoList sources_for_previous_run_; 365 366 // The preferences-service used to store persistent state about sources. 367 raw_ptr<PrefService> pref_service_; 368 369 SEQUENCE_CHECKER(sequence_checker_); 370 base::WeakPtrFactory<FileMetricsProvider> weak_factory_{this}; 371 }; 372 373 } // namespace metrics 374 375 #endif // COMPONENTS_METRICS_FILE_METRICS_PROVIDER_H_ 376