// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Histogram is an object that aggregates statistics, and can summarize them in
// various forms, including ASCII graphical, HTML, and numerically (as a
// vector of numbers corresponding to each of the aggregating buckets).

// It supports calls to accumulate either time intervals (which are processed
// as integral number of milliseconds), or arbitrary integral units.

// For Histogram (exponential histogram), LinearHistogram and CustomHistogram,
// the minimum for a declared range is 1 (instead of 0), while the maximum is
// (HistogramBase::kSampleType_MAX - 1). However, there will always be underflow
// and overflow buckets added automatically, so a 0 bucket will always exist
// even when a minimum value of 1 is specified.

// Each use of a histogram with the same name will reference the same underlying
// data, so it is safe to record to the same histogram from multiple locations
// in the code. It is a runtime error if all uses of the same histogram do not
// agree exactly in type, bucket size and range.

// For Histogram and LinearHistogram, the maximum for a declared range should
// always be strictly larger than (not equal to) the minimum. Zero and
// HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
// so the smallest legal bucket_count is 3. However CustomHistogram can have
// bucket count as 2 (when you give a custom ranges vector containing only 1
// range).
// For these 3 kinds of histograms, the max bucket count is always
// (Histogram::kBucketCount_MAX - 1).

// The buckets layout of class Histogram is exponential.
// For example, buckets
// might contain (sequentially) the count of values in the following intervals:
// [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
// That bucket allocation would actually result from construction of a histogram
// for values between 1 and 64, with 8 buckets, such as:
// Histogram count("some name", 1, 64, 8);
// Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
// are also counted by the constructor in the user supplied "bucket_count"
// argument.
// The above example has an exponential ratio of 2 (doubling the bucket width
// in each consecutive bucket). The Histogram class automatically calculates
// the smallest ratio that it can use to construct the number of buckets
// selected in the constructor. As another example, if you had 50 buckets,
// and millisecond time values from 1 to 10000, then the ratio between
// consecutive bucket widths will be approximately somewhere around the 50th
// root of 10000. This approach provides very fine grain (narrow) buckets
// at the low end of the histogram scale, but allows the histogram to cover a
// gigantic range with the addition of very few buckets.

// Usually we use macros to define and use a histogram, which are defined in
// base/metrics/histogram_macros.h. Note: Callers should include that header
// directly if they only access the histogram APIs through macros.
//
// Macros use a pattern involving a function static variable, that is a pointer
// to a histogram. This static is explicitly initialized on any thread
// that detects an uninitialized (NULL) pointer. The potentially racy
// initialization is not a problem as it is always set to point to the same
// value (i.e., the FactoryGet always returns the same value). FactoryGet
// is also completely thread safe, which results in a completely thread safe,
// and relatively fast, set of counters.
To avoid races at shutdown, the static 62 // pointer is NOT deleted, and we leak the histograms at process termination. 63 64 #ifndef BASE_METRICS_HISTOGRAM_H_ 65 #define BASE_METRICS_HISTOGRAM_H_ 66 67 #include <stddef.h> 68 #include <stdint.h> 69 70 #include <map> 71 #include <memory> 72 #include <string> 73 #include <vector> 74 75 #include "base/base_export.h" 76 #include "base/compiler_specific.h" 77 #include "base/containers/span.h" 78 #include "base/dcheck_is_on.h" 79 #include "base/gtest_prod_util.h" 80 #include "base/memory/raw_ptr.h" 81 #include "base/metrics/bucket_ranges.h" 82 #include "base/metrics/histogram_base.h" 83 #include "base/metrics/histogram_samples.h" 84 #include "base/strings/string_piece.h" 85 #include "base/time/time.h" 86 #include "base/values.h" 87 88 namespace base { 89 90 class BooleanHistogram; 91 class CustomHistogram; 92 class DelayedPersistentAllocation; 93 class Histogram; 94 class HistogramTest; 95 class LinearHistogram; 96 class Pickle; 97 class PickleIterator; 98 class SampleVector; 99 class SampleVectorBase; 100 101 class BASE_EXPORT Histogram : public HistogramBase { 102 public: 103 // Initialize maximum number of buckets in histograms as 1000, plus over and 104 // under. This must be a value that fits in a uint32_t (since that's how we 105 // serialize bucket counts) as well as a Sample (since samples can be up to 106 // this value). 
107 static constexpr size_t kBucketCount_MAX = 1002; 108 109 typedef std::vector<Count> Counts; 110 111 Histogram(const Histogram&) = delete; 112 Histogram& operator=(const Histogram&) = delete; 113 114 ~Histogram() override; 115 116 //---------------------------------------------------------------------------- 117 // For a valid histogram, input should follow these restrictions: 118 // minimum > 0 (if a minimum below 1 is specified, it will implicitly be 119 // normalized up to 1) 120 // maximum > minimum 121 // buckets > 2 [minimum buckets needed: underflow, overflow and the range] 122 // Additionally, 123 // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have 124 // more buckets than the range of numbers; having more buckets than 1 per 125 // value in the range would be nonsensical. 126 static HistogramBase* FactoryGet(const std::string& name, 127 Sample minimum, 128 Sample maximum, 129 size_t bucket_count, 130 int32_t flags); 131 static HistogramBase* FactoryTimeGet(const std::string& name, 132 base::TimeDelta minimum, 133 base::TimeDelta maximum, 134 size_t bucket_count, 135 int32_t flags); 136 static HistogramBase* FactoryMicrosecondsTimeGet(const std::string& name, 137 base::TimeDelta minimum, 138 base::TimeDelta maximum, 139 size_t bucket_count, 140 int32_t flags); 141 142 // Overloads of the above functions that take a const char* |name| param, to 143 // avoid code bloat from the std::string constructor being inlined into call 144 // sites. 
145 static HistogramBase* FactoryGet(const char* name, 146 Sample minimum, 147 Sample maximum, 148 size_t bucket_count, 149 int32_t flags); 150 static HistogramBase* FactoryTimeGet(const char* name, 151 base::TimeDelta minimum, 152 base::TimeDelta maximum, 153 size_t bucket_count, 154 int32_t flags); 155 static HistogramBase* FactoryMicrosecondsTimeGet(const char* name, 156 base::TimeDelta minimum, 157 base::TimeDelta maximum, 158 size_t bucket_count, 159 int32_t flags); 160 161 // Create a histogram using data in persistent storage. 162 static std::unique_ptr<HistogramBase> PersistentCreate( 163 const char* name, 164 const BucketRanges* ranges, 165 const DelayedPersistentAllocation& counts, 166 const DelayedPersistentAllocation& logged_counts, 167 HistogramSamples::Metadata* meta, 168 HistogramSamples::Metadata* logged_meta); 169 170 static void InitializeBucketRanges(Sample minimum, 171 Sample maximum, 172 BucketRanges* ranges); 173 174 // This constant if for FindCorruption. Since snapshots of histograms are 175 // taken asynchronously relative to sampling, and our counting code currently 176 // does not prevent race conditions, it is pretty likely that we'll catch a 177 // redundant count that doesn't match the sample count. We allow for a 178 // certain amount of slop before flagging this as an inconsistency. Even with 179 // an inconsistency, we'll snapshot it again (for UMA in about a half hour), 180 // so we'll eventually get the data, if it was not the result of a corruption. 181 static const int kCommonRaceBasedCountMismatch; 182 183 // Check to see if bucket ranges, counts and tallies in the snapshot are 184 // consistent with the bucket ranges and checksums in our histogram. This can 185 // produce a false-alarm if a race occurred in the reading of the data during 186 // a SnapShot process, but should otherwise be false at all times (unless we 187 // have memory over-writes, or DRAM failures). 
Flag definitions are located 188 // under "enum Inconsistency" in base/metrics/histogram_base.h. 189 uint32_t FindCorruption(const HistogramSamples& samples) const override; 190 191 //---------------------------------------------------------------------------- 192 // Accessors for factory construction, serialization and testing. 193 //---------------------------------------------------------------------------- 194 const BucketRanges* bucket_ranges() const; 195 Sample declared_min() const; 196 Sample declared_max() const; 197 virtual Sample ranges(size_t i) const; 198 virtual size_t bucket_count() const; 199 200 // This function validates histogram construction arguments. It returns false 201 // if some of the arguments are bad but also corrects them so they should 202 // function on non-dcheck builds without crashing. 203 // Note. Currently it allow some bad input, e.g. 0 as minimum, but silently 204 // converts it to good input: 1. 205 static bool InspectConstructionArguments(StringPiece name, 206 Sample* minimum, 207 Sample* maximum, 208 size_t* bucket_count); 209 210 // HistogramBase implementation: 211 uint64_t name_hash() const override; 212 HistogramType GetHistogramType() const override; 213 bool HasConstructionArguments(Sample expected_minimum, 214 Sample expected_maximum, 215 size_t expected_bucket_count) const override; 216 void Add(Sample value) override; 217 void AddCount(Sample value, int count) override; 218 std::unique_ptr<HistogramSamples> SnapshotSamples() const override; 219 std::unique_ptr<HistogramSamples> SnapshotUnloggedSamples() const override; 220 void MarkSamplesAsLogged(const HistogramSamples& samples) final; 221 std::unique_ptr<HistogramSamples> SnapshotDelta() override; 222 std::unique_ptr<HistogramSamples> SnapshotFinalDelta() const override; 223 void AddSamples(const HistogramSamples& samples) override; 224 bool AddSamplesFromPickle(base::PickleIterator* iter) override; 225 base::Value::Dict ToGraphDict() const override; 226 227 
protected: 228 // This class, defined entirely within the .cc file, contains all the 229 // common logic for building a Histogram and can be overridden by more 230 // specific types to alter details of how the creation is done. It is 231 // defined as an embedded class (rather than an anonymous one) so it 232 // can access the protected constructors. 233 class Factory; 234 235 // |ranges| should contain the underflow and overflow buckets. See top 236 // comments for example. 237 Histogram(const char* name, const BucketRanges* ranges); 238 239 // Traditionally, histograms allocate their own memory for the bucket 240 // vector but "shared" histograms use memory regions allocated from a 241 // special memory segment that is passed in here. It is assumed that 242 // the life of this memory is managed externally and exceeds the lifetime 243 // of this object. Practically, this memory is never released until the 244 // process exits and the OS cleans it up. 245 Histogram(const char* name, 246 const BucketRanges* ranges, 247 const DelayedPersistentAllocation& counts, 248 const DelayedPersistentAllocation& logged_counts, 249 HistogramSamples::Metadata* meta, 250 HistogramSamples::Metadata* logged_meta); 251 252 // HistogramBase implementation: 253 void SerializeInfoImpl(base::Pickle* pickle) const override; 254 255 // Return a string description of what goes in a given bucket. 256 // Most commonly this is the numeric value, but in derived classes it may 257 // be a name (or string description) given to the bucket. 258 virtual const std::string GetAsciiBucketRange(size_t it) const; 259 260 private: 261 // Allow tests to corrupt our innards for testing purposes. 
262 friend class HistogramTest; 263 friend class HistogramThreadsafeTest; 264 FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest); 265 FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest); 266 FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts); 267 268 friend class StatisticsRecorder; // To allow it to delete duplicates. 269 friend class StatisticsRecorderTest; 270 271 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 272 base::PickleIterator* iter); 273 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 274 275 // Create a snapshot containing all samples (both logged and unlogged). 276 // Implementation of SnapshotSamples method with a more specific type for 277 // internal use. 278 std::unique_ptr<SampleVector> SnapshotAllSamples() const; 279 280 // Returns a copy of unlogged samples as the underlying SampleVector class, 281 // instead of the HistogramSamples base class. Used for tests and to avoid 282 // virtual dispatch from some callsites. 283 std::unique_ptr<SampleVector> SnapshotUnloggedSamplesImpl() const; 284 285 // Writes the type, min, max, and bucket count information of the histogram in 286 // |params|. 287 Value::Dict GetParameters() const override; 288 289 // Samples that have not yet been logged with SnapshotDelta(). 290 std::unique_ptr<SampleVectorBase> unlogged_samples_; 291 292 // Accumulation of all samples that have been logged with SnapshotDelta(). 293 std::unique_ptr<SampleVectorBase> logged_samples_; 294 295 #if DCHECK_IS_ON() // Don't waste memory if it won't be used. 296 // Flag to indicate if PrepareFinalDelta has been previously called. It is 297 // used to DCHECK that a final delta is not created multiple times. 298 mutable bool final_delta_created_ = false; 299 #endif 300 }; 301 302 //------------------------------------------------------------------------------ 303 304 // LinearHistogram is a more traditional histogram, with evenly spaced 305 // buckets. 
306 class BASE_EXPORT LinearHistogram : public Histogram { 307 public: 308 LinearHistogram(const LinearHistogram&) = delete; 309 LinearHistogram& operator=(const LinearHistogram&) = delete; 310 311 ~LinearHistogram() override; 312 313 /* minimum should start from 1. 0 is as minimum is invalid. 0 is an implicit 314 default underflow bucket. */ 315 static HistogramBase* FactoryGet(const std::string& name, 316 Sample minimum, 317 Sample maximum, 318 size_t bucket_count, 319 int32_t flags); 320 static HistogramBase* FactoryTimeGet(const std::string& name, 321 TimeDelta minimum, 322 TimeDelta maximum, 323 size_t bucket_count, 324 int32_t flags); 325 326 // Overloads of the above two functions that take a const char* |name| param, 327 // to avoid code bloat from the std::string constructor being inlined into 328 // call sites. 329 static HistogramBase* FactoryGet(const char* name, 330 Sample minimum, 331 Sample maximum, 332 size_t bucket_count, 333 int32_t flags); 334 static HistogramBase* FactoryTimeGet(const char* name, 335 TimeDelta minimum, 336 TimeDelta maximum, 337 size_t bucket_count, 338 int32_t flags); 339 340 // Create a histogram using data in persistent storage. 341 static std::unique_ptr<HistogramBase> PersistentCreate( 342 const char* name, 343 const BucketRanges* ranges, 344 const DelayedPersistentAllocation& counts, 345 const DelayedPersistentAllocation& logged_counts, 346 HistogramSamples::Metadata* meta, 347 HistogramSamples::Metadata* logged_meta); 348 349 struct DescriptionPair { 350 Sample sample; 351 const char* description; // Null means end of a list of pairs. 352 }; 353 354 // Create a LinearHistogram and store a list of number/text values for use in 355 // writing the histogram graph. 356 // |descriptions| can be NULL, which means no special descriptions to set. If 357 // it's not NULL, the last element in the array must has a NULL in its 358 // "description" field. 
359 static HistogramBase* FactoryGetWithRangeDescription( 360 const std::string& name, 361 Sample minimum, 362 Sample maximum, 363 size_t bucket_count, 364 int32_t flags, 365 const DescriptionPair descriptions[]); 366 367 static void InitializeBucketRanges(Sample minimum, 368 Sample maximum, 369 BucketRanges* ranges); 370 371 // Overridden from Histogram: 372 HistogramType GetHistogramType() const override; 373 374 protected: 375 class Factory; 376 377 LinearHistogram(const char* name, const BucketRanges* ranges); 378 379 LinearHistogram(const char* name, 380 const BucketRanges* ranges, 381 const DelayedPersistentAllocation& counts, 382 const DelayedPersistentAllocation& logged_counts, 383 HistogramSamples::Metadata* meta, 384 HistogramSamples::Metadata* logged_meta); 385 386 // If we have a description for a bucket, then return that. Otherwise 387 // let parent class provide a (numeric) description. 388 const std::string GetAsciiBucketRange(size_t i) const override; 389 390 private: 391 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 392 base::PickleIterator* iter); 393 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 394 395 // For some ranges, we store a printable description of a bucket range. 396 // If there is no description, then GetAsciiBucketRange() uses parent class 397 // to provide a description. 398 typedef std::map<Sample, std::string> BucketDescriptionMap; 399 BucketDescriptionMap bucket_description_; 400 }; 401 402 //------------------------------------------------------------------------------ 403 404 // ScaledLinearHistogram is a wrapper around a linear histogram that scales the 405 // counts down by some factor. Remainder values are kept locally but lost when 406 // uploaded or serialized. The integral counts are rounded up/down so should 407 // average to the correct value when many reports are added. 
408 // 409 // This is most useful when adding many counts at once via AddCount() that can 410 // cause overflows of the 31-bit counters, usually with an enum as the value. 411 class BASE_EXPORT ScaledLinearHistogram { 412 using AtomicCount = Histogram::AtomicCount; 413 using Sample = Histogram::Sample; 414 415 public: 416 // Currently only works with "exact" linear histograms: minimum=1, maximum=N, 417 // and bucket_count=N+1. 418 ScaledLinearHistogram(const char* name, 419 Sample minimum, 420 Sample maximum, 421 size_t bucket_count, 422 int32_t scale, 423 int32_t flags); 424 ScaledLinearHistogram(const std::string& name, 425 Sample minimum, 426 Sample maximum, 427 size_t bucket_count, 428 int32_t scale, 429 int32_t flags); 430 431 ScaledLinearHistogram(const ScaledLinearHistogram&) = delete; 432 ScaledLinearHistogram& operator=(const ScaledLinearHistogram&) = delete; 433 434 ~ScaledLinearHistogram(); 435 436 // Like AddCount() but actually accumulates |count|/|scale| and increments 437 // the accumulated remainder by |count|%|scale|. An additional increment 438 // is done when the remainder has grown sufficiently large. 439 // The value after scaling must fit into 32-bit signed integer. 440 void AddScaledCount(Sample value, int64_t count); 441 scale()442 int32_t scale() const { return scale_; } histogram()443 HistogramBase* histogram() { return histogram_; } 444 445 private: 446 // Pointer to the underlying histogram. Ownership of it remains with 447 // the statistics-recorder. This is typed as HistogramBase because it may be a 448 // DummyHistogram if expired. 449 const raw_ptr<HistogramBase> histogram_; 450 451 // The scale factor of the sample counts. 452 const int32_t scale_; 453 454 // A vector of "remainder" counts indexed by bucket number. These values 455 // may be negative as the scaled count is actually bumped once the 456 // remainder is 1/2 way to the scale value (thus "rounding"). 
457 std::vector<AtomicCount> remainders_; 458 }; 459 460 //------------------------------------------------------------------------------ 461 462 // BooleanHistogram is a histogram for booleans. 463 class BASE_EXPORT BooleanHistogram : public LinearHistogram { 464 public: 465 static HistogramBase* FactoryGet(const std::string& name, int32_t flags); 466 467 // Overload of the above function that takes a const char* |name| param, 468 // to avoid code bloat from the std::string constructor being inlined into 469 // call sites. 470 static HistogramBase* FactoryGet(const char* name, int32_t flags); 471 472 BooleanHistogram(const BooleanHistogram&) = delete; 473 BooleanHistogram& operator=(const BooleanHistogram&) = delete; 474 475 // Create a histogram using data in persistent storage. 476 static std::unique_ptr<HistogramBase> PersistentCreate( 477 const char* name, 478 const BucketRanges* ranges, 479 const DelayedPersistentAllocation& counts, 480 const DelayedPersistentAllocation& logged_counts, 481 HistogramSamples::Metadata* meta, 482 HistogramSamples::Metadata* logged_meta); 483 484 HistogramType GetHistogramType() const override; 485 486 protected: 487 class Factory; 488 489 private: 490 BooleanHistogram(const char* name, const BucketRanges* ranges); 491 BooleanHistogram(const char* name, 492 const BucketRanges* ranges, 493 const DelayedPersistentAllocation& counts, 494 const DelayedPersistentAllocation& logged_counts, 495 HistogramSamples::Metadata* meta, 496 HistogramSamples::Metadata* logged_meta); 497 498 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 499 base::PickleIterator* iter); 500 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 501 }; 502 503 //------------------------------------------------------------------------------ 504 505 // CustomHistogram is a histogram for a set of custom integers. 
506 class BASE_EXPORT CustomHistogram : public Histogram { 507 public: 508 // |custom_ranges| contains a vector of limits on ranges. Each limit should be 509 // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward 510 // compatibility). The limits can be unordered or contain duplication, but 511 // client should not depend on this. 512 static HistogramBase* FactoryGet(const std::string& name, 513 const std::vector<Sample>& custom_ranges, 514 int32_t flags); 515 516 // Overload of the above function that takes a const char* |name| param, 517 // to avoid code bloat from the std::string constructor being inlined into 518 // call sites. 519 static HistogramBase* FactoryGet(const char* name, 520 const std::vector<Sample>& custom_ranges, 521 int32_t flags); 522 523 CustomHistogram(const CustomHistogram&) = delete; 524 CustomHistogram& operator=(const CustomHistogram&) = delete; 525 526 // Create a histogram using data in persistent storage. 527 static std::unique_ptr<HistogramBase> PersistentCreate( 528 const char* name, 529 const BucketRanges* ranges, 530 const DelayedPersistentAllocation& counts, 531 const DelayedPersistentAllocation& logged_counts, 532 HistogramSamples::Metadata* meta, 533 HistogramSamples::Metadata* logged_meta); 534 535 // Overridden from Histogram: 536 HistogramType GetHistogramType() const override; 537 538 // Helper method for transforming an array of valid enumeration values 539 // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION. 540 // This function ensures that a guard bucket exists right after any 541 // valid sample value (unless the next higher sample is also a valid value), 542 // so that invalid samples never fall into the same bucket as valid samples. 
543 static std::vector<Sample> ArrayToCustomEnumRanges( 544 base::span<const Sample> values); 545 546 protected: 547 class Factory; 548 549 CustomHistogram(const char* name, const BucketRanges* ranges); 550 551 CustomHistogram(const char* name, 552 const BucketRanges* ranges, 553 const DelayedPersistentAllocation& counts, 554 const DelayedPersistentAllocation& logged_counts, 555 HistogramSamples::Metadata* meta, 556 HistogramSamples::Metadata* logged_meta); 557 558 // HistogramBase implementation: 559 void SerializeInfoImpl(base::Pickle* pickle) const override; 560 561 private: 562 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 563 base::PickleIterator* iter); 564 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 565 566 static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges); 567 }; 568 569 } // namespace base 570 571 #endif // BASE_METRICS_HISTOGRAM_H_ 572