// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Histogram is an object that aggregates statistics, and can summarize them in
// various forms, including ASCII graphical, HTML, and numerically (as a
// vector of numbers corresponding to each of the aggregating buckets).

// It supports calls to accumulate either time intervals (which are processed
// as integral number of milliseconds), or arbitrary integral units.

// For Histogram (exponential histogram), LinearHistogram and CustomHistogram,
// the minimum for a declared range is 1 (instead of 0), while the maximum is
// (HistogramBase::kSampleType_MAX - 1). However, there will always be underflow
// and overflow buckets added automatically, so a 0 bucket will always exist
// even when a minimum value of 1 is specified.

// Each use of a histogram with the same name will reference the same underlying
// data, so it is safe to record to the same histogram from multiple locations
// in the code. It is a runtime error if all uses of the same histogram do not
// agree exactly in type, bucket size and range.

// For Histogram and LinearHistogram, the maximum for a declared range should
// always be larger than (not equal to) the minimum of the range. Zero and
// HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
// so the smallest legal bucket_count is 3. However CustomHistogram can have
// bucket count as 2 (when you give a custom ranges vector containing only 1
// range).
// For these 3 kinds of histograms, the max bucket count is always
// (Histogram::kBucketCount_MAX - 1).

// The buckets layout of class Histogram is exponential. For example, buckets
// might contain (sequentially) the count of values in the following intervals:
// [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
// That bucket allocation would actually result from construction of a histogram
// for values between 1 and 64, with 8 buckets, such as:
// Histogram count("some name", 1, 64, 8);
// Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
// are also counted by the constructor in the user supplied "bucket_count"
// argument.
// The above example has an exponential ratio of 2 (doubling the bucket width
// in each consecutive bucket). The Histogram class automatically calculates
// the smallest ratio that it can use to construct the number of buckets
// selected in the constructor. As another example, if you had 50 buckets,
// and millisecond time values from 1 to 10000, then the ratio between
// consecutive bucket widths will be approximately somewhere around the 50th
// root of 10000. This approach provides very fine grain (narrow) buckets
// at the low end of the histogram scale, but allows the histogram to cover a
// gigantic range with the addition of very few buckets.

// Usually we use macros to define and use a histogram, which are defined in
// base/metrics/histogram_macros.h. Note: Callers should include that header
// directly if they only access the histogram APIs through macros.
//
// Macros use a pattern involving a function static variable, that is a pointer
// to a histogram. This static is explicitly initialized on any thread
// that detects an uninitialized (NULL) pointer. The potentially racy
// initialization is not a problem as it is always set to point to the same
// value (i.e., the FactoryGet always returns the same value). FactoryGet
// is also completely thread safe, which results in a completely thread safe,
// and relatively fast, set of counters.
To avoid races at shutdown, the static 62 // pointer is NOT deleted, and we leak the histograms at process termination. 63 64 #ifndef BASE_METRICS_HISTOGRAM_H_ 65 #define BASE_METRICS_HISTOGRAM_H_ 66 67 #include <stddef.h> 68 #include <stdint.h> 69 70 #include <map> 71 #include <memory> 72 #include <string> 73 #include <vector> 74 75 #include "base/base_export.h" 76 #include "base/compiler_specific.h" 77 #include "base/containers/span.h" 78 #include "base/gtest_prod_util.h" 79 #include "base/logging.h" 80 #include "base/macros.h" 81 #include "base/metrics/bucket_ranges.h" 82 #include "base/metrics/histogram_base.h" 83 #include "base/metrics/histogram_samples.h" 84 #include "base/strings/string_piece.h" 85 #include "base/time/time.h" 86 87 namespace base { 88 89 class BooleanHistogram; 90 class CustomHistogram; 91 class DelayedPersistentAllocation; 92 class Histogram; 93 class HistogramTest; 94 class LinearHistogram; 95 class Pickle; 96 class PickleIterator; 97 class SampleVector; 98 class SampleVectorBase; 99 100 class BASE_EXPORT Histogram : public HistogramBase { 101 public: 102 // Initialize maximum number of buckets in histograms as 16,384. 103 static const uint32_t kBucketCount_MAX; 104 105 typedef std::vector<Count> Counts; 106 107 ~Histogram() override; 108 109 //---------------------------------------------------------------------------- 110 // For a valid histogram, input should follow these restrictions: 111 // minimum > 0 (if a minimum below 1 is specified, it will implicitly be 112 // normalized up to 1) 113 // maximum > minimum 114 // buckets > 2 [minimum buckets needed: underflow, overflow and the range] 115 // Additionally, 116 // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have 117 // more buckets than the range of numbers; having more buckets than 1 per 118 // value in the range would be nonsensical. 
119 static HistogramBase* FactoryGet(const std::string& name, 120 Sample minimum, 121 Sample maximum, 122 uint32_t bucket_count, 123 int32_t flags); 124 static HistogramBase* FactoryTimeGet(const std::string& name, 125 base::TimeDelta minimum, 126 base::TimeDelta maximum, 127 uint32_t bucket_count, 128 int32_t flags); 129 static HistogramBase* FactoryMicrosecondsTimeGet(const std::string& name, 130 base::TimeDelta minimum, 131 base::TimeDelta maximum, 132 uint32_t bucket_count, 133 int32_t flags); 134 135 // Overloads of the above functions that take a const char* |name| param, to 136 // avoid code bloat from the std::string constructor being inlined into call 137 // sites. 138 static HistogramBase* FactoryGet(const char* name, 139 Sample minimum, 140 Sample maximum, 141 uint32_t bucket_count, 142 int32_t flags); 143 static HistogramBase* FactoryTimeGet(const char* name, 144 base::TimeDelta minimum, 145 base::TimeDelta maximum, 146 uint32_t bucket_count, 147 int32_t flags); 148 static HistogramBase* FactoryMicrosecondsTimeGet(const char* name, 149 base::TimeDelta minimum, 150 base::TimeDelta maximum, 151 uint32_t bucket_count, 152 int32_t flags); 153 154 // Create a histogram using data in persistent storage. 155 static std::unique_ptr<HistogramBase> PersistentCreate( 156 const char* name, 157 Sample minimum, 158 Sample maximum, 159 const BucketRanges* ranges, 160 const DelayedPersistentAllocation& counts, 161 const DelayedPersistentAllocation& logged_counts, 162 HistogramSamples::Metadata* meta, 163 HistogramSamples::Metadata* logged_meta); 164 165 static void InitializeBucketRanges(Sample minimum, 166 Sample maximum, 167 BucketRanges* ranges); 168 169 // This constant if for FindCorruption. Since snapshots of histograms are 170 // taken asynchronously relative to sampling, and our counting code currently 171 // does not prevent race conditions, it is pretty likely that we'll catch a 172 // redundant count that doesn't match the sample count. 
We allow for a 173 // certain amount of slop before flagging this as an inconsistency. Even with 174 // an inconsistency, we'll snapshot it again (for UMA in about a half hour), 175 // so we'll eventually get the data, if it was not the result of a corruption. 176 static const int kCommonRaceBasedCountMismatch; 177 178 // Check to see if bucket ranges, counts and tallies in the snapshot are 179 // consistent with the bucket ranges and checksums in our histogram. This can 180 // produce a false-alarm if a race occurred in the reading of the data during 181 // a SnapShot process, but should otherwise be false at all times (unless we 182 // have memory over-writes, or DRAM failures). Flag definitions are located 183 // under "enum Inconsistency" in base/metrics/histogram_base.h. 184 uint32_t FindCorruption(const HistogramSamples& samples) const override; 185 186 //---------------------------------------------------------------------------- 187 // Accessors for factory construction, serialization and testing. 188 //---------------------------------------------------------------------------- 189 const BucketRanges* bucket_ranges() const; 190 Sample declared_min() const; 191 Sample declared_max() const; 192 virtual Sample ranges(uint32_t i) const; 193 virtual uint32_t bucket_count() const; 194 195 // This function validates histogram construction arguments. It returns false 196 // if some of the arguments are bad but also corrects them so they should 197 // function on non-dcheck builds without crashing. 198 // Note. Currently it allow some bad input, e.g. 0 as minimum, but silently 199 // converts it to good input: 1. 200 // TODO(bcwhite): Use false returns to create "sink" histograms so that bad 201 // data doesn't create confusion on the servers. 
202 static bool InspectConstructionArguments(StringPiece name, 203 Sample* minimum, 204 Sample* maximum, 205 uint32_t* bucket_count); 206 207 // HistogramBase implementation: 208 uint64_t name_hash() const override; 209 HistogramType GetHistogramType() const override; 210 bool HasConstructionArguments(Sample expected_minimum, 211 Sample expected_maximum, 212 uint32_t expected_bucket_count) const override; 213 void Add(Sample value) override; 214 void AddCount(Sample value, int count) override; 215 std::unique_ptr<HistogramSamples> SnapshotSamples() const override; 216 std::unique_ptr<HistogramSamples> SnapshotDelta() override; 217 std::unique_ptr<HistogramSamples> SnapshotFinalDelta() const override; 218 void AddSamples(const HistogramSamples& samples) override; 219 bool AddSamplesFromPickle(base::PickleIterator* iter) override; 220 void WriteHTMLGraph(std::string* output) const override; 221 void WriteAscii(std::string* output) const override; 222 223 // Validates the histogram contents and CHECKs on errors. 224 // TODO(bcwhite): Remove this after https://crbug/836875. 225 void ValidateHistogramContents() const override; 226 227 protected: 228 // This class, defined entirely within the .cc file, contains all the 229 // common logic for building a Histogram and can be overridden by more 230 // specific types to alter details of how the creation is done. It is 231 // defined as an embedded class (rather than an anonymous one) so it 232 // can access the protected constructors. 233 class Factory; 234 235 // |ranges| should contain the underflow and overflow buckets. See top 236 // comments for example. 237 Histogram(const char* name, 238 Sample minimum, 239 Sample maximum, 240 const BucketRanges* ranges); 241 242 // Traditionally, histograms allocate their own memory for the bucket 243 // vector but "shared" histograms use memory regions allocated from a 244 // special memory segment that is passed in here. 
It is assumed that 245 // the life of this memory is managed externally and exceeds the lifetime 246 // of this object. Practically, this memory is never released until the 247 // process exits and the OS cleans it up. 248 Histogram(const char* name, 249 Sample minimum, 250 Sample maximum, 251 const BucketRanges* ranges, 252 const DelayedPersistentAllocation& counts, 253 const DelayedPersistentAllocation& logged_counts, 254 HistogramSamples::Metadata* meta, 255 HistogramSamples::Metadata* logged_meta); 256 257 // HistogramBase implementation: 258 void SerializeInfoImpl(base::Pickle* pickle) const override; 259 260 // Method to override to skip the display of the i'th bucket if it's empty. 261 virtual bool PrintEmptyBucket(uint32_t index) const; 262 263 // Get normalized size, relative to the ranges(i). 264 virtual double GetBucketSize(Count current, uint32_t i) const; 265 266 // Return a string description of what goes in a given bucket. 267 // Most commonly this is the numeric value, but in derived classes it may 268 // be a name (or string description) given to the bucket. 269 virtual const std::string GetAsciiBucketRange(uint32_t it) const; 270 271 private: 272 // Allow tests to corrupt our innards for testing purposes. 273 friend class HistogramTest; 274 FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest); 275 FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest); 276 FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts); 277 278 friend class StatisticsRecorder; // To allow it to delete duplicates. 279 friend class StatisticsRecorderTest; 280 281 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 282 base::PickleIterator* iter); 283 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 284 285 // Create a snapshot containing all samples (both logged and unlogged). 286 // Implementation of SnapshotSamples method with a more specific type for 287 // internal use. 
288 std::unique_ptr<SampleVector> SnapshotAllSamples() const; 289 290 // Create a copy of unlogged samples. 291 std::unique_ptr<SampleVector> SnapshotUnloggedSamples() const; 292 293 //---------------------------------------------------------------------------- 294 // Helpers for emitting Ascii graphic. Each method appends data to output. 295 296 void WriteAsciiImpl(bool graph_it, 297 const std::string& newline, 298 std::string* output) const; 299 300 // Find out how large (graphically) the largest bucket will appear to be. 301 double GetPeakBucketSize(const SampleVectorBase& samples) const; 302 303 // Write a common header message describing this histogram. 304 void WriteAsciiHeader(const SampleVectorBase& samples, 305 Count sample_count, 306 std::string* output) const; 307 308 // Write information about previous, current, and next buckets. 309 // Information such as cumulative percentage, etc. 310 void WriteAsciiBucketContext(const int64_t past, 311 const Count current, 312 const int64_t remaining, 313 const uint32_t i, 314 std::string* output) const; 315 316 // WriteJSON calls these. 317 void GetParameters(DictionaryValue* params) const override; 318 319 void GetCountAndBucketData(Count* count, 320 int64_t* sum, 321 ListValue* buckets) const override; 322 323 // Samples that have not yet been logged with SnapshotDelta(). 324 std::unique_ptr<SampleVectorBase> unlogged_samples_; 325 326 // Accumulation of all samples that have been logged with SnapshotDelta(). 327 std::unique_ptr<SampleVectorBase> logged_samples_; 328 329 #if DCHECK_IS_ON() // Don't waste memory if it won't be used. 330 // Flag to indicate if PrepareFinalDelta has been previously called. It is 331 // used to DCHECK that a final delta is not created multiple times. 
332 mutable bool final_delta_created_ = false; 333 #endif 334 335 DISALLOW_COPY_AND_ASSIGN(Histogram); 336 }; 337 338 //------------------------------------------------------------------------------ 339 340 // LinearHistogram is a more traditional histogram, with evenly spaced 341 // buckets. 342 class BASE_EXPORT LinearHistogram : public Histogram { 343 public: 344 ~LinearHistogram() override; 345 346 /* minimum should start from 1. 0 is as minimum is invalid. 0 is an implicit 347 default underflow bucket. */ 348 static HistogramBase* FactoryGet(const std::string& name, 349 Sample minimum, 350 Sample maximum, 351 uint32_t bucket_count, 352 int32_t flags); 353 static HistogramBase* FactoryTimeGet(const std::string& name, 354 TimeDelta minimum, 355 TimeDelta maximum, 356 uint32_t bucket_count, 357 int32_t flags); 358 359 // Overloads of the above two functions that take a const char* |name| param, 360 // to avoid code bloat from the std::string constructor being inlined into 361 // call sites. 362 static HistogramBase* FactoryGet(const char* name, 363 Sample minimum, 364 Sample maximum, 365 uint32_t bucket_count, 366 int32_t flags); 367 static HistogramBase* FactoryTimeGet(const char* name, 368 TimeDelta minimum, 369 TimeDelta maximum, 370 uint32_t bucket_count, 371 int32_t flags); 372 373 // Create a histogram using data in persistent storage. 374 static std::unique_ptr<HistogramBase> PersistentCreate( 375 const char* name, 376 Sample minimum, 377 Sample maximum, 378 const BucketRanges* ranges, 379 const DelayedPersistentAllocation& counts, 380 const DelayedPersistentAllocation& logged_counts, 381 HistogramSamples::Metadata* meta, 382 HistogramSamples::Metadata* logged_meta); 383 384 struct DescriptionPair { 385 Sample sample; 386 const char* description; // Null means end of a list of pairs. 387 }; 388 389 // Create a LinearHistogram and store a list of number/text values for use in 390 // writing the histogram graph. 
391 // |descriptions| can be NULL, which means no special descriptions to set. If 392 // it's not NULL, the last element in the array must has a NULL in its 393 // "description" field. 394 static HistogramBase* FactoryGetWithRangeDescription( 395 const std::string& name, 396 Sample minimum, 397 Sample maximum, 398 uint32_t bucket_count, 399 int32_t flags, 400 const DescriptionPair descriptions[]); 401 402 static void InitializeBucketRanges(Sample minimum, 403 Sample maximum, 404 BucketRanges* ranges); 405 406 // Overridden from Histogram: 407 HistogramType GetHistogramType() const override; 408 409 protected: 410 class Factory; 411 412 LinearHistogram(const char* name, 413 Sample minimum, 414 Sample maximum, 415 const BucketRanges* ranges); 416 417 LinearHistogram(const char* name, 418 Sample minimum, 419 Sample maximum, 420 const BucketRanges* ranges, 421 const DelayedPersistentAllocation& counts, 422 const DelayedPersistentAllocation& logged_counts, 423 HistogramSamples::Metadata* meta, 424 HistogramSamples::Metadata* logged_meta); 425 426 double GetBucketSize(Count current, uint32_t i) const override; 427 428 // If we have a description for a bucket, then return that. Otherwise 429 // let parent class provide a (numeric) description. 430 const std::string GetAsciiBucketRange(uint32_t i) const override; 431 432 // Skip printing of name for numeric range if we have a name (and if this is 433 // an empty bucket). 434 bool PrintEmptyBucket(uint32_t index) const override; 435 436 private: 437 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 438 base::PickleIterator* iter); 439 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 440 441 // For some ranges, we store a printable description of a bucket range. 442 // If there is no description, then GetAsciiBucketRange() uses parent class 443 // to provide a description. 
444 typedef std::map<Sample, std::string> BucketDescriptionMap; 445 BucketDescriptionMap bucket_description_; 446 447 DISALLOW_COPY_AND_ASSIGN(LinearHistogram); 448 }; 449 450 //------------------------------------------------------------------------------ 451 452 // ScaledLinearHistogram is a wrapper around a linear histogram that scales the 453 // counts down by some factor. Remainder values are kept locally but lost when 454 // uploaded or serialized. The integral counts are rounded up/down so should 455 // average to the correct value when many reports are added. 456 // 457 // This is most useful when adding many counts at once via AddCount() that can 458 // cause overflows of the 31-bit counters, usually with an enum as the value. 459 class BASE_EXPORT ScaledLinearHistogram { 460 using AtomicCount = Histogram::AtomicCount; 461 using Sample = Histogram::Sample; 462 463 public: 464 // Currently only works with "exact" linear histograms: minimum=1, maximum=N, 465 // and bucket_count=N+1. 466 ScaledLinearHistogram(const char* name, 467 Sample minimum, 468 Sample maximum, 469 uint32_t bucket_count, 470 int32_t scale, 471 int32_t flags); 472 473 ~ScaledLinearHistogram(); 474 475 // Like AddCount() but actually accumulates |count|/|scale| and increments 476 // the accumulated remainder by |count|%|scale|. An additional increment 477 // is done when the remainder has grown sufficiently large. 478 void AddScaledCount(Sample value, int count); 479 scale()480 int32_t scale() const { return scale_; } histogram()481 LinearHistogram* histogram() { return histogram_; } 482 483 private: 484 // Pointer to the underlying histogram. Ownership of it remains with 485 // the statistics-recorder. 486 LinearHistogram* const histogram_; 487 488 // The scale factor of the sample counts. 489 const int32_t scale_; 490 491 // A vector of "remainder" counts indexed by bucket number. 
These values 492 // may be negative as the scaled count is actually bumped once the 493 // remainder is 1/2 way to the scale value (thus "rounding"). 494 std::vector<AtomicCount> remainders_; 495 496 DISALLOW_COPY_AND_ASSIGN(ScaledLinearHistogram); 497 }; 498 499 //------------------------------------------------------------------------------ 500 501 // BooleanHistogram is a histogram for booleans. 502 class BASE_EXPORT BooleanHistogram : public LinearHistogram { 503 public: 504 static HistogramBase* FactoryGet(const std::string& name, int32_t flags); 505 506 // Overload of the above function that takes a const char* |name| param, 507 // to avoid code bloat from the std::string constructor being inlined into 508 // call sites. 509 static HistogramBase* FactoryGet(const char* name, int32_t flags); 510 511 // Create a histogram using data in persistent storage. 512 static std::unique_ptr<HistogramBase> PersistentCreate( 513 const char* name, 514 const BucketRanges* ranges, 515 const DelayedPersistentAllocation& counts, 516 const DelayedPersistentAllocation& logged_counts, 517 HistogramSamples::Metadata* meta, 518 HistogramSamples::Metadata* logged_meta); 519 520 HistogramType GetHistogramType() const override; 521 522 protected: 523 class Factory; 524 525 private: 526 BooleanHistogram(const char* name, const BucketRanges* ranges); 527 BooleanHistogram(const char* name, 528 const BucketRanges* ranges, 529 const DelayedPersistentAllocation& counts, 530 const DelayedPersistentAllocation& logged_counts, 531 HistogramSamples::Metadata* meta, 532 HistogramSamples::Metadata* logged_meta); 533 534 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 535 base::PickleIterator* iter); 536 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 537 538 DISALLOW_COPY_AND_ASSIGN(BooleanHistogram); 539 }; 540 541 //------------------------------------------------------------------------------ 542 543 // CustomHistogram is a histogram for a set of custom 
integers. 544 class BASE_EXPORT CustomHistogram : public Histogram { 545 public: 546 // |custom_ranges| contains a vector of limits on ranges. Each limit should be 547 // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward 548 // compatibility). The limits can be unordered or contain duplication, but 549 // client should not depend on this. 550 static HistogramBase* FactoryGet(const std::string& name, 551 const std::vector<Sample>& custom_ranges, 552 int32_t flags); 553 554 // Overload of the above function that takes a const char* |name| param, 555 // to avoid code bloat from the std::string constructor being inlined into 556 // call sites. 557 static HistogramBase* FactoryGet(const char* name, 558 const std::vector<Sample>& custom_ranges, 559 int32_t flags); 560 561 // Create a histogram using data in persistent storage. 562 static std::unique_ptr<HistogramBase> PersistentCreate( 563 const char* name, 564 const BucketRanges* ranges, 565 const DelayedPersistentAllocation& counts, 566 const DelayedPersistentAllocation& logged_counts, 567 HistogramSamples::Metadata* meta, 568 HistogramSamples::Metadata* logged_meta); 569 570 // Overridden from Histogram: 571 HistogramType GetHistogramType() const override; 572 573 // Helper method for transforming an array of valid enumeration values 574 // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION. 575 // This function ensures that a guard bucket exists right after any 576 // valid sample value (unless the next higher sample is also a valid value), 577 // so that invalid samples never fall into the same bucket as valid samples. 
578 static std::vector<Sample> ArrayToCustomEnumRanges( 579 base::span<const Sample> values); 580 581 protected: 582 class Factory; 583 584 CustomHistogram(const char* name, const BucketRanges* ranges); 585 586 CustomHistogram(const char* name, 587 const BucketRanges* ranges, 588 const DelayedPersistentAllocation& counts, 589 const DelayedPersistentAllocation& logged_counts, 590 HistogramSamples::Metadata* meta, 591 HistogramSamples::Metadata* logged_meta); 592 593 // HistogramBase implementation: 594 void SerializeInfoImpl(base::Pickle* pickle) const override; 595 596 double GetBucketSize(Count current, uint32_t i) const override; 597 598 private: 599 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 600 base::PickleIterator* iter); 601 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 602 603 static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges); 604 605 DISALLOW_COPY_AND_ASSIGN(CustomHistogram); 606 }; 607 608 } // namespace base 609 610 #endif // BASE_METRICS_HISTOGRAM_H_ 611