// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Histogram is an object that aggregates statistics, and can summarize them in
// various forms, including ASCII graphical, HTML, and numerically (as a
// vector of numbers corresponding to each of the aggregating buckets).

// It supports calls to accumulate either time intervals (which are processed
// as integral number of milliseconds), or arbitrary integral units.

// For Histogram (exponential histogram), LinearHistogram and CustomHistogram,
// the minimum for a declared range is 1 (instead of 0), while the maximum is
// (HistogramBase::kSampleType_MAX - 1). Currently you can declare histograms
// with ranges exceeding those limits (e.g. 0 as minimal or
// HistogramBase::kSampleType_MAX as maximal), but those excesses will be
// silently clamped to those limits (for backwards compatibility with existing
// code). Best practice is to not exceed the limits.

// Each use of a histogram with the same name will reference the same underlying
// data, so it is safe to record to the same histogram from multiple locations
// in the code. It is a runtime error if all uses of the same histogram do not
// agree exactly in type, bucket size and range.

// For Histogram and LinearHistogram, the maximum for a declared range should
// always be larger than (not equal to) the minimal range. Zero and
// HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
// so the smallest legal bucket_count is 3. However CustomHistogram can have
// bucket count as 2 (when you give a custom ranges vector containing only 1
// range).
// For these 3 kinds of histograms, the max bucket count is always
// (Histogram::kBucketCount_MAX - 1).

// The buckets layout of class Histogram is exponential. For example, buckets
// might contain (sequentially) the count of values in the following intervals:
// [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
// That bucket allocation would actually result from construction of a histogram
// for values between 1 and 64, with 8 buckets, such as:
// Histogram count("some name", 1, 64, 8);
// Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
// are also counted by the constructor in the user supplied "bucket_count"
// argument.
// The above example has an exponential ratio of 2 (doubling the bucket width
// in each consecutive bucket). The Histogram class automatically calculates
// the smallest ratio that it can use to construct the number of buckets
// selected in the constructor. As another example, if you had 50 buckets,
// and millisecond time values from 1 to 10000, then the ratio between
// consecutive bucket widths will be approximately somewhere around the 50th
// root of 10000. This approach provides very fine grain (narrow) buckets
// at the low end of the histogram scale, but allows the histogram to cover a
// gigantic range with the addition of very few buckets.

// Usually we use macros to define and use a histogram, which are defined in
// base/metrics/histogram_macros.h. Note: Callers should include that header
// directly if they only access the histogram APIs through macros.
//
// Macros use a pattern involving a function static variable, that is a pointer
// to a histogram. This static is explicitly initialized on any thread
// that detects an uninitialized (NULL) pointer. The potentially racy
// initialization is not a problem as it is always set to point to the same
// value (i.e., the FactoryGet always returns the same value). FactoryGet
// is also completely thread safe, which results in a completely thread safe,
// and relatively fast, set of counters.
To avoid races at shutdown, the static 64 // pointer is NOT deleted, and we leak the histograms at process termination. 65 66 #ifndef BASE_METRICS_HISTOGRAM_H_ 67 #define BASE_METRICS_HISTOGRAM_H_ 68 69 #include <stddef.h> 70 #include <stdint.h> 71 72 #include <map> 73 #include <string> 74 #include <vector> 75 76 #include "base/base_export.h" 77 #include "base/compiler_specific.h" 78 #include "base/gtest_prod_util.h" 79 #include "base/logging.h" 80 #include "base/macros.h" 81 #include "base/memory/scoped_ptr.h" 82 #include "base/metrics/bucket_ranges.h" 83 #include "base/metrics/histogram_base.h" 84 // TODO(asvitkine): Migrate callers to to include this directly and remove this. 85 #include "base/metrics/histogram_macros.h" 86 #include "base/metrics/histogram_samples.h" 87 #include "base/time/time.h" 88 89 namespace base { 90 91 class BooleanHistogram; 92 class CustomHistogram; 93 class Histogram; 94 class LinearHistogram; 95 class Pickle; 96 class PickleIterator; 97 class SampleVector; 98 99 class BASE_EXPORT Histogram : public HistogramBase { 100 public: 101 // Initialize maximum number of buckets in histograms as 16,384. 102 static const size_t kBucketCount_MAX; 103 104 typedef std::vector<Count> Counts; 105 106 //---------------------------------------------------------------------------- 107 // For a valid histogram, input should follow these restrictions: 108 // minimum > 0 (if a minimum below 1 is specified, it will implicitly be 109 // normalized up to 1) 110 // maximum > minimum 111 // buckets > 2 [minimum buckets needed: underflow, overflow and the range] 112 // Additionally, 113 // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have 114 // more buckets than the range of numbers; having more buckets than 1 per 115 // value in the range would be nonsensical. 
116 static HistogramBase* FactoryGet(const std::string& name, 117 Sample minimum, 118 Sample maximum, 119 size_t bucket_count, 120 int32_t flags); 121 static HistogramBase* FactoryTimeGet(const std::string& name, 122 base::TimeDelta minimum, 123 base::TimeDelta maximum, 124 size_t bucket_count, 125 int32_t flags); 126 127 // Overloads of the above two functions that take a const char* |name| param, 128 // to avoid code bloat from the std::string constructor being inlined into 129 // call sites. 130 static HistogramBase* FactoryGet(const char* name, 131 Sample minimum, 132 Sample maximum, 133 size_t bucket_count, 134 int32_t flags); 135 static HistogramBase* FactoryTimeGet(const char* name, 136 base::TimeDelta minimum, 137 base::TimeDelta maximum, 138 size_t bucket_count, 139 int32_t flags); 140 141 static void InitializeBucketRanges(Sample minimum, 142 Sample maximum, 143 BucketRanges* ranges); 144 145 // This constant if for FindCorruption. Since snapshots of histograms are 146 // taken asynchronously relative to sampling, and our counting code currently 147 // does not prevent race conditions, it is pretty likely that we'll catch a 148 // redundant count that doesn't match the sample count. We allow for a 149 // certain amount of slop before flagging this as an inconsistency. Even with 150 // an inconsistency, we'll snapshot it again (for UMA in about a half hour), 151 // so we'll eventually get the data, if it was not the result of a corruption. 152 static const int kCommonRaceBasedCountMismatch; 153 154 // Check to see if bucket ranges, counts and tallies in the snapshot are 155 // consistent with the bucket ranges and checksums in our histogram. This can 156 // produce a false-alarm if a race occurred in the reading of the data during 157 // a SnapShot process, but should otherwise be false at all times (unless we 158 // have memory over-writes, or DRAM failures). 
159 int FindCorruption(const HistogramSamples& samples) const override; 160 161 //---------------------------------------------------------------------------- 162 // Accessors for factory construction, serialization and testing. 163 //---------------------------------------------------------------------------- declared_min()164 Sample declared_min() const { return declared_min_; } declared_max()165 Sample declared_max() const { return declared_max_; } 166 virtual Sample ranges(size_t i) const; 167 virtual size_t bucket_count() const; bucket_ranges()168 const BucketRanges* bucket_ranges() const { return bucket_ranges_; } 169 170 // This function validates histogram construction arguments. It returns false 171 // if some of the arguments are totally bad. 172 // Note. Currently it allow some bad input, e.g. 0 as minimum, but silently 173 // converts it to good input: 1. 174 // TODO(kaiwang): Be more restrict and return false for any bad input, and 175 // make this a readonly validating function. 176 static bool InspectConstructionArguments(const std::string& name, 177 Sample* minimum, 178 Sample* maximum, 179 size_t* bucket_count); 180 181 // HistogramBase implementation: 182 uint64_t name_hash() const override; 183 HistogramType GetHistogramType() const override; 184 bool HasConstructionArguments(Sample expected_minimum, 185 Sample expected_maximum, 186 size_t expected_bucket_count) const override; 187 void Add(Sample value) override; 188 void AddCount(Sample value, int count) override; 189 scoped_ptr<HistogramSamples> SnapshotSamples() const override; 190 void AddSamples(const HistogramSamples& samples) override; 191 bool AddSamplesFromPickle(base::PickleIterator* iter) override; 192 void WriteHTMLGraph(std::string* output) const override; 193 void WriteAscii(std::string* output) const override; 194 195 protected: 196 // |ranges| should contain the underflow and overflow buckets. See top 197 // comments for example. 
198 Histogram(const std::string& name, 199 Sample minimum, 200 Sample maximum, 201 const BucketRanges* ranges); 202 203 ~Histogram() override; 204 205 // HistogramBase implementation: 206 bool SerializeInfoImpl(base::Pickle* pickle) const override; 207 208 // Method to override to skip the display of the i'th bucket if it's empty. 209 virtual bool PrintEmptyBucket(size_t index) const; 210 211 // Get normalized size, relative to the ranges(i). 212 virtual double GetBucketSize(Count current, size_t i) const; 213 214 // Return a string description of what goes in a given bucket. 215 // Most commonly this is the numeric value, but in derived classes it may 216 // be a name (or string description) given to the bucket. 217 virtual const std::string GetAsciiBucketRange(size_t it) const; 218 219 private: 220 // Allow tests to corrupt our innards for testing purposes. 221 FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest); 222 FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest); 223 FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptBucketBounds); 224 FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts); 225 FRIEND_TEST_ALL_PREFIXES(HistogramTest, NameMatchTest); 226 FRIEND_TEST_ALL_PREFIXES(HistogramTest, AddCountTest); 227 228 friend class StatisticsRecorder; // To allow it to delete duplicates. 229 friend class StatisticsRecorderTest; 230 231 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 232 base::PickleIterator* iter); 233 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 234 235 // Implementation of SnapshotSamples function. 236 scoped_ptr<SampleVector> SnapshotSampleVector() const; 237 238 //---------------------------------------------------------------------------- 239 // Helpers for emitting Ascii graphic. Each method appends data to output. 
240 241 void WriteAsciiImpl(bool graph_it, 242 const std::string& newline, 243 std::string* output) const; 244 245 // Find out how large (graphically) the largest bucket will appear to be. 246 double GetPeakBucketSize(const SampleVector& samples) const; 247 248 // Write a common header message describing this histogram. 249 void WriteAsciiHeader(const SampleVector& samples, 250 Count sample_count, 251 std::string* output) const; 252 253 // Write information about previous, current, and next buckets. 254 // Information such as cumulative percentage, etc. 255 void WriteAsciiBucketContext(const int64_t past, 256 const Count current, 257 const int64_t remaining, 258 const size_t i, 259 std::string* output) const; 260 261 // WriteJSON calls these. 262 void GetParameters(DictionaryValue* params) const override; 263 264 void GetCountAndBucketData(Count* count, 265 int64_t* sum, 266 ListValue* buckets) const override; 267 268 // Does not own this object. Should get from StatisticsRecorder. 269 const BucketRanges* bucket_ranges_; 270 271 Sample declared_min_; // Less than this goes into the first bucket. 272 Sample declared_max_; // Over this goes into the last bucket. 273 274 // Finally, provide the state that changes with the addition of each new 275 // sample. 276 scoped_ptr<SampleVector> samples_; 277 278 DISALLOW_COPY_AND_ASSIGN(Histogram); 279 }; 280 281 //------------------------------------------------------------------------------ 282 283 // LinearHistogram is a more traditional histogram, with evenly spaced 284 // buckets. 285 class BASE_EXPORT LinearHistogram : public Histogram { 286 public: 287 ~LinearHistogram() override; 288 289 /* minimum should start from 1. 0 is as minimum is invalid. 0 is an implicit 290 default underflow bucket. 
*/ 291 static HistogramBase* FactoryGet(const std::string& name, 292 Sample minimum, 293 Sample maximum, 294 size_t bucket_count, 295 int32_t flags); 296 static HistogramBase* FactoryTimeGet(const std::string& name, 297 TimeDelta minimum, 298 TimeDelta maximum, 299 size_t bucket_count, 300 int32_t flags); 301 302 // Overloads of the above two functions that take a const char* |name| param, 303 // to avoid code bloat from the std::string constructor being inlined into 304 // call sites. 305 static HistogramBase* FactoryGet(const char* name, 306 Sample minimum, 307 Sample maximum, 308 size_t bucket_count, 309 int32_t flags); 310 static HistogramBase* FactoryTimeGet(const char* name, 311 TimeDelta minimum, 312 TimeDelta maximum, 313 size_t bucket_count, 314 int32_t flags); 315 316 struct DescriptionPair { 317 Sample sample; 318 const char* description; // Null means end of a list of pairs. 319 }; 320 321 // Create a LinearHistogram and store a list of number/text values for use in 322 // writing the histogram graph. 323 // |descriptions| can be NULL, which means no special descriptions to set. If 324 // it's not NULL, the last element in the array must has a NULL in its 325 // "description" field. 326 static HistogramBase* FactoryGetWithRangeDescription( 327 const std::string& name, 328 Sample minimum, 329 Sample maximum, 330 size_t bucket_count, 331 int32_t flags, 332 const DescriptionPair descriptions[]); 333 334 static void InitializeBucketRanges(Sample minimum, 335 Sample maximum, 336 BucketRanges* ranges); 337 338 // Overridden from Histogram: 339 HistogramType GetHistogramType() const override; 340 341 protected: 342 LinearHistogram(const std::string& name, 343 Sample minimum, 344 Sample maximum, 345 const BucketRanges* ranges); 346 347 double GetBucketSize(Count current, size_t i) const override; 348 349 // If we have a description for a bucket, then return that. Otherwise 350 // let parent class provide a (numeric) description. 
351 const std::string GetAsciiBucketRange(size_t i) const override; 352 353 // Skip printing of name for numeric range if we have a name (and if this is 354 // an empty bucket). 355 bool PrintEmptyBucket(size_t index) const override; 356 357 private: 358 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 359 base::PickleIterator* iter); 360 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 361 362 // For some ranges, we store a printable description of a bucket range. 363 // If there is no description, then GetAsciiBucketRange() uses parent class 364 // to provide a description. 365 typedef std::map<Sample, std::string> BucketDescriptionMap; 366 BucketDescriptionMap bucket_description_; 367 368 DISALLOW_COPY_AND_ASSIGN(LinearHistogram); 369 }; 370 371 //------------------------------------------------------------------------------ 372 373 // BooleanHistogram is a histogram for booleans. 374 class BASE_EXPORT BooleanHistogram : public LinearHistogram { 375 public: 376 static HistogramBase* FactoryGet(const std::string& name, int32_t flags); 377 378 // Overload of the above function that takes a const char* |name| param, 379 // to avoid code bloat from the std::string constructor being inlined into 380 // call sites. 381 static HistogramBase* FactoryGet(const char* name, int32_t flags); 382 383 HistogramType GetHistogramType() const override; 384 385 private: 386 BooleanHistogram(const std::string& name, const BucketRanges* ranges); 387 388 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 389 base::PickleIterator* iter); 390 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 391 392 DISALLOW_COPY_AND_ASSIGN(BooleanHistogram); 393 }; 394 395 //------------------------------------------------------------------------------ 396 397 // CustomHistogram is a histogram for a set of custom integers. 
398 class BASE_EXPORT CustomHistogram : public Histogram { 399 public: 400 // |custom_ranges| contains a vector of limits on ranges. Each limit should be 401 // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward 402 // compatibility). The limits can be unordered or contain duplication, but 403 // client should not depend on this. 404 static HistogramBase* FactoryGet(const std::string& name, 405 const std::vector<Sample>& custom_ranges, 406 int32_t flags); 407 408 // Overload of the above function that takes a const char* |name| param, 409 // to avoid code bloat from the std::string constructor being inlined into 410 // call sites. 411 static HistogramBase* FactoryGet(const char* name, 412 const std::vector<Sample>& custom_ranges, 413 int32_t flags); 414 415 // Overridden from Histogram: 416 HistogramType GetHistogramType() const override; 417 418 // Helper method for transforming an array of valid enumeration values 419 // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION. 420 // This function ensures that a guard bucket exists right after any 421 // valid sample value (unless the next higher sample is also a valid value), 422 // so that invalid samples never fall into the same bucket as valid samples. 423 // TODO(kaiwang): Change name to ArrayToCustomEnumRanges. 
424 static std::vector<Sample> ArrayToCustomRanges(const Sample* values, 425 size_t num_values); 426 protected: 427 CustomHistogram(const std::string& name, 428 const BucketRanges* ranges); 429 430 // HistogramBase implementation: 431 bool SerializeInfoImpl(base::Pickle* pickle) const override; 432 433 double GetBucketSize(Count current, size_t i) const override; 434 435 private: 436 friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo( 437 base::PickleIterator* iter); 438 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter); 439 440 static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges); 441 static BucketRanges* CreateBucketRangesFromCustomRanges( 442 const std::vector<Sample>& custom_ranges); 443 444 DISALLOW_COPY_AND_ASSIGN(CustomHistogram); 445 }; 446 447 } // namespace base 448 449 #endif // BASE_METRICS_HISTOGRAM_H_ 450