• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Histogram is an object that aggregates statistics, and can summarize them in
6 // various forms, including ASCII graphical, HTML, and numerically (as a
7 // vector of numbers corresponding to each of the aggregating buckets).
8 
9 // It supports calls to accumulate either time intervals (which are processed
10 // as an integral number of milliseconds), or arbitrary integral units.
11 
12 // For Histogram(exponential histogram), LinearHistogram and CustomHistogram,
13 // the minimum for a declared range is 1 (instead of 0), while the maximum is
14 // (HistogramBase::kSampleType_MAX - 1). Currently you can declare histograms
15 // with ranges exceeding those limits (e.g. 0 as minimal or
16 // HistogramBase::kSampleType_MAX as maximal), but those excesses will be
17 // silently clamped to those limits (for backwards compatibility with existing
18 // code). Best practice is to not exceed the limits.
19 
20 // Each use of a histogram with the same name will reference the same underlying
21 // data, so it is safe to record to the same histogram from multiple locations
22 // in the code. It is a runtime error if all uses of the same histogram do not
23 // agree exactly in type, bucket size and range.
24 
25 // For Histogram and LinearHistogram, the maximum for a declared range should
26 // always be strictly larger than the declared minimum. Zero and
27 // HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
28 // so the smallest legal bucket_count is 3. However CustomHistogram can have
29 // bucket count as 2 (when you give a custom ranges vector containing only 1
30 // range).
31 // For these 3 kinds of histograms, the max bucket count is always
32 // (Histogram::kBucketCount_MAX - 1).
33 
34 // The buckets layout of class Histogram is exponential. For example, buckets
35 // might contain (sequentially) the count of values in the following intervals:
36 // [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
37 // That bucket allocation would actually result from construction of a histogram
38 // for values between 1 and 64, with 8 buckets, such as:
39 // Histogram count("some name", 1, 64, 8);
40 // Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
41 // are also counted by the constructor in the user supplied "bucket_count"
42 // argument.
43 // The above example has an exponential ratio of 2 (doubling the bucket width
44 // in each consecutive bucket).  The Histogram class automatically calculates
45 // the smallest ratio that it can use to construct the number of buckets
46 // selected in the constructor.  As another example, if you had 50 buckets,
47 // and millisecond time values from 1 to 10000, then the ratio between
48 // consecutive bucket widths will be approximately the 50th
49 // root of 10000.  This approach provides very fine grain (narrow) buckets
50 // at the low end of the histogram scale, but allows the histogram to cover a
51 // gigantic range with the addition of very few buckets.
52 
53 // Usually we use macros to define and use a histogram, which are defined in
54 // base/metrics/histogram_macros.h. Note: Callers should include that header
55 // directly if they only access the histogram APIs through macros.
56 //
57 // Macros use a pattern involving a function static variable, that is a pointer
58 // to a histogram.  This static is explicitly initialized on any thread
59 // that detects an uninitialized (NULL) pointer.  The potentially racy
60 // initialization is not a problem as it is always set to point to the same
61 // value (i.e., the FactoryGet always returns the same value).  FactoryGet
62 // is also completely thread safe, which results in a completely thread safe,
63 // and relatively fast, set of counters.  To avoid races at shutdown, the static
64 // pointer is NOT deleted, and we leak the histograms at process termination.
65 
66 #ifndef BASE_METRICS_HISTOGRAM_H_
67 #define BASE_METRICS_HISTOGRAM_H_
68 
69 #include <stddef.h>
70 #include <stdint.h>
71 
72 #include <map>
73 #include <string>
74 #include <vector>
75 
76 #include "base/base_export.h"
77 #include "base/compiler_specific.h"
78 #include "base/gtest_prod_util.h"
79 #include "base/logging.h"
80 #include "base/macros.h"
81 #include "base/memory/scoped_ptr.h"
82 #include "base/metrics/bucket_ranges.h"
83 #include "base/metrics/histogram_base.h"
84 // TODO(asvitkine): Migrate callers to include this directly and remove this.
85 #include "base/metrics/histogram_macros.h"
86 #include "base/metrics/histogram_samples.h"
87 #include "base/time/time.h"
88 
89 namespace base {
90 
91 class BooleanHistogram;
92 class CustomHistogram;
93 class Histogram;
94 class LinearHistogram;
95 class Pickle;
96 class PickleIterator;
97 class SampleVector;
98 
// Histogram records integral samples into exponentially sized buckets (see the
// bucket layout example in the comments at the top of this file). Instances
// are obtained through the static FactoryGet()/FactoryTimeGet() functions; the
// constructor is protected.
99 class BASE_EXPORT Histogram : public HistogramBase {
100  public:
101   // Maximum number of buckets in a histogram: 16,384 (defined out of line).
102   static const size_t kBucketCount_MAX;
103
104   typedef std::vector<Count> Counts;
105
106   //----------------------------------------------------------------------------
107   // For a valid histogram, input should follow these restrictions:
108   // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
109   //              normalized up to 1)
110   // maximum > minimum
111   // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
112   // Additionally,
113   // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
114   // more buckets than the range of numbers; having more buckets than 1 per
115   // value in the range would be nonsensical.
116   static HistogramBase* FactoryGet(const std::string& name,
117                                    Sample minimum,
118                                    Sample maximum,
119                                    size_t bucket_count,
120                                    int32_t flags);
121   static HistogramBase* FactoryTimeGet(const std::string& name,
122                                        base::TimeDelta minimum,
123                                        base::TimeDelta maximum,
124                                        size_t bucket_count,
125                                        int32_t flags);
126
127   // Overloads of the above two functions that take a const char* |name| param,
128   // to avoid code bloat from the std::string constructor being inlined into
129   // call sites.
130   static HistogramBase* FactoryGet(const char* name,
131                                    Sample minimum,
132                                    Sample maximum,
133                                    size_t bucket_count,
134                                    int32_t flags);
135   static HistogramBase* FactoryTimeGet(const char* name,
136                                        base::TimeDelta minimum,
137                                        base::TimeDelta maximum,
138                                        size_t bucket_count,
139                                        int32_t flags);
140
      // NOTE(review): presumably fills |ranges| with the exponential bucket
      // boundaries for [minimum, maximum] as described in the file header; the
      // implementation is not visible in this header -- confirm in the .cc file.
141   static void InitializeBucketRanges(Sample minimum,
142                                      Sample maximum,
143                                      BucketRanges* ranges);
144
145   // This constant is for FindCorruption. Since snapshots of histograms are
146   // taken asynchronously relative to sampling, and our counting code currently
147   // does not prevent race conditions, it is pretty likely that we'll catch a
148   // redundant count that doesn't match the sample count.  We allow for a
149   // certain amount of slop before flagging this as an inconsistency. Even with
150   // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
151   // so we'll eventually get the data, if it was not the result of a corruption.
152   static const int kCommonRaceBasedCountMismatch;
153
154   // Check to see if bucket ranges, counts and tallies in the snapshot are
155   // consistent with the bucket ranges and checksums in our histogram.  This can
156   // produce a false-alarm if a race occurred in the reading of the data during
157   // a SnapShot process, but should otherwise be false at all times (unless we
158   // have memory over-writes, or DRAM failures).
159   int FindCorruption(const HistogramSamples& samples) const override;
160
161   //----------------------------------------------------------------------------
162   // Accessors for factory construction, serialization and testing.
163   //----------------------------------------------------------------------------
declared_min()164   Sample declared_min() const { return declared_min_; }
declared_max()165   Sample declared_max() const { return declared_max_; }
166   virtual Sample ranges(size_t i) const;
167   virtual size_t bucket_count() const;
bucket_ranges()168   const BucketRanges* bucket_ranges() const { return bucket_ranges_; }
169
170   // This function validates histogram construction arguments. It returns false
171   // if some of the arguments are totally bad.
172   // Note: Currently it allows some bad input, e.g. 0 as minimum, but silently
173   // converts it to good input: 1.
174   // TODO(kaiwang): Be more restrictive and return false for any bad input, and
175   // make this a read-only validating function.
176   static bool InspectConstructionArguments(const std::string& name,
177                                            Sample* minimum,
178                                            Sample* maximum,
179                                            size_t* bucket_count);
180
181   // HistogramBase implementation:
182   uint64_t name_hash() const override;
183   HistogramType GetHistogramType() const override;
184   bool HasConstructionArguments(Sample expected_minimum,
185                                 Sample expected_maximum,
186                                 size_t expected_bucket_count) const override;
187   void Add(Sample value) override;
188   void AddCount(Sample value, int count) override;
189   scoped_ptr<HistogramSamples> SnapshotSamples() const override;
190   void AddSamples(const HistogramSamples& samples) override;
191   bool AddSamplesFromPickle(base::PickleIterator* iter) override;
192   void WriteHTMLGraph(std::string* output) const override;
193   void WriteAscii(std::string* output) const override;
194
195  protected:
196   // |ranges| should contain the underflow and overflow buckets. See top
197   // comments for example.
198   Histogram(const std::string& name,
199             Sample minimum,
200             Sample maximum,
201             const BucketRanges* ranges);
202
203   ~Histogram() override;
204
205   // HistogramBase implementation:
206   bool SerializeInfoImpl(base::Pickle* pickle) const override;
207
208   // Method to override to skip the display of the i'th bucket if it's empty.
209   virtual bool PrintEmptyBucket(size_t index) const;
210
211   // Get normalized size, relative to the ranges(i).
212   virtual double GetBucketSize(Count current, size_t i) const;
213
214   // Return a string description of what goes in a given bucket.
215   // Most commonly this is the numeric value, but in derived classes it may
216   // be a name (or string description) given to the bucket.
217   virtual const std::string GetAsciiBucketRange(size_t it) const;
218
219  private:
220   // Allow tests to corrupt our innards for testing purposes.
221   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
222   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
223   FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptBucketBounds);
224   FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);
225   FRIEND_TEST_ALL_PREFIXES(HistogramTest, NameMatchTest);
226   FRIEND_TEST_ALL_PREFIXES(HistogramTest, AddCountTest);
227
228   friend class StatisticsRecorder;  // To allow it to delete duplicates.
229   friend class StatisticsRecorderTest;
230
231   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
232       base::PickleIterator* iter);
233   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
234
235   // Implementation of SnapshotSamples function.
236   scoped_ptr<SampleVector> SnapshotSampleVector() const;
237
238   //----------------------------------------------------------------------------
239   // Helpers for emitting Ascii graphic.  Each method appends data to output.
240
241   void WriteAsciiImpl(bool graph_it,
242                       const std::string& newline,
243                       std::string* output) const;
244
245   // Find out how large (graphically) the largest bucket will appear to be.
246   double GetPeakBucketSize(const SampleVector& samples) const;
247
248   // Write a common header message describing this histogram.
249   void WriteAsciiHeader(const SampleVector& samples,
250                         Count sample_count,
251                         std::string* output) const;
252
253   // Write information about previous, current, and next buckets.
254   // Information such as cumulative percentage, etc.
255   void WriteAsciiBucketContext(const int64_t past,
256                                const Count current,
257                                const int64_t remaining,
258                                const size_t i,
259                                std::string* output) const;
260
261   // WriteJSON calls these.
262   void GetParameters(DictionaryValue* params) const override;
263
264   void GetCountAndBucketData(Count* count,
265                              int64_t* sum,
266                              ListValue* buckets) const override;
267
268   // Does not own this object. Should get from StatisticsRecorder.
269   const BucketRanges* bucket_ranges_;
270
271   Sample declared_min_;  // Less than this goes into the first bucket.
272   Sample declared_max_;  // Over this goes into the last bucket.
273
274   // Finally, provide the state that changes with the addition of each new
275   // sample.
276   scoped_ptr<SampleVector> samples_;
277
278   DISALLOW_COPY_AND_ASSIGN(Histogram);
279 };
280 
281 //------------------------------------------------------------------------------
282 
283 // LinearHistogram is a more traditional histogram, with evenly spaced
284 // buckets. It can additionally carry per-bucket text descriptions that are
    // used in place of the numeric range when printing (see
    // FactoryGetWithRangeDescription and GetAsciiBucketRange below).
285 class BASE_EXPORT LinearHistogram : public Histogram {
286  public:
287   ~LinearHistogram() override;
288
289   /* minimum should start from 1. 0 as minimum is invalid; 0 is the implicit
290      default underflow bucket. */
291   static HistogramBase* FactoryGet(const std::string& name,
292                                    Sample minimum,
293                                    Sample maximum,
294                                    size_t bucket_count,
295                                    int32_t flags);
296   static HistogramBase* FactoryTimeGet(const std::string& name,
297                                        TimeDelta minimum,
298                                        TimeDelta maximum,
299                                        size_t bucket_count,
300                                        int32_t flags);
301
302   // Overloads of the above two functions that take a const char* |name| param,
303   // to avoid code bloat from the std::string constructor being inlined into
304   // call sites.
305   static HistogramBase* FactoryGet(const char* name,
306                                    Sample minimum,
307                                    Sample maximum,
308                                    size_t bucket_count,
309                                    int32_t flags);
310   static HistogramBase* FactoryTimeGet(const char* name,
311                                        TimeDelta minimum,
312                                        TimeDelta maximum,
313                                        size_t bucket_count,
314                                        int32_t flags);
315
      // One number/text entry for FactoryGetWithRangeDescription(): associates a
      // sample value with the text to use for it in the histogram graph.
316   struct DescriptionPair {
317     Sample sample;
318     const char* description;  // Null means end of a list of pairs.
319   };
320
321   // Create a LinearHistogram and store a list of number/text values for use in
322   // writing the histogram graph.
323   // |descriptions| can be NULL, which means no special descriptions to set. If
324   // it's not NULL, the last element in the array must have a NULL in its
325   // "description" field.
326   static HistogramBase* FactoryGetWithRangeDescription(
327       const std::string& name,
328       Sample minimum,
329       Sample maximum,
330       size_t bucket_count,
331       int32_t flags,
332       const DescriptionPair descriptions[]);
333
      // NOTE(review): presumably fills |ranges| with evenly spaced bucket
      // boundaries for [minimum, maximum]; the implementation is not visible in
      // this header -- confirm in the .cc file.
334   static void InitializeBucketRanges(Sample minimum,
335                                      Sample maximum,
336                                      BucketRanges* ranges);
337
338   // Overridden from Histogram:
339   HistogramType GetHistogramType() const override;
340
341  protected:
342   LinearHistogram(const std::string& name,
343                   Sample minimum,
344                   Sample maximum,
345                   const BucketRanges* ranges);
346
347   double GetBucketSize(Count current, size_t i) const override;
348
349   // If we have a description for a bucket, then return that.  Otherwise
350   // let parent class provide a (numeric) description.
351   const std::string GetAsciiBucketRange(size_t i) const override;
352
353   // Skip printing of name for numeric range if we have a name (and if this is
354   // an empty bucket).
355   bool PrintEmptyBucket(size_t index) const override;
356
357  private:
358   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
359       base::PickleIterator* iter);
360   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
361
362   // For some ranges, we store a printable description of a bucket range.
363   // If there is no description, then GetAsciiBucketRange() uses parent class
364   // to provide a description.
365   typedef std::map<Sample, std::string> BucketDescriptionMap;
366   BucketDescriptionMap bucket_description_;
367
368   DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
369 };
370 
371 //------------------------------------------------------------------------------
372 
373 // BooleanHistogram is a histogram for booleans. It derives from
    // LinearHistogram.
374 class BASE_EXPORT BooleanHistogram : public LinearHistogram {
375  public:
      // Unlike the Histogram/LinearHistogram factories, no minimum, maximum or
      // bucket count is passed: only |name| and |flags|.
376   static HistogramBase* FactoryGet(const std::string& name, int32_t flags);
377
378   // Overload of the above function that takes a const char* |name| param,
379   // to avoid code bloat from the std::string constructor being inlined into
380   // call sites.
381   static HistogramBase* FactoryGet(const char* name, int32_t flags);
382
383   HistogramType GetHistogramType() const override;
384
385  private:
      // Private: instances are created through FactoryGet() (or by the friend
      // deserialization function declared below).
386   BooleanHistogram(const std::string& name, const BucketRanges* ranges);
387
388   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
389       base::PickleIterator* iter);
390   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
391
392   DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
393 };
394 
395 //------------------------------------------------------------------------------
396 
397 // CustomHistogram is a histogram for a set of custom integers: the bucket
    // boundaries are supplied by the caller as a vector of range limits rather
    // than derived from a minimum, maximum and bucket count.
398 class BASE_EXPORT CustomHistogram : public Histogram {
399  public:
400   // |custom_ranges| contains a vector of limits on ranges. Each limit should be
401   // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
402   // compatibility). The limits can be unordered or contain duplicates, but
403   // clients should not depend on this.
404   static HistogramBase* FactoryGet(const std::string& name,
405                                    const std::vector<Sample>& custom_ranges,
406                                    int32_t flags);
407
408   // Overload of the above function that takes a const char* |name| param,
409   // to avoid code bloat from the std::string constructor being inlined into
410   // call sites.
411   static HistogramBase* FactoryGet(const char* name,
412                                    const std::vector<Sample>& custom_ranges,
413                                    int32_t flags);
414
415   // Overridden from Histogram:
416   HistogramType GetHistogramType() const override;
417
418   // Helper method for transforming an array of valid enumeration values
419   // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION.
420   // This function ensures that a guard bucket exists right after any
421   // valid sample value (unless the next higher sample is also a valid value),
422   // so that invalid samples never fall into the same bucket as valid samples.
423   // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
424   static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
425                                                  size_t num_values);
426  protected:
427   CustomHistogram(const std::string& name,
428                   const BucketRanges* ranges);
429
430   // HistogramBase implementation:
431   bool SerializeInfoImpl(base::Pickle* pickle) const override;
432
433   double GetBucketSize(Count current, size_t i) const override;
434
435  private:
436   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
437       base::PickleIterator* iter);
438   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
439
      // NOTE(review): presumably these check a caller-supplied |custom_ranges|
      // vector and convert it into a BucketRanges object for FactoryGet(); the
      // implementations are not visible in this header -- confirm in the .cc file.
440   static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);
441   static BucketRanges* CreateBucketRangesFromCustomRanges(
442       const std::vector<Sample>& custom_ranges);
443
444   DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
445 };
446 
447 }  // namespace base
448 
449 #endif  // BASE_METRICS_HISTOGRAM_H_
450