• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/variations/entropy_provider.h"
6 
7 #include <cmath>
8 #include <limits>
9 #include <numeric>
10 
11 #include "base/basictypes.h"
12 #include "base/guid.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/rand_util.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "components/variations/metrics_util.h"
17 #include "testing/gtest/include/gtest/gtest.h"
18 
19 namespace metrics {
20 
21 namespace {
22 
23 // Size of the low entropy source to use for the permuted entropy provider
24 // in tests.
25 const size_t kMaxLowEntropySize = 8000;
26 
27 // Field trial names used in unit tests.
28 const char* const kTestTrialNames[] = { "TestTrial", "AnotherTestTrial",
29                                         "NewTabButton" };
30 
31 // Computes the Chi-Square statistic for |values| assuming they follow a uniform
32 // distribution, where each entry has expected value |expected_value|.
33 //
34 // The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
35 // value and E is the expected value.
ComputeChiSquare(const std::vector<int> & values,double expected_value)36 double ComputeChiSquare(const std::vector<int>& values,
37                         double expected_value) {
38   double sum = 0;
39   for (size_t i = 0; i < values.size(); ++i) {
40     const double delta = values[i] - expected_value;
41     sum += (delta * delta) / expected_value;
42   }
43   return sum;
44 }
45 
46 // Computes SHA1-based entropy for the given |trial_name| based on
47 // |entropy_source|
GenerateSHA1Entropy(const std::string & entropy_source,const std::string & trial_name)48 double GenerateSHA1Entropy(const std::string& entropy_source,
49                            const std::string& trial_name) {
50   SHA1EntropyProvider sha1_provider(entropy_source);
51   return sha1_provider.GetEntropyForTrial(trial_name, 0);
52 }
53 
54 // Generates permutation-based entropy for the given |trial_name| based on
55 // |entropy_source| which must be in the range [0, entropy_max).
GeneratePermutedEntropy(uint16 entropy_source,size_t entropy_max,const std::string & trial_name)56 double GeneratePermutedEntropy(uint16 entropy_source,
57                                size_t entropy_max,
58                                const std::string& trial_name) {
59   PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
60   return permuted_provider.GetEntropyForTrial(trial_name, 0);
61 }
62 
63 // Helper interface for testing used to generate entropy values for a given
64 // field trial. Unlike EntropyProvider, which keeps the low/high entropy source
65 // value constant and generates entropy for different trial names, instances
66 // of TrialEntropyGenerator keep the trial name constant and generate low/high
67 // entropy source values internally to produce each output entropy value.
68 class TrialEntropyGenerator {
69  public:
~TrialEntropyGenerator()70   virtual ~TrialEntropyGenerator() {}
71   virtual double GenerateEntropyValue() const = 0;
72 };
73 
74 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
75 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of
76 // entropy corresponding to a low entropy source).
77 class SHA1EntropyGenerator : public TrialEntropyGenerator {
78  public:
SHA1EntropyGenerator(const std::string & trial_name)79   explicit SHA1EntropyGenerator(const std::string& trial_name)
80       : trial_name_(trial_name) {
81   }
82 
~SHA1EntropyGenerator()83   virtual ~SHA1EntropyGenerator() {
84   }
85 
GenerateEntropyValue() const86   virtual double GenerateEntropyValue() const OVERRIDE {
87     // Use a random GUID + 13 additional bits of entropy to match how the
88     // SHA1EntropyProvider is used in metrics_service.cc.
89     const int low_entropy_source =
90         static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
91     const std::string high_entropy_source =
92         base::GenerateGUID() + base::IntToString(low_entropy_source);
93     return GenerateSHA1Entropy(high_entropy_source, trial_name_);
94   }
95 
96  private:
97   std::string trial_name_;
98 
99   DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
100 };
101 
102 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
103 // using 13-bit low entropy source values.
104 class PermutedEntropyGenerator : public TrialEntropyGenerator {
105  public:
PermutedEntropyGenerator(const std::string & trial_name)106   explicit PermutedEntropyGenerator(const std::string& trial_name)
107       : mapping_(kMaxLowEntropySize) {
108     // Note: Given a trial name, the computed mapping will be the same.
109     // As a performance optimization, pre-compute the mapping once per trial
110     // name and index into it for each entropy value.
111     const uint32 randomization_seed = HashName(trial_name);
112     internal::PermuteMappingUsingRandomizationSeed(randomization_seed,
113                                                    &mapping_);
114   }
115 
~PermutedEntropyGenerator()116   virtual ~PermutedEntropyGenerator() {
117   }
118 
GenerateEntropyValue() const119   virtual double GenerateEntropyValue() const OVERRIDE {
120     const int low_entropy_source =
121         static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
122     return mapping_[low_entropy_source] /
123            static_cast<double>(kMaxLowEntropySize);
124   }
125 
126  private:
127   std::vector<uint16> mapping_;
128 
129   DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
130 };
131 
132 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
133 // of Fit Test.
PerformEntropyUniformityTest(const std::string & trial_name,const TrialEntropyGenerator & entropy_generator)134 void PerformEntropyUniformityTest(
135     const std::string& trial_name,
136     const TrialEntropyGenerator& entropy_generator) {
137   // Number of buckets in the simulated field trials.
138   const size_t kBucketCount = 20;
139   // Max number of iterations to perform before giving up and failing.
140   const size_t kMaxIterationCount = 100000;
141   // The number of iterations to perform before each time the statistical
142   // significance of the results is checked.
143   const size_t kCheckIterationCount = 10000;
144   // This is the Chi-Square threshold from the Chi-Square statistic table for
145   // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
146   // level. See: http://www.medcalc.org/manual/chi-square-table.php
147   const double kChiSquareThreshold = 43.82;
148 
149   std::vector<int> distribution(kBucketCount);
150 
151   for (size_t i = 1; i <= kMaxIterationCount; ++i) {
152     const double entropy_value = entropy_generator.GenerateEntropyValue();
153     const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
154     ASSERT_LT(bucket, kBucketCount);
155     distribution[bucket] += 1;
156 
157     // After |kCheckIterationCount| iterations, compute the Chi-Square
158     // statistic of the distribution. If the resulting statistic is greater
159     // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
160     // that the observed samples do not follow a uniform distribution.
161     //
162     // However, since 99.9% would still result in a false negative every
163     // 1000 runs of the test, do not treat it as a failure (else the test
164     // will be flaky). Instead, perform additional iterations to determine
165     // if the distribution will converge, up to |kMaxIterationCount|.
166     if ((i % kCheckIterationCount) == 0) {
167       const double expected_value_per_bucket =
168           static_cast<double>(i) / kBucketCount;
169       const double chi_square =
170           ComputeChiSquare(distribution, expected_value_per_bucket);
171       if (chi_square < kChiSquareThreshold)
172         break;
173 
174       // If |i == kMaxIterationCount|, the Chi-Square statistic did not
175       // converge after |kMaxIterationCount|.
176       EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
177           trial_name << " with chi_square = " << chi_square <<
178           " after " << kMaxIterationCount << " iterations.";
179     }
180   }
181 }
182 
183 }  // namespace
184 
TEST(EntropyProviderTest,UseOneTimeRandomizationSHA1)185 TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
186   // Simply asserts that two trials using one-time randomization
187   // that have different names, normally generate different results.
188   //
189   // Note that depending on the one-time random initialization, they
190   // _might_ actually give the same result, but we know that given
191   // the particular client_id we use for unit tests they won't.
192   base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
193   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
194   scoped_refptr<base::FieldTrial> trials[] = {
195       base::FieldTrialList::FactoryGetFieldTrial(
196           "one", 100, "default", kNoExpirationYear, 1, 1,
197           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
198       base::FieldTrialList::FactoryGetFieldTrial(
199           "two", 100, "default", kNoExpirationYear, 1, 1,
200           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
201   };
202 
203   for (size_t i = 0; i < arraysize(trials); ++i) {
204     for (int j = 0; j < 100; ++j)
205       trials[i]->AppendGroup(std::string(), 1);
206   }
207 
208   // The trials are most likely to give different results since they have
209   // different names.
210   EXPECT_NE(trials[0]->group(), trials[1]->group());
211   EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
212 }
213 
TEST(EntropyProviderTest,UseOneTimeRandomizationPermuted)214 TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
215   // Simply asserts that two trials using one-time randomization
216   // that have different names, normally generate different results.
217   //
218   // Note that depending on the one-time random initialization, they
219   // _might_ actually give the same result, but we know that given
220   // the particular client_id we use for unit tests they won't.
221   base::FieldTrialList field_trial_list(
222       new PermutedEntropyProvider(1234, kMaxLowEntropySize));
223   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
224   scoped_refptr<base::FieldTrial> trials[] = {
225       base::FieldTrialList::FactoryGetFieldTrial(
226           "one", 100, "default", kNoExpirationYear, 1, 1,
227           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
228       base::FieldTrialList::FactoryGetFieldTrial(
229           "two", 100, "default", kNoExpirationYear, 1, 1,
230           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
231   };
232 
233   for (size_t i = 0; i < arraysize(trials); ++i) {
234     for (int j = 0; j < 100; ++j)
235       trials[i]->AppendGroup(std::string(), 1);
236   }
237 
238   // The trials are most likely to give different results since they have
239   // different names.
240   EXPECT_NE(trials[0]->group(), trials[1]->group());
241   EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
242 }
243 
TEST(EntropyProviderTest,UseOneTimeRandomizationWithCustomSeedPermuted)244 TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
245   // Ensures that two trials with different names but the same custom seed used
246   // for one time randomization produce the same group assignments.
247   base::FieldTrialList field_trial_list(
248       new PermutedEntropyProvider(1234, kMaxLowEntropySize));
249   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
250   const uint32 kCustomSeed = 9001;
251   scoped_refptr<base::FieldTrial> trials[] = {
252       base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
253           "one", 100, "default", kNoExpirationYear, 1, 1,
254           base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
255       base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
256           "two", 100, "default", kNoExpirationYear, 1, 1,
257           base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
258   };
259 
260   for (size_t i = 0; i < arraysize(trials); ++i) {
261     for (int j = 0; j < 100; ++j)
262       trials[i]->AppendGroup(std::string(), 1);
263   }
264 
265   // Normally, these trials should produce different groups, but if the same
266   // custom seed is used, they should produce the same group assignment.
267   EXPECT_EQ(trials[0]->group(), trials[1]->group());
268   EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
269 }
270 
TEST(EntropyProviderTest,SHA1Entropy)271 TEST(EntropyProviderTest, SHA1Entropy) {
272   const double results[] = { GenerateSHA1Entropy("hi", "1"),
273                              GenerateSHA1Entropy("there", "1") };
274 
275   EXPECT_NE(results[0], results[1]);
276   for (size_t i = 0; i < arraysize(results); ++i) {
277     EXPECT_LE(0.0, results[i]);
278     EXPECT_GT(1.0, results[i]);
279   }
280 
281   EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
282             GenerateSHA1Entropy("yo", "1"));
283   EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
284             GenerateSHA1Entropy("yo", "else"));
285 }
286 
TEST(EntropyProviderTest,PermutedEntropy)287 TEST(EntropyProviderTest, PermutedEntropy) {
288   const double results[] = {
289       GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
290       GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
291 
292   EXPECT_NE(results[0], results[1]);
293   for (size_t i = 0; i < arraysize(results); ++i) {
294     EXPECT_LE(0.0, results[i]);
295     EXPECT_GT(1.0, results[i]);
296   }
297 
298   EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
299             GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
300   EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
301             GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
302 }
303 
TEST(EntropyProviderTest,PermutedEntropyProviderResults)304 TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
305   // Verifies that PermutedEntropyProvider produces expected results. This
306   // ensures that the results are the same between platforms and ensures that
307   // changes to the implementation do not regress this accidentally.
308 
309   EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
310                    GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
311   EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
312                    GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
313   EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
314                    GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
315 }
316 
TEST(EntropyProviderTest,SHA1EntropyIsUniform)317 TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
318   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
319     SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
320     PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
321   }
322 }
323 
TEST(EntropyProviderTest,PermutedEntropyIsUniform)324 TEST(EntropyProviderTest, PermutedEntropyIsUniform) {
325   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
326     PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
327     PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
328   }
329 }
330 
TEST(EntropyProviderTest,SeededRandGeneratorIsUniform)331 TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) {
332   // Verifies that SeededRandGenerator has a uniform distribution.
333   //
334   // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
335 
336   const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
337   const uint32 kExpectedAverage = kTopOfRange / 2ULL;
338   const uint32 kAllowedVariance = kExpectedAverage / 50ULL;  // +/- 2%
339   const int kMinAttempts = 1000;
340   const int kMaxAttempts = 1000000;
341 
342   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
343     const uint32 seed = HashName(kTestTrialNames[i]);
344     internal::SeededRandGenerator rand_generator(seed);
345 
346     double cumulative_average = 0.0;
347     int count = 0;
348     while (count < kMaxAttempts) {
349       uint32 value = rand_generator(kTopOfRange);
350       cumulative_average = (count * cumulative_average + value) / (count + 1);
351 
352       // Don't quit too quickly for things to start converging, or we may have
353       // a false positive.
354       if (count > kMinAttempts &&
355           kExpectedAverage - kAllowedVariance < cumulative_average &&
356           cumulative_average < kExpectedAverage + kAllowedVariance) {
357         break;
358       }
359 
360       ++count;
361     }
362 
363     ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
364         kExpectedAverage << ", average ended at " << cumulative_average <<
365         ", for trial " << kTestTrialNames[i];
366   }
367 }
368 
369 }  // namespace metrics
370