#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Tests for results_stats."""

import os
import sys
import unittest

try:
  import numpy as np
except ImportError:
  np = None

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                '..')))
from statistical_analysis import results_stats


class StatisticalBenchmarkResultsAnalysisTest(unittest.TestCase):
  """Unit testing of several functions in results_stats."""

  def testGetChartsFromBenchmarkResultJson(self):
    """Unit test for errors raised when getting the charts element.

    Also makes sure that the 'trace' element is deleted if it exists.
    """
    input_json_wrong_format = {'charts_wrong': {}}
    input_json_empty = {'charts': {}}

    with self.assertRaises(ValueError):
      results_stats.GetChartsFromBenchmarkResultJson(input_json_wrong_format)
    with self.assertRaises(ValueError):
      results_stats.GetChartsFromBenchmarkResultJson(input_json_empty)

    input_json_with_trace = {'charts': {
        'trace': {},
        'Ex_metric_1': {
            'Ex_page_1': {'type': 'list_of_scalar_values', 'values': [1, 2]},
            'Ex_page_2': {'type': 'histogram', 'values': [1, 2]}},
        'Ex_metric_2': {
            'Ex_page_1': {'type': 'list_of_scalar_values'},
            'Ex_page_2': {'type': 'list_of_scalar_values',
                          'values': [1, 2]}}}}

    output = (results_stats.
              GetChartsFromBenchmarkResultJson(input_json_with_trace))
    # The 'trace' element, pages without a 'values' entry and pages whose
    # type is not 'list_of_scalar_values' are expected to be dropped.
    expected_output = {
        'Ex_metric_1': {
            'Ex_page_1': {'type': 'list_of_scalar_values',
                          'values': [1, 2]}},
        'Ex_metric_2': {
            'Ex_page_2': {'type': 'list_of_scalar_values',
                          'values': [1, 2]}}}

    self.assertEqual(output, expected_output)

  def testCreateBenchmarkResultDict(self):
    """Unit test for benchmark result dict created from a benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark and then
    compares the output dict against an expected predefined output dict.
    """
    metric_names = ['messageloop_start_time', 'open_tabs_time',
                    'window_display_time']
    metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]]

    input_json = {'charts': {}}
    for metric, metric_vals in zip(metric_names, metric_values):
      input_json['charts'][metric] = {'summary': {
          'values': metric_vals,
          'type': 'list_of_scalar_values'}}

    output = results_stats.CreateBenchmarkResultDict(input_json)
    expected_output = {'messageloop_start_time': [55, 72, 60],
                       'open_tabs_time': [54, 42, 65],
                       'window_display_time': [44, 89]}

    self.assertEqual(output, expected_output)

  def testCreatePagesetBenchmarkResultDict(self):
    """Unit test for pageset benchmark result dict created from benchmark json.

    Creates a json of the format created by tools/perf/run_benchmark when it
    includes a pageset and then compares the output dict against an expected
    predefined output dict.
""" metric_names = ['messageloop_start_time', 'open_tabs_time', 'window_display_time'] metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]] page_names = ['Ex_page_1', 'Ex_page_2'] input_json = {'charts': {}} for metric, metric_vals in zip(metric_names, metric_values): input_json['charts'][metric] = {'summary': {'values': [0, 1, 2, 3], 'type': 'list_of_scalar_values'}} for page in page_names: input_json['charts'][metric][page] = {'values': metric_vals, 'type': 'list_of_scalar_values'} output = results_stats.CreatePagesetBenchmarkResultDict(input_json) expected_output = {'messageloop_start_time': {'Ex_page_1': [55, 72, 60], 'Ex_page_2': [55, 72, 60]}, 'open_tabs_time': {'Ex_page_1': [54, 42, 65], 'Ex_page_2': [54, 42, 65]}, 'window_display_time': {'Ex_page_1': [44, 89], 'Ex_page_2': [44, 89]}} self.assertEqual(output, expected_output) def testCombinePValues(self): """Unit test for Fisher's Method that combines multiple p-values.""" test_p_values = [0.05, 0.04, 0.10, 0.07, 0.01] expected_output = 0.00047334256271885721 output = results_stats.CombinePValues(test_p_values) self.assertEqual(output, expected_output) def CreateRandomNormalDistribution(self, mean=0, size=30): """Creates two pseudo random samples for testing in multiple methods.""" if not np: raise ImportError('This function requires Numpy.') np.random.seed(0) sample = np.random.normal(loc=mean, scale=1, size=size) return sample def testIsNormallyDistributed(self): """Unit test for values returned when testing for normality.""" if not np: self.skipTest("Numpy is not installed.") test_samples = [self.CreateRandomNormalDistribution(0), self.CreateRandomNormalDistribution(1)] expected_outputs = [(True, 0.5253966450691223), (True, 0.5253913402557373)] for sample, expected_output in zip(test_samples, expected_outputs): output = results_stats.IsNormallyDistributed(sample) self.assertEqual(output, expected_output) def testAreSamplesDifferent(self): """Unit test for values returned after running the statistical tests. Creates two pseudo-random normally distributed samples to run the statistical tests and compares the resulting answer and p-value against their pre-calculated values. 
""" test_samples = [3 * [0, 0, 2, 4, 4], 3 * [5, 5, 7, 9, 9]] with self.assertRaises(results_stats.SampleSizeError): results_stats.AreSamplesDifferent(test_samples[0], test_samples[1], test=results_stats.MANN) with self.assertRaises(results_stats.NonNormalSampleError): results_stats.AreSamplesDifferent(test_samples[0], test_samples[1], test=results_stats.WELCH) test_samples_equal = (20 * [1], 20 * [1]) expected_output_equal = (False, 1.0) output_equal = results_stats.AreSamplesDifferent(test_samples_equal[0], test_samples_equal[1], test=results_stats.MANN) self.assertEqual(output_equal, expected_output_equal) if not np: self.skipTest("Numpy is not installed.") test_samples = [self.CreateRandomNormalDistribution(0), self.CreateRandomNormalDistribution(1)] test_options = results_stats.ALL_TEST_OPTIONS expected_outputs = [(True, 2 * 0.00068516628052438266), (True, 0.0017459498829507842), (True, 0.00084765230478226514)] for test, expected_output in zip(test_options, expected_outputs): output = results_stats.AreSamplesDifferent(test_samples[0], test_samples[1], test=test) self.assertEqual(output, expected_output) def testAssertThatKeysMatch(self): """Unit test for exception raised when input dicts' metrics don't match.""" differing_input_dicts = [{'messageloop_start_time': [55, 72, 60], 'display_time': [44, 89]}, {'messageloop_start_time': [55, 72, 60]}] with self.assertRaises(results_stats.DictMismatchError): results_stats.AssertThatKeysMatch(differing_input_dicts[0], differing_input_dicts[1]) def testAreBenchmarkResultsDifferent(self): """Unit test for statistical test outcome dict.""" test_input_dicts = [{'open_tabs_time': self.CreateRandomNormalDistribution(0), 'display_time': self.CreateRandomNormalDistribution(0)}, {'open_tabs_time': self.CreateRandomNormalDistribution(0), 'display_time': self.CreateRandomNormalDistribution(1)}] test_options = results_stats.ALL_TEST_OPTIONS expected_outputs = [{'open_tabs_time': (False, 2 * 0.49704973080841425), 'display_time': (True, 2 * 0.00068516628052438266)}, {'open_tabs_time': (False, 1.0), 'display_time': (True, 0.0017459498829507842)}, {'open_tabs_time': (False, 1.0), 'display_time': (True, 0.00084765230478226514)}] for test, expected_output in zip(test_options, expected_outputs): output = results_stats.AreBenchmarkResultsDifferent(test_input_dicts[0], test_input_dicts[1], test=test) self.assertEqual(output, expected_output) def testArePagesetBenchmarkResultsDifferent(self): """Unit test for statistical test outcome dict.""" distributions = (self.CreateRandomNormalDistribution(0), self.CreateRandomNormalDistribution(1)) test_input_dicts = ({'open_tabs_time': {'Ex_page_1': distributions[0], 'Ex_page_2': distributions[0]}, 'display_time': {'Ex_page_1': distributions[1], 'Ex_page_2': distributions[1]}}, {'open_tabs_time': {'Ex_page_1': distributions[0], 'Ex_page_2': distributions[1]}, 'display_time': {'Ex_page_1': distributions[1], 'Ex_page_2': distributions[0]}}) test_options = results_stats.ALL_TEST_OPTIONS expected_outputs = ({'open_tabs_time': # Mann. {'Ex_page_1': (False, 2 * 0.49704973080841425), 'Ex_page_2': (True, 2 * 0.00068516628052438266)}, 'display_time': {'Ex_page_1': (False, 2 * 0.49704973080841425), 'Ex_page_2': (True, 2 * 0.00068516628052438266)}}, {'open_tabs_time': # Kolmogorov. {'Ex_page_1': (False, 1.0), 'Ex_page_2': (True, 0.0017459498829507842)}, 'display_time': {'Ex_page_1': (False, 1.0), 'Ex_page_2': (True, 0.0017459498829507842)}}, {'open_tabs_time': # Welch. 
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)},
                         'display_time':
                         {'Ex_page_1': (False, 1.0),
                          'Ex_page_2': (True, 0.00084765230478226514)}})

    for test, expected_output in zip(test_options, expected_outputs):
      output = (results_stats.
                ArePagesetBenchmarkResultsDifferent(test_input_dicts[0],
                                                    test_input_dicts[1],
                                                    test=test))

      self.assertEqual(output, expected_output)


if __name__ == '__main__':
  sys.exit(unittest.main())