1#!/usr/bin/env python 2# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 3# 4# Use of this source code is governed by a BSD-style license 5# that can be found in the LICENSE file in the root of the source 6# tree. An additional intellectual property rights grant can be found 7# in the file PATENTS. All contributing project authors may 8# be found in the AUTHORS file in the root of the source tree. 9 10"""Finds the APM configuration that maximizes a provided metric by 11parsing the output generated apm_quality_assessment.py. 12""" 13 14from __future__ import division 15 16import collections 17import logging 18import os 19 20import quality_assessment.data_access as data_access 21import quality_assessment.collect_data as collect_data 22 23def _InstanceArgumentsParser(): 24 """Arguments parser factory. Extends the arguments from 'collect_data' 25 with a few extra for selecting what parameters to optimize for. 26 """ 27 parser = collect_data.InstanceArgumentsParser() 28 parser.description = ( 29 'Rudimentary optimization of a function over different parameter' 30 'combinations.') 31 32 parser.add_argument('-n', '--config_dir', required=False, 33 help=('path to the folder with the configuration files'), 34 default='apm_configs') 35 36 parser.add_argument('-p', '--params', required=True, nargs='+', 37 help=('parameters to parse from the config files in' 38 'config_dir')) 39 40 parser.add_argument('-z', '--params_not_to_optimize', required=False, 41 nargs='+', default=[], 42 help=('parameters from `params` not to be optimized for')) 43 44 return parser 45 46 47def _ConfigurationAndScores(data_frame, params, 48 params_not_to_optimize, config_dir): 49 """Returns a list of all configurations and scores. 50 51 Args: 52 data_frame: A pandas data frame with the scores and config name 53 returned by _FindScores. 54 params: The parameter names to parse from configs the config 55 directory 56 57 params_not_to_optimize: The parameter names which shouldn't affect 58 the optimal parameter 59 selection. E.g., fixed settings and not 60 tunable parameters. 61 62 config_dir: Path to folder with config files. 63 64 Returns: 65 Dictionary of the form 66 {param_combination: [{params: {param1: value1, ...}, 67 scores: {score1: value1, ...}}]}. 68 69 The key `param_combination` runs over all parameter combinations 70 of the parameters in `params` and not in 71 `params_not_to_optimize`. A corresponding value is a list of all 72 param combinations for params in `params_not_to_optimize` and 73 their scores. 74 """ 75 results = collections.defaultdict(list) 76 config_names = data_frame['apm_config'].drop_duplicates().values.tolist() 77 score_names = data_frame['eval_score_name'].drop_duplicates().values.tolist() 78 79 # Normalize the scores 80 normalization_constants = {} 81 for score_name in score_names: 82 scores = data_frame[data_frame.eval_score_name == score_name].score 83 normalization_constants[score_name] = max(scores) 84 85 params_to_optimize = [p for p in params if p not in params_not_to_optimize] 86 param_combination = collections.namedtuple("ParamCombination", 87 params_to_optimize) 88 89 for config_name in config_names: 90 config_json = data_access.AudioProcConfigFile.Load( 91 os.path.join(config_dir, config_name + ".json")) 92 scores = {} 93 data_cell = data_frame[data_frame.apm_config == config_name] 94 for score_name in score_names: 95 data_cell_scores = data_cell[data_cell.eval_score_name == 96 score_name].score 97 scores[score_name] = sum(data_cell_scores) / len(data_cell_scores) 98 scores[score_name] /= normalization_constants[score_name] 99 100 result = {'scores': scores, 'params': {}} 101 config_optimize_params = {} 102 for param in params: 103 if param in params_to_optimize: 104 config_optimize_params[param] = config_json['-' + param] 105 else: 106 result['params'][param] = config_json['-' + param] 107 108 current_param_combination = param_combination( 109 **config_optimize_params) 110 results[current_param_combination].append(result) 111 return results 112 113 114def _FindOptimalParameter(configs_and_scores, score_weighting): 115 """Finds the config producing the maximal score. 116 117 Args: 118 configs_and_scores: structure of the form returned by 119 _ConfigurationAndScores 120 121 score_weighting: a function to weight together all score values of 122 the form [{params: {param1: value1, ...}, scores: 123 {score1: value1, ...}}] into a numeric 124 value 125 Returns: 126 the config that has the largest values of |score_weighting| applied 127 to its scores. 128 """ 129 130 min_score = float('+inf') 131 best_params = None 132 for config in configs_and_scores: 133 scores_and_params = configs_and_scores[config] 134 current_score = score_weighting(scores_and_params) 135 if current_score < min_score: 136 min_score = current_score 137 best_params = config 138 logging.debug("Score: %f", current_score) 139 logging.debug("Config: %s", str(config)) 140 return best_params 141 142 143def _ExampleWeighting(scores_and_configs): 144 """Example argument to `_FindOptimalParameter` 145 Args: 146 scores_and_configs: a list of configs and scores, in the form 147 described in _FindOptimalParameter 148 Returns: 149 numeric value, the sum of all scores 150 """ 151 res = 0 152 for score_config in scores_and_configs: 153 res += sum(score_config['scores'].values()) 154 return res 155 156 157def main(): 158 # Init. 159 # TODO(alessiob): INFO once debugged. 160 logging.basicConfig(level=logging.DEBUG) 161 parser = _InstanceArgumentsParser() 162 args = parser.parse_args() 163 164 # Get the scores. 165 src_path = collect_data.ConstructSrcPath(args) 166 logging.debug('Src path <%s>', src_path) 167 scores_data_frame = collect_data.FindScores(src_path, args) 168 all_scores = _ConfigurationAndScores(scores_data_frame, 169 args.params, 170 args.params_not_to_optimize, 171 args.config_dir) 172 173 opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting) 174 175 logging.info('Optimal parameter combination: <%s>', opt_param) 176 logging.info('It\'s score values: <%s>', all_scores[opt_param]) 177 178if __name__ == "__main__": 179 main() 180