#!/usr/bin/env python
# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS.  All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
"""Finds the APM configuration that optimizes a provided metric by
parsing the output generated by apm_quality_assessment.py.

Note: the search implemented in `_FindOptimalParameter` selects the
parameter combination with the *lowest* weighted score, so the weighting
function passed to it is treated as a cost.
"""

from __future__ import division

import collections
import logging
import os

import quality_assessment.data_access as data_access
import quality_assessment.collect_data as collect_data


def _InstanceArgumentsParser():
    """Arguments parser factory.

    Extends the arguments from 'collect_data' with a few extra for
    selecting what parameters to optimize for.

    Returns:
      The parser created by collect_data.InstanceArgumentsParser() with
      the additional --config_dir, --params and --params_not_to_optimize
      arguments registered.
    """
    parser = collect_data.InstanceArgumentsParser()
    # Adjacent string literals are concatenated verbatim, so the separating
    # space has to be part of one of the fragments.
    parser.description = (
        'Rudimentary optimization of a function over different parameter '
        'combinations.')

    parser.add_argument(
        '-n',
        '--config_dir',
        required=False,
        help=('path to the folder with the configuration files'),
        default='apm_configs')

    parser.add_argument('-p',
                        '--params',
                        required=True,
                        nargs='+',
                        help=('parameters to parse from the config files in '
                              'config_dir'))

    parser.add_argument(
        '-z',
        '--params_not_to_optimize',
        required=False,
        nargs='+',
        default=[],
        help=('parameters from `params` not to be optimized for'))

    return parser


def _ConfigurationAndScores(data_frame, params, params_not_to_optimize,
                            config_dir):
    """Returns a list of all configurations and scores.

    For every APM config found in `data_frame`, each score is averaged over
    the rows of that config and then divided by the maximum value of that
    score over the whole data frame.

    Args:
      data_frame: A pandas data frame with the scores and config name
                  returned by collect_data.FindScores. The columns
                  'apm_config', 'eval_score_name' and 'score' are read.
      params: The parameter names to parse from the configs in the config
              directory.

      params_not_to_optimize: The parameter names which shouldn't affect
                              the optimal parameter
                              selection. E.g., fixed settings and not
                              tunable parameters.

      config_dir: Path to folder with config files. Each config is loaded
                  from '<config_dir>/<apm_config>.json'.

    Returns:
      Dictionary of the form
      {param_combination: [{params: {param1: value1, ...},
                            scores: {score1: value1, ...}}]}.

      The key `param_combination` runs over all parameter combinations
      of the parameters in `params` and not in
      `params_not_to_optimize`. A corresponding value is a list of all
      param combinations for params in `params_not_to_optimize` and
      their scores.
    """
    results = collections.defaultdict(list)
    config_names = data_frame['apm_config'].drop_duplicates().values.tolist()
    score_names = data_frame['eval_score_name'].drop_duplicates(
    ).values.tolist()

    # Normalize the scores: every score is later divided by the largest
    # value observed for that score name across all configs.
    normalization_constants = {}
    for score_name in score_names:
        scores = data_frame[data_frame.eval_score_name == score_name].score
        normalization_constants[score_name] = max(scores)

    params_to_optimize = [p for p in params if p not in params_not_to_optimize]
    # The named tuple is hashable, which lets a parameter combination be
    # used as a dictionary key.
    param_combination = collections.namedtuple("ParamCombination",
                                               params_to_optimize)

    for config_name in config_names:
        config_json = data_access.AudioProcConfigFile.Load(
            os.path.join(config_dir, config_name + ".json"))
        scores = {}
        data_cell = data_frame[data_frame.apm_config == config_name]
        for score_name in score_names:
            data_cell_scores = data_cell[data_cell.eval_score_name ==
                                         score_name].score
            # Mean score for this config, then normalization.
            scores[score_name] = sum(data_cell_scores) / len(data_cell_scores)
            scores[score_name] /= normalization_constants[score_name]

        result = {'scores': scores, 'params': {}}
        config_optimize_params = {}
        for param in params:
            # Config files key their entries by the flag name, i.e. the
            # parameter name prefixed with a dash.
            if param in params_to_optimize:
                config_optimize_params[param] = config_json['-' + param]
            else:
                result['params'][param] = config_json['-' + param]

        current_param_combination = param_combination(**config_optimize_params)
        results[current_param_combination].append(result)
    return results


def _FindOptimalParameter(configs_and_scores, score_weighting):
    """Finds the parameter combination with the lowest weighted score.

    NOTE(review): earlier documentation described this as finding the
    *maximal* score, but the implementation has always selected the
    minimum. The behavior is kept as is; confirm whether callers expect
    `score_weighting` to be a cost (lower is better).

    Args:
      configs_and_scores: structure of the form returned by
                          _ConfigurationAndScores

      score_weighting: a function to weight together all score values of
                       the form [{params: {param1: value1, ...}, scores:
                       {score1: value1, ...}}] into a numeric
                       value
    Returns:
      the key of `configs_and_scores` whose value yields the smallest
      result of `score_weighting`, or None if `configs_and_scores` is
      empty or no weighted score is strictly below +inf.
    """

    min_score = float('+inf')
    best_params = None
    for config, scores_and_params in configs_and_scores.items():
        current_score = score_weighting(scores_and_params)
        if current_score < min_score:
            min_score = current_score
            best_params = config
            logging.debug("Score: %f", current_score)
            logging.debug("Config: %s", str(config))
    return best_params


def _ExampleWeighting(scores_and_configs):
    """Example argument to `_FindOptimalParameter`

    Args:
      scores_and_configs: a list of configs and scores, in the form
                          described in _FindOptimalParameter
    Returns:
      numeric value, the sum of all scores
    """
    return sum(
        sum(score_config['scores'].values())
        for score_config in scores_and_configs)


def main():
    """Parses the command line, collects the scores and logs the result."""
    # Init.
    # TODO(alessiob): INFO once debugged.
    logging.basicConfig(level=logging.DEBUG)
    parser = _InstanceArgumentsParser()
    args = parser.parse_args()

    # Get the scores.
    src_path = collect_data.ConstructSrcPath(args)
    logging.debug('Src path <%s>', src_path)
    scores_data_frame = collect_data.FindScores(src_path, args)
    all_scores = _ConfigurationAndScores(scores_data_frame, args.params,
                                         args.params_not_to_optimize,
                                         args.config_dir)

    opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting)

    if opt_param is None:
        # `all_scores` is a defaultdict: indexing it with None would
        # silently create an empty entry and log a meaningless result.
        logging.warning('No optimal parameter combination found.')
        return

    logging.info('Optimal parameter combination: <%s>', opt_param)
    logging.info('It\'s score values: <%s>', all_scores[opt_param])


if __name__ == "__main__":
    main()