• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3#
4# Use of this source code is governed by a BSD-style license
5# that can be found in the LICENSE file in the root of the source
6# tree. An additional intellectual property rights grant can be found
7# in the file PATENTS.  All contributing project authors may
8# be found in the AUTHORS file in the root of the source tree.
9"""Finds the APM configuration that maximizes a provided metric by
parsing the output generated by apm_quality_assessment.py.
11"""
12
13from __future__ import division
14
15import collections
16import logging
17import os
18
19import quality_assessment.data_access as data_access
20import quality_assessment.collect_data as collect_data
21
22
def _InstanceArgumentsParser():
    """Arguments parser factory.

    Extends the parser returned by `collect_data.InstanceArgumentsParser()`
    with a few extra options for selecting what parameters to optimize for.

    Returns:
      The parser from `collect_data`, with its description replaced and the
      `--config_dir`, `--params` and `--params_not_to_optimize` options added.
    """
    parser = collect_data.InstanceArgumentsParser()
    # Adjacent string literals are concatenated verbatim, so the separating
    # space has to be part of one of the fragments.
    parser.description = (
        'Rudimentary optimization of a function over different parameter '
        'combinations.')

    parser.add_argument(
        '-n',
        '--config_dir',
        required=False,
        help=('path to the folder with the configuration files'),
        default='apm_configs')

    parser.add_argument('-p',
                        '--params',
                        required=True,
                        nargs='+',
                        help=('parameters to parse from the config files in '
                              'config_dir'))

    parser.add_argument(
        '-z',
        '--params_not_to_optimize',
        required=False,
        nargs='+',
        default=[],
        help=('parameters from `params` not to be optimized for'))

    return parser
55
56
def _ConfigurationAndScores(data_frame, params, params_not_to_optimize,
                            config_dir):
    """Groups the normalized scores of every configuration by the values of
    the parameters being optimized.

    Args:
      data_frame: A pandas data frame with (at least) the columns
                  `apm_config`, `eval_score_name` and `score`; `main` passes
                  the frame obtained from `collect_data.FindScores`.
      params: The parameter names to read from each configuration file. A
              parameter `p` is looked up under the key `'-' + p`.
      params_not_to_optimize: The parameter names from `params` which
                              shouldn't affect the optimal parameter
                              selection, e.g., fixed settings rather than
                              tunable parameters.
      config_dir: Path to the folder holding the `<apm_config>.json` files.

    Returns:
      Dictionary of the form
      {param_combination: [{scores: {score1: value1, ...},
                            params: {param1: value1, ...}}]}.

      A key is a `ParamCombination` named tuple holding the values of the
      parameters listed in `params` but not in `params_not_to_optimize`. The
      associated list has one entry per configuration sharing those values;
      each entry carries the values of the remaining parameters and, for
      every score name, the mean score of that configuration divided by the
      largest score with that name in the whole data frame.
    """
    all_configs = data_frame['apm_config'].drop_duplicates().values.tolist()
    all_score_names = data_frame['eval_score_name'].drop_duplicates(
    ).values.tolist()

    # Per-score normalization constant: the largest value seen for that score
    # across all configurations.
    score_maxima = {
        name: max(data_frame[data_frame.eval_score_name == name].score)
        for name in all_score_names
    }

    tunable = [p for p in params if p not in params_not_to_optimize]
    combination_type = collections.namedtuple("ParamCombination", tunable)

    grouped = collections.defaultdict(list)
    for config in all_configs:
        config_path = os.path.join(config_dir, config + ".json")
        settings = data_access.AudioProcConfigFile.Load(config_path)
        config_rows = data_frame[data_frame.apm_config == config]

        # Mean score of this configuration, scaled by the global maximum.
        normalized = {}
        for name in all_score_names:
            values = config_rows[config_rows.eval_score_name == name].score
            mean_value = sum(values) / len(values)
            normalized[name] = mean_value / score_maxima[name]

        # Route each parameter value either into the grouping key or into the
        # per-entry record of fixed parameters.
        optimized_values = {}
        fixed_values = {}
        for param in params:
            target = optimized_values if param in tunable else fixed_values
            target[param] = settings['-' + param]

        entry = {'scores': normalized, 'params': fixed_values}
        grouped[combination_type(**optimized_values)].append(entry)
    return grouped
122
123
124def _FindOptimalParameter(configs_and_scores, score_weighting):
125    """Finds the config producing the maximal score.
126
127  Args:
128    configs_and_scores: structure of the form returned by
129                        _ConfigurationAndScores
130
131    score_weighting: a function to weight together all score values of
132                     the form [{params: {param1: value1, ...}, scores:
133                                {score1: value1, ...}}] into a numeric
134                     value
135  Returns:
136    the config that has the largest values of `score_weighting` applied
137    to its scores.
138  """
139
140    min_score = float('+inf')
141    best_params = None
142    for config in configs_and_scores:
143        scores_and_params = configs_and_scores[config]
144        current_score = score_weighting(scores_and_params)
145        if current_score < min_score:
146            min_score = current_score
147            best_params = config
148            logging.debug("Score: %f", current_score)
149            logging.debug("Config: %s", str(config))
150    return best_params
151
152
153def _ExampleWeighting(scores_and_configs):
154    """Example argument to `_FindOptimalParameter`
155  Args:
156    scores_and_configs: a list of configs and scores, in the form
157                        described in _FindOptimalParameter
158  Returns:
159    numeric value, the sum of all scores
160  """
161    res = 0
162    for score_config in scores_and_configs:
163        res += sum(score_config['scores'].values())
164    return res
165
166
def main():
    """Parses the command line, collects the scores and logs the parameter
    combination selected by `_FindOptimalParameter` using
    `_ExampleWeighting`.
    """
    # TODO(alessiob): INFO once debugged.
    logging.basicConfig(level=logging.DEBUG)
    args = _InstanceArgumentsParser().parse_args()

    # Locate and load the scores.
    src_path = collect_data.ConstructSrcPath(args)
    logging.debug('Src path <%s>', src_path)
    scores_data_frame = collect_data.FindScores(src_path, args)

    # Group the scores by the parameter combinations being optimized.
    all_scores = _ConfigurationAndScores(
        data_frame=scores_data_frame,
        params=args.params,
        params_not_to_optimize=args.params_not_to_optimize,
        config_dir=args.config_dir)

    opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting)

    logging.info('Optimal parameter combination: <%s>', opt_param)
    logging.info("It's score values: <%s>", all_scores[opt_param])


if __name__ == "__main__":
    main()
190