• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Extract histogram names from the description XML file.
6
7For more information on the format of the XML file, which is self-documenting,
8see histograms.xml; however, here is a simple example to get you started. The
9XML below will generate the following five histograms:
10
11    HistogramTime
12    HistogramEnum
13    HistogramEnum_Chrome
14    HistogramEnum_IE
15    HistogramEnum_Firefox
16
17<histogram-configuration>
18
19<histograms>
20
21<histogram name="HistogramTime" units="milliseconds">
22  <summary>A brief description.</summary>
23  <details>This is a more thorough description of this histogram.</details>
24</histogram>
25
26<histogram name="HistogramEnum" enum="MyEnumType">
27  <summary>This histogram sports an enum value type.</summary>
28</histogram>
29
30</histograms>
31
32<enums>
33
<enum name="MyEnumType" type="int">
35  <summary>This is an example enum type, where the values mean little.</summary>
36  <int value="1" label="FIRST_VALUE">This is the first value.</int>
37  <int value="2" label="SECOND_VALUE">This is the second value.</int>
38</enum>
39
40</enums>
41
42<histogram_suffixes_list>
43
44<histogram_suffixes name="BrowserType">
  <suffix name="Chrome"/>
  <suffix name="IE"/>
  <suffix name="Firefox"/>
48  <affected-histogram name="HistogramEnum"/>
49</histogram_suffixes>
50
51</histogram_suffixes_list>
52
53</histogram-configuration>
54
55"""
56
57import copy
58import logging
59import xml.dom.minidom
60
# Placeholder text for the owner field; owner entries containing it are
# filtered out when owners are extracted.
OWNER_FIELD_PLACEHOLDER = (
    "Please list the metric's owners. Add more owner tags as needed.")

# How many times a histogram_suffixes node may be pushed back for reprocessing
# while waiting for the histograms it depends on to be generated.
MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH = 5
65
66
class Error(Exception):
  """Raised when the histogram description XML cannot be processed."""
69
70
def _JoinChildNodes(tag):
  """Serialize the children of a node into one string.

  Intended for leaf elements such as 'summary' and 'details', whose content
  may mix plain text with inline markup.

  Args:
    tag: The parent node whose children are serialized.

  Returns:
    The XML text of every child node, concatenated in document order, with
    leading and trailing whitespace removed.
  """
  pieces = [child.toxml() for child in tag.childNodes]
  joined = ''.join(pieces)
  return joined.strip()
83
84
def _NormalizeString(s):
  """Collapses every run of whitespace in a string to a single space.

  Leading and trailing whitespace is dropped and line breaks count as
  whitespace, so multi-line strings are flattened onto one line.

  Args:
    s: The string to normalize, e.g. '  \\n a  b c\\n d  '.

  Returns:
    The normalized string, e.g. 'a b c d'.
  """
  words = s.split()
  return ' '.join(words)
97
98
def _NormalizeAllAttributeValues(node):
  """Normalizes the attribute values of a node and all of its descendants.

  Every attribute value on every element in the subtree is rewritten in place
  using _NormalizeString.

  Args:
    node: The minidom node at the root of the subtree to normalize.

  Returns:
    The same minidom node that was passed in.
  """
  # Only element nodes carry an attribute map.
  is_element = node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE
  attribute_names = node.attributes.keys() if is_element else ()
  for attribute_name in attribute_names:
    attribute = node.attributes[attribute_name]
    attribute.value = _NormalizeString(attribute.value)

  for child in node.childNodes:
    _NormalizeAllAttributeValues(child)
  return node
115
116
def _ExpandHistogramNameWithSuffixes(suffix_name, histogram_name,
                                     histogram_suffixes_node):
  """Builds the histogram name produced by applying one suffix.

  The node's optional 'separator' attribute (default '_') is placed between
  the histogram name and the suffix. Its optional 'ordering' attribute
  (default 'suffix') selects whether the suffix is appended to the name or
  inserted after the name's first dot-separated section.

  Args:
    suffix_name: The suffix string to apply to the histogram name. May be empty.
    histogram_name: The name of the histogram. May be of the form
      Group.BaseName or BaseName.
    histogram_suffixes_node: The histogram_suffixes XML node.

  Returns:
    A string with the expanded histogram name; histogram_name itself when
    suffix_name is empty.

  Raises:
    Error: if 'ordering' is neither 'prefix' nor 'suffix', or if prefix
      ordering is requested for a histogram name that contains no dot.
  """
  node = histogram_suffixes_node
  separator = (
      node.getAttribute('separator') if node.hasAttribute('separator') else '_')
  ordering = (
      node.getAttribute('ordering') if node.hasAttribute('ordering')
      else 'suffix')

  if ordering != 'prefix' and ordering != 'suffix':
    logging.error('ordering needs to be prefix or suffix, value is %s',
                  ordering)
    raise Error()

  # An empty suffix leaves the name untouched.
  if not suffix_name:
    return histogram_name

  if ordering == 'suffix':
    return histogram_name + separator + suffix_name

  # Prefix ordering: the suffix goes between the "cluster" (everything up to
  # the first dot) and the "remainder", e.g. Foo.BarHist expanded with gamma
  # becomes Foo.gamma_BarHist.
  cluster, dot, remainder = histogram_name.partition('.')
  if not dot:
    logging.error(
        'Prefix Field Trial expansions require histogram names which include a '
        'dot separator. Histogram name is %s, and Field Trial is %s',
        histogram_name, node.getAttribute('name'))
    raise Error()

  return cluster + '.' + suffix_name + separator + remainder
166
167
def _ExtractEnumsFromXmlTree(tree):
  """Extract all <enum> nodes in the tree into a dictionary.

  Problems are logged and reported through the returned flag instead of
  aborting, so a single pass reports as many issues as possible.

  Args:
    tree: The minidom tree to search for <enum> elements.

  Returns:
    A tuple (enums, have_errors). enums maps each enum name to a dictionary
    with the keys 'name', 'values' (a dictionary from integer value to a
    dictionary holding 'label' and 'summary') and, when the enum has a
    <summary> child, 'summary'. have_errors is True if any enum had a type
    other than 'int', was out of alphabetical order or duplicated, or had
    <int> values that were non-integer, out of numerical order or duplicated.
  """

  enums = {}
  have_errors = False

  last_name = None
  for enum in tree.getElementsByTagName('enum'):
    if enum.getAttribute('type') != 'int':
      logging.error('Unknown enum type %s', enum.getAttribute('type'))
      have_errors = True
      continue

    name = enum.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('Enums %s and %s are not in alphabetical order',
                    last_name, name)
      have_errors = True
    last_name = name

    if name in enums:
      logging.error('Duplicate enum %s', name)
      have_errors = True
      continue

    last_int_value = None
    enum_dict = {}
    enum_dict['name'] = name
    enum_dict['values'] = {}

    for int_tag in enum.getElementsByTagName('int'):
      value_dict = {}
      raw_value = int_tag.getAttribute('value')
      try:
        int_value = int(raw_value)
      except ValueError:
        # A missing or non-numeric value is reported like every other
        # validation problem rather than aborting the whole extraction.
        logging.error('Enum %s has a non-integer value %r', name, raw_value)
        have_errors = True
        continue
      if last_int_value is not None and int_value < last_int_value:
        logging.error('Enum %s int values %d and %d are not in numerical order',
                      name, last_int_value, int_value)
        have_errors = True
      last_int_value = int_value
      if int_value in enum_dict['values']:
        logging.error('Duplicate enum value %d for enum %s', int_value, name)
        have_errors = True
        continue
      value_dict['label'] = int_tag.getAttribute('label')
      value_dict['summary'] = _JoinChildNodes(int_tag)
      enum_dict['values'][int_value] = value_dict

    summary_nodes = enum.getElementsByTagName('summary')
    if summary_nodes:
      enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))

    enums[name] = enum_dict

  return enums, have_errors
221
222
def _ExtractOwners(xml_node):
  """Collects the text of every <owner> tag found under |xml_node|.

  Each owner entry is whitespace-normalized; entries that still contain the
  owner placeholder text are left out.
  """
  owner_entries = (
      _NormalizeString(_JoinChildNodes(owner_node))
      for owner_node in xml_node.getElementsByTagName('owner'))
  return [entry for entry in owner_entries
          if OWNER_FIELD_PLACEHOLDER not in entry]
231
232
def _ExtractHistogramsFromXmlTree(tree, enums):
  """Builds a dictionary describing every <histogram> node in the tree.

  Args:
    tree: The minidom tree to search for <histogram> elements.
    enums: A dictionary of known enums keyed by name, used to resolve the
      'enum' attribute of histograms.

  Returns:
    A tuple (histograms, have_errors). histograms maps each histogram name to
    a dictionary that always has 'summary' ('TBD' when there is no <summary>
    tag) and may have 'owners', 'obsolete', 'units', 'details' and 'enum'.
    have_errors is True if histograms were out of alphabetical order,
    duplicated, or referred to an unknown enum.
  """
  histograms = {}
  have_errors = False
  previous_name = None

  # The descriptions can include HTML tags, hence the XML-preserving joins.
  for node in tree.getElementsByTagName('histogram'):
    name = node.getAttribute('name')
    out_of_order = (previous_name is not None and
                    name.lower() < previous_name.lower())
    if out_of_order:
      logging.error('Histograms %s and %s are not in alphabetical order',
                    previous_name, name)
      have_errors = True
    previous_name = name

    if name in histograms:
      logging.error('Duplicate histogram definition %s', name)
      have_errors = True
      continue

    entry = {}
    histograms[name] = entry

    # <owner> tags.
    owners = _ExtractOwners(node)
    if owners:
      entry['owners'] = owners

    # <summary> tag, with a placeholder when it is absent.
    summaries = node.getElementsByTagName('summary')
    entry['summary'] = (
        _NormalizeString(_JoinChildNodes(summaries[0])) if summaries else 'TBD')

    # <obsolete> tag; the reason is stored without whitespace normalization.
    obsoletes = node.getElementsByTagName('obsolete')
    if obsoletes:
      entry['obsolete'] = _JoinChildNodes(obsoletes[0])

    # 'units' attribute.
    if node.hasAttribute('units'):
      entry['units'] = node.getAttribute('units')

    # <details> tag.
    details = node.getElementsByTagName('details')
    if details:
      entry['details'] = _NormalizeString(_JoinChildNodes(details[0]))

    # 'enum' attribute, resolved against the known enums.
    if node.hasAttribute('enum'):
      enum_name = node.getAttribute('enum')
      if enum_name in enums:
        entry['enum'] = enums[enum_name]
      else:
        logging.error('Unknown enum %s in histogram %s', enum_name, name)
        have_errors = True

  return histograms, have_errors
292
293
def _UpdateHistogramsWithSuffixes(tree, histograms):
  """Process <histogram_suffixes> tags and combine with affected histograms.

  The histograms dictionary will be updated in-place by adding new histograms
  created by combining histograms themselves with histogram_suffixes targeting
  these histograms.

  If the tree contains no <histogram_suffixes> elements, the legacy
  <fieldtrial> / <group> / <with-group> tag names are used instead.

  Args:
    tree: XML dom tree.
    histograms: a dictionary of histograms previously extracted from the tree;
      it is modified in place.

  Returns:
    True if any errors were found.
  """
  have_errors = False

  # TODO(yiyaoliu): Remove this part after fieldtrial is not used any more.
  if tree.getElementsByTagName('histogram_suffixes'):
    histogram_suffix_tag = 'histogram_suffixes'
    suffix_tag = 'suffix'
    with_tag = 'with-suffix'
  else:
    histogram_suffix_tag = 'fieldtrial'
    suffix_tag = 'group'
    with_tag = 'with-group'

  # Verify order of histogram_suffixes fields first.
  last_name = None
  for histogram_suffixes in tree.getElementsByTagName(histogram_suffix_tag):
    name = histogram_suffixes.getAttribute('name')
    if last_name is not None and name.lower() < last_name.lower():
      logging.error('histogram_suffixes %s and %s are not in alphabetical '
                    'order', last_name, name)
      have_errors = True
    last_name = name

  # histogram_suffixes can depend on other histogram_suffixes, so we need to be
  # careful. The generator below first yields every histogram_suffixes node
  # from the tree and then drains reprocess_queue. histogram_suffixes whose
  # dependencies have not yet been processed are appended to that queue by the
  # main loop, so they are seen again after everything ahead of them. Each
  # queue entry carries the number of times the node has been postponed.
  reprocess_queue = []
  def GenerateHistogramSuffixes():
    for f in tree.getElementsByTagName(histogram_suffix_tag):
      yield 0, f
    # The queue is iterated while the loop below is still appending to it, so
    # entries added during iteration are yielded as well.
    for r, f in reprocess_queue:
      yield r, f

  for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
    # Check dependencies first
    dependencies_valid = True
    affected_histograms = histogram_suffixes.getElementsByTagName(
        'affected-histogram')
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if histogram_name not in histograms:
        # Base histogram is missing
        dependencies_valid = False
        missing_dependency = histogram_name
        break
    if not dependencies_valid:
      # Postpone the node until the retry budget is exhausted, then report the
      # first missing histogram as an error.
      if reprocess_count < MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH:
        reprocess_queue.append((reprocess_count + 1, histogram_suffixes))
        continue
      else:
        logging.error('histogram_suffixes %s is missing its dependency %s',
                      histogram_suffixes.getAttribute('name'),
                      missing_dependency)
        have_errors = True
        continue

    name = histogram_suffixes.getAttribute('name')
    suffix_nodes = histogram_suffixes.getElementsByTagName(suffix_tag)
    # Map each suffix name to its label for lookup during expansion.
    suffix_labels = {}
    for suffix in suffix_nodes:
      suffix_labels[suffix.getAttribute('name')] = suffix.getAttribute('label')
    # Find owners list under current histogram_suffixes tag.
    owners = _ExtractOwners(histogram_suffixes)

    last_histogram_name = None
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      if (last_histogram_name is not None
          and histogram_name.lower() < last_histogram_name.lower()):
        logging.error('Affected histograms %s and %s of histogram_suffixes %s '
                      'are not in alphabetical order',
                      last_histogram_name, histogram_name, name)
        have_errors = True
      last_histogram_name = histogram_name
      # An affected histogram may restrict the expansion to the suffixes it
      # lists explicitly; otherwise every suffix of the group is applied.
      with_suffixes = affected_histogram.getElementsByTagName(with_tag)
      if with_suffixes:
        suffixes_to_add = with_suffixes
      else:
        suffixes_to_add = suffix_nodes
      for suffix in suffixes_to_add:
        suffix_name = suffix.getAttribute('name')
        try:
          new_histogram_name = _ExpandHistogramNameWithSuffixes(
              suffix_name, histogram_name, histogram_suffixes)
          # An unchanged name (e.g. an empty suffix) annotates the base
          # histogram itself instead of creating a copy.
          if new_histogram_name != histogram_name:
            histograms[new_histogram_name] = copy.deepcopy(
                histograms[histogram_name])

          suffix_label = suffix_labels.get(suffix_name, '')

          # TODO(yiyaoliu): Rename these to be consistent with the new naming.
          # It is kept unchanged for now because it's used by dashboards.
          if 'fieldtrial_groups' not in histograms[new_histogram_name]:
            histograms[new_histogram_name]['fieldtrial_groups'] = []
          histograms[new_histogram_name]['fieldtrial_groups'].append(
              suffix_name)

          if 'fieldtrial_names' not in histograms[new_histogram_name]:
            histograms[new_histogram_name]['fieldtrial_names'] = []
          histograms[new_histogram_name]['fieldtrial_names'].append(name)

          if 'fieldtrial_labels' not in histograms[new_histogram_name]:
            histograms[new_histogram_name]['fieldtrial_labels'] = []
          histograms[new_histogram_name]['fieldtrial_labels'].append(
              suffix_label)

          # If no owners are added for this histogram-suffixes, it inherits the
          # owners of its parents.
          # NOTE(review): the same owners list object is assigned to every
          # histogram expanded from this node.
          if owners:
            histograms[new_histogram_name]['owners'] = owners

        except Error:
          have_errors = True

  return have_errors
423
424
def ExtractHistogramsFromFile(file_handle):
  """Compute the histogram names and descriptions from the XML representation.

  The XML is parsed, all attribute values are whitespace-normalized, and then
  enums, histograms and histogram suffixes are extracted in that order.

  Args:
    file_handle: A file or file-like object with XML content.

  Returns:
    A tuple of (histograms, had_errors) where histograms is a dictionary
    mapping histogram names to dictionaries containing histogram descriptions
    and had_errors is a boolean indicating whether errors were encountered
    during processing.
  """
  dom = _NormalizeAllAttributeValues(xml.dom.minidom.parse(file_handle))

  enums, enum_errors = _ExtractEnumsFromXmlTree(dom)
  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(dom, enums)
  suffix_errors = _UpdateHistogramsWithSuffixes(dom, histograms)

  had_errors = enum_errors or histogram_errors or suffix_errors
  return histograms, had_errors
444
445
def ExtractHistograms(filename):
  """Load histogram definitions from a disk file.

  Args:
    filename: a file path to load data from.

  Returns:
    a dictionary of histogram descriptions.

  Raises:
    Error: if ExtractHistogramsFromFile reports that errors were found while
      processing the file.
  """
  # Open in binary mode so the XML parser receives the raw bytes and applies
  # the encoding declared by the document, instead of the platform's default
  # text encoding.
  with open(filename, 'rb') as f:
    histograms, had_errors = ExtractHistogramsFromFile(f)
  if had_errors:
    logging.error('Error parsing %s', filename)
    raise Error('Error parsing %s' % filename)
  return histograms
464
465
def ExtractNames(histograms):
  """Returns the histogram names from |histograms| as a sorted list."""
  return sorted(histograms)