• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9Repackage expected/actual GM results as needed by our HTML rebaseline viewer.
10"""
11
12# System-level imports
13import fnmatch
14import json
15import logging
16import os
17import re
18import sys
19import time
20
21# Imports from within Skia
22#
23# We need to add the 'gm' directory, so that we can import gm_json.py within
24# that directory.  That script allows us to parse the actual-results.json file
25# written out by the GM tool.
26# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*
27# so any dirs that are already in the PYTHONPATH will be preferred.
# realpath() resolves this file's location; the inner dirname() gives the
# directory holding this file and the outer dirname() gives that directory's
# parent, which is the path appended below.
GM_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
if GM_DIRECTORY not in sys.path:
  sys.path.append(GM_DIRECTORY)
31import gm_json
32import imagediffdb
33
# Compiled form of gm_json's image-filename pattern.  When results are loaded,
# the groups of a successful match are unpacked as (test, config).
IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)
IMAGE_FILENAME_FORMATTER = '%s_%s.png'  # pass in (testname, config)

# Expectation fields that are copied, unmodified, between the expectations
# dictionaries and the per-test dictionaries handled by the Results class.
FIELDS_PASSED_THRU_VERBATIM = [
    gm_json.JSONKEY_EXPECTEDRESULTS_BUGS,
    gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE,
    gm_json.JSONKEY_EXPECTEDRESULTS_REVIEWED,
]
# Keys of a per-test result dictionary for which the Results class tallies how
# many times each distinct value occurs.
CATEGORIES_TO_SUMMARIZE = [
    'builder', 'test', 'config', 'resultType',
    gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE,
    gm_json.JSONKEY_EXPECTEDRESULTS_REVIEWED,
]

# Keys of the dictionary built by Results._load_actual_and_expected(); these
# are the values accepted by Results.get_results_of_type().
RESULTS_ALL = 'all'
RESULTS_FAILURES = 'failures'
50
51class Results(object):
52  """ Loads actual and expected results from all builders, supplying combined
53  reports as requested.
54
55  Once this object has been constructed, the results (in self._results[])
56  are immutable.  If you want to update the results based on updated JSON
57  file contents, you will need to create a new Results object."""
58
59  def __init__(self, actuals_root, expected_root, generated_images_root):
60    """
61    Args:
62      actuals_root: root directory containing all actual-results.json files
63      expected_root: root directory containing all expected-results.json files
64      generated_images_root: directory within which to create all pixel diffs;
65          if this directory does not yet exist, it will be created
66    """
67    self._image_diff_db = imagediffdb.ImageDiffDB(generated_images_root)
68    self._actuals_root = actuals_root
69    self._expected_root = expected_root
70    self._load_actual_and_expected()
71    self._timestamp = int(time.time())
72
73  def get_timestamp(self):
74    """Return the time at which this object was created, in seconds past epoch
75    (UTC).
76    """
77    return self._timestamp
78
79  def edit_expectations(self, modifications):
80    """Edit the expectations stored within this object and write them back
81    to disk.
82
83    Note that this will NOT update the results stored in self._results[] ;
84    in order to see those updates, you must instantiate a new Results object
85    based on the (now updated) files on disk.
86
87    Args:
88      modifications: a list of dictionaries, one for each expectation to update:
89
90         [
91           {
92             'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
93             'test': 'bigmatrix',
94             'config': '8888',
95             'expectedHashType': 'bitmap-64bitMD5',
96             'expectedHashDigest': '10894408024079689926',
97             'bugs': [123, 456],
98             'ignore-failure': false,
99             'reviewed-by-human': true,
100           },
101           ...
102         ]
103
104    """
105    expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)
106    for mod in modifications:
107      image_name = IMAGE_FILENAME_FORMATTER % (mod['test'], mod['config'])
108      # TODO(epoger): assumes a single allowed digest per test
109      allowed_digests = [[mod['expectedHashType'],
110                          int(mod['expectedHashDigest'])]]
111      new_expectations = {
112          gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests,
113      }
114      for field in FIELDS_PASSED_THRU_VERBATIM:
115        value = mod.get(field)
116        if value is not None:
117          new_expectations[field] = value
118      builder_dict = expected_builder_dicts[mod['builder']]
119      builder_expectations = builder_dict.get(gm_json.JSONKEY_EXPECTEDRESULTS)
120      if not builder_expectations:
121        builder_expectations = {}
122        builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = builder_expectations
123      builder_expectations[image_name] = new_expectations
124    Results._write_dicts_to_root(expected_builder_dicts, self._expected_root)
125
126  def get_results_of_type(self, type):
127    """Return results of some/all tests (depending on 'type' parameter).
128
129    Args:
130      type: string describing which types of results to include; must be one
131            of the RESULTS_* constants
132
133    Results are returned as a dictionary in this form:
134
135       {
136         'categories': # dictionary of categories listed in
137                       # CATEGORIES_TO_SUMMARIZE, with the number of times
138                       # each value appears within its category
139         {
140           'resultType': # category name
141           {
142             'failed': 29, # category value and total number found of that value
143             'failure-ignored': 948,
144             'no-comparison': 4502,
145             'succeeded': 38609,
146           },
147           'builder':
148           {
149             'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,
150             'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,
151             ...
152           },
153           ... # other categories from CATEGORIES_TO_SUMMARIZE
154         }, # end of 'categories' dictionary
155
156         'testData': # list of test results, with a dictionary for each
157         [
158           {
159             'resultType': 'failed',
160             'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
161             'test': 'bigmatrix',
162             'config': '8888',
163             'expectedHashType': 'bitmap-64bitMD5',
164             'expectedHashDigest': '10894408024079689926',
165             'actualHashType': 'bitmap-64bitMD5',
166             'actualHashDigest': '2409857384569',
167             'bugs': [123, 456],
168             'ignore-failure': false,
169             'reviewed-by-human': true,
170           },
171           ...
172         ], # end of 'testData' list
173       }
174    """
175    return self._results[type]
176
177  @staticmethod
178  def _read_dicts_from_root(root, pattern='*.json'):
179    """Read all JSON dictionaries within a directory tree.
180
181    Args:
182      root: path to root of directory tree
183      pattern: which files to read within root (fnmatch-style pattern)
184
185    Returns:
186      A meta-dictionary containing all the JSON dictionaries found within
187      the directory tree, keyed by the builder name of each dictionary.
188
189    Raises:
190      IOError if root does not refer to an existing directory
191    """
192    if not os.path.isdir(root):
193      raise IOError('no directory found at path %s' % root)
194    meta_dict = {}
195    for dirpath, dirnames, filenames in os.walk(root):
196      for matching_filename in fnmatch.filter(filenames, pattern):
197        builder = os.path.basename(dirpath)
198        # If we are reading from the collection of actual results, skip over
199        # the Trybot results (we don't maintain baselines for them).
200        if builder.endswith('-Trybot'):
201          continue
202        fullpath = os.path.join(dirpath, matching_filename)
203        meta_dict[builder] = gm_json.LoadFromFile(fullpath)
204    return meta_dict
205
206  @staticmethod
207  def _write_dicts_to_root(meta_dict, root, pattern='*.json'):
208    """Write all per-builder dictionaries within meta_dict to files under
209    the root path.
210
211    Security note: this will only write to files that already exist within
212    the root path (as found by os.walk() within root), so we don't need to
213    worry about malformed content writing to disk outside of root.
214    However, the data written to those files is not double-checked, so it
215    could contain poisonous data.
216
217    Args:
218      meta_dict: a builder-keyed meta-dictionary containing all the JSON
219                 dictionaries we want to write out
220      root: path to root of directory tree within which to write files
221      pattern: which files to write within root (fnmatch-style pattern)
222
223    Raises:
224      IOError if root does not refer to an existing directory
225      KeyError if the set of per-builder dictionaries written out was
226               different than expected
227    """
228    if not os.path.isdir(root):
229      raise IOError('no directory found at path %s' % root)
230    actual_builders_written = []
231    for dirpath, dirnames, filenames in os.walk(root):
232      for matching_filename in fnmatch.filter(filenames, pattern):
233        builder = os.path.basename(dirpath)
234        # We should never encounter Trybot *expectations*, but if we are
235        # writing into the actual-results dir, skip the Trybot actuals.
236        # (I don't know why we would ever write into the actual-results dir,
237        # though.)
238        if builder.endswith('-Trybot'):
239          continue
240        per_builder_dict = meta_dict.get(builder)
241        if per_builder_dict is not None:
242          fullpath = os.path.join(dirpath, matching_filename)
243          gm_json.WriteToFile(per_builder_dict, fullpath)
244          actual_builders_written.append(builder)
245
246    # Check: did we write out the set of per-builder dictionaries we
247    # expected to?
248    expected_builders_written = sorted(meta_dict.keys())
249    actual_builders_written.sort()
250    if expected_builders_written != actual_builders_written:
251      raise KeyError(
252          'expected to write dicts for builders %s, but actually wrote them '
253          'for builders %s' % (
254              expected_builders_written, actual_builders_written))
255
256  def _generate_pixel_diffs_if_needed(self, test, expected_image, actual_image):
257    """If expected_image and actual_image both exist but are different,
258    add the image pair to self._image_diff_db and generate pixel diffs.
259
260    Args:
261      test: string; name of test
262      expected_image: (hashType, hashDigest) tuple describing the expected image
263      actual_image: (hashType, hashDigest) tuple describing the actual image
264    """
265    if expected_image == actual_image:
266      return
267
268    (expected_hashtype, expected_hashdigest) = expected_image
269    (actual_hashtype, actual_hashdigest) = actual_image
270    if None in [expected_hashtype, expected_hashdigest,
271                actual_hashtype, actual_hashdigest]:
272      return
273
274    expected_url = gm_json.CreateGmActualUrl(
275        test_name=test, hash_type=expected_hashtype,
276        hash_digest=expected_hashdigest)
277    actual_url = gm_json.CreateGmActualUrl(
278        test_name=test, hash_type=actual_hashtype,
279        hash_digest=actual_hashdigest)
280    self._image_diff_db.add_image_pair(
281        expected_image_locator=expected_hashdigest,
282        expected_image_url=expected_url,
283        actual_image_locator=actual_hashdigest,
284        actual_image_url=actual_url)
285
286  def _load_actual_and_expected(self):
287    """Loads the results of all tests, across all builders (based on the
288    files within self._actuals_root and self._expected_root),
289    and stores them in self._results.
290    """
291    actual_builder_dicts = Results._read_dicts_from_root(self._actuals_root)
292    expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)
293
294    categories_all = {}
295    categories_failures = {}
296
297    Results._ensure_included_in_category_dict(categories_all,
298                                              'resultType', [
299        gm_json.JSONKEY_ACTUALRESULTS_FAILED,
300        gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
301        gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
302        gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED,
303        ])
304    Results._ensure_included_in_category_dict(categories_failures,
305                                              'resultType', [
306        gm_json.JSONKEY_ACTUALRESULTS_FAILED,
307        gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
308        gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
309        ])
310
311    data_all = []
312    data_failures = []
313    for builder in sorted(actual_builder_dicts.keys()):
314      actual_results_for_this_builder = (
315          actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
316      for result_type in sorted(actual_results_for_this_builder.keys()):
317        results_of_this_type = actual_results_for_this_builder[result_type]
318        if not results_of_this_type:
319          continue
320        for image_name in sorted(results_of_this_type.keys()):
321          actual_image = results_of_this_type[image_name]
322
323          # Default empty expectations; overwrite these if we find any real ones
324          expectations_per_test = None
325          expected_image = [None, None]
326          try:
327            expectations_per_test = (
328                expected_builder_dicts
329                [builder][gm_json.JSONKEY_EXPECTEDRESULTS][image_name])
330            # TODO(epoger): assumes a single allowed digest per test
331            expected_image = (
332                expectations_per_test
333                [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0])
334          except (KeyError, TypeError):
335            # There are several cases in which we would expect to find
336            # no expectations for a given test:
337            #
338            # 1. result_type == NOCOMPARISON
339            #   There are no expectations for this test yet!
340            #
341            # 2. alternate rendering mode failures (e.g. serialized)
342            #   In cases like
343            #   https://code.google.com/p/skia/issues/detail?id=1684
344            #   ('tileimagefilter GM test failing in serialized render mode'),
345            #   the gm-actuals will list a failure for the alternate
346            #   rendering mode even though we don't have explicit expectations
347            #   for the test (the implicit expectation is that it must
348            #   render the same in all rendering modes).
349            #
350            # Don't log type 1, because it is common.
351            # Log other types, because they are rare and we should know about
352            # them, but don't throw an exception, because we need to keep our
353            # tools working in the meanwhile!
354            if result_type != gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON:
355              logging.warning('No expectations found for test: %s' % {
356                  'builder': builder,
357                  'image_name': image_name,
358                  'result_type': result_type,
359                  })
360
361          # If this test was recently rebaselined, it will remain in
362          # the 'failed' set of actuals until all the bots have
363          # cycled (although the expectations have indeed been set
364          # from the most recent actuals).  Treat these as successes
365          # instead of failures.
366          #
367          # TODO(epoger): Do we need to do something similar in
368          # other cases, such as when we have recently marked a test
369          # as ignoreFailure but it still shows up in the 'failed'
370          # category?  Maybe we should not rely on the result_type
371          # categories recorded within the gm_actuals AT ALL, and
372          # instead evaluate the result_type ourselves based on what
373          # we see in expectations vs actual checksum?
374          if expected_image == actual_image:
375            updated_result_type = gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED
376          else:
377            updated_result_type = result_type
378
379          (test, config) = IMAGE_FILENAME_RE.match(image_name).groups()
380          self._generate_pixel_diffs_if_needed(
381              test=test, expected_image=expected_image,
382              actual_image=actual_image)
383          results_for_this_test = {
384              'resultType': updated_result_type,
385              'builder': builder,
386              'test': test,
387              'config': config,
388              'actualHashType': actual_image[0],
389              'actualHashDigest': str(actual_image[1]),
390              'expectedHashType': expected_image[0],
391              'expectedHashDigest': str(expected_image[1]),
392
393              # FIELDS_PASSED_THRU_VERBATIM that may be overwritten below...
394              gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE: False,
395          }
396          if expectations_per_test:
397            for field in FIELDS_PASSED_THRU_VERBATIM:
398              results_for_this_test[field] = expectations_per_test.get(field)
399
400          if updated_result_type == gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON:
401            pass # no diff record to calculate at all
402          elif updated_result_type == gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED:
403            results_for_this_test['numDifferingPixels'] = 0
404            results_for_this_test['percentDifferingPixels'] = 0
405            results_for_this_test['weightedDiffMeasure'] = 0
406            results_for_this_test['maxDiffPerChannel'] = 0
407          else:
408            try:
409              diff_record = self._image_diff_db.get_diff_record(
410                  expected_image_locator=expected_image[1],
411                  actual_image_locator=actual_image[1])
412              results_for_this_test['numDifferingPixels'] = (
413                  diff_record.get_num_pixels_differing())
414              results_for_this_test['percentDifferingPixels'] = (
415                  diff_record.get_percent_pixels_differing())
416              results_for_this_test['weightedDiffMeasure'] = (
417                  diff_record.get_weighted_diff_measure())
418              results_for_this_test['maxDiffPerChannel'] = (
419                  diff_record.get_max_diff_per_channel())
420            except KeyError:
421              logging.warning('unable to find diff_record for ("%s", "%s")' %
422                              (expected_image[1], actual_image[1]))
423              pass
424
425          Results._add_to_category_dict(categories_all, results_for_this_test)
426          data_all.append(results_for_this_test)
427
428          # TODO(epoger): In effect, we have a list of resultTypes that we
429          # include in the different result lists (data_all and data_failures).
430          # This same list should be used by the calls to
431          # Results._ensure_included_in_category_dict() earlier on.
432          if updated_result_type != gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED:
433            Results._add_to_category_dict(categories_failures,
434                                          results_for_this_test)
435            data_failures.append(results_for_this_test)
436
437    self._results = {
438      RESULTS_ALL:
439        {'categories': categories_all, 'testData': data_all},
440      RESULTS_FAILURES:
441        {'categories': categories_failures, 'testData': data_failures},
442    }
443
444  @staticmethod
445  def _add_to_category_dict(category_dict, test_results):
446    """Add test_results to the category dictionary we are building.
447    (See documentation of self.get_results_of_type() for the format of this
448    dictionary.)
449
450    Args:
451      category_dict: category dict-of-dicts to add to; modify this in-place
452      test_results: test data with which to update category_list, in a dict:
453         {
454           'category_name': 'category_value',
455           'category_name': 'category_value',
456           ...
457         }
458    """
459    for category in CATEGORIES_TO_SUMMARIZE:
460      category_value = test_results.get(category)
461      if not category_dict.get(category):
462        category_dict[category] = {}
463      if not category_dict[category].get(category_value):
464        category_dict[category][category_value] = 0
465      category_dict[category][category_value] += 1
466
467  @staticmethod
468  def _ensure_included_in_category_dict(category_dict,
469                                        category_name, category_values):
470    """Ensure that the category name/value pairs are included in category_dict,
471    even if there aren't any results with that name/value pair.
472    (See documentation of self.get_results_of_type() for the format of this
473    dictionary.)
474
475    Args:
476      category_dict: category dict-of-dicts to modify
477      category_name: category name, as a string
478      category_values: list of values we want to make sure are represented
479                       for this category
480    """
481    if not category_dict.get(category_name):
482      category_dict[category_name] = {}
483    for category_value in category_values:
484      if not category_dict[category_name].get(category_value):
485        category_dict[category_name][category_value] = 0
486