• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 The PDFium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6import os
7import shlex
8import shutil
9import ssl
10import urllib2
11
12
def _ParseKeyValuePairs(kv_str):
  """
  Parses a string of the type 'key1 value1 key2 value2' into a dict.

  The string is tokenized with shlex, so a key or value containing spaces
  can be wrapped in quotes ('"builder name" Builder-Name'). An empty string
  yields an empty dict. If a key is repeated, the last value wins.

  Raises ValueError if the number of tokens is odd.
  """
  tokens = shlex.split(kv_str)
  if len(tokens) % 2:
    raise ValueError('Uneven number of key/value pairs. Got %s' % kv_str)
  # Even-indexed tokens are the keys and odd-indexed tokens their values.
  # Pairing the two slices avoids xrange(), which only exists on Python 2.
  return dict(zip(tokens[::2], tokens[1::2]))
21
22
23# This module downloads a json provided by Skia Gold with the expected baselines
24# for each test file.
25#
26# The expected format for the json is:
27# {
28#   "commit": {
29#     "author": "John Doe (jdoe@chromium.org)",
30#     "commit_time": 1510598123,
31#     "hash": "cee39e6e90c219cc91f2c94a912a06977f4461a0"
32#   },
33#   "master": {
34#     "abc.pdf.1": {
35#       "0ec3d86f545052acd7c9a16fde8ca9d4": 1,
36#       "80455b71673becc9fbc100d6da56ca65": 1,
37#       "b68e2ecb80090b4502ec89ad1be2322c": 1
38#      },
39#     "defgh.pdf.0": {
40#       "01e020cd4cd05c6738e479a46a506044": 1,
41#       "b68e2ecb80090b4502ec89ad1be2322c": 1
42#     }
43#   },
44#   "changeLists": {
45#     "18499" : {
46#       "abc.pdf.1": {
47#         "d5dd649124cf1779152253dc8fb239c5": 1,
48#         "42a270581930579cdb0f28674972fb1a": 1,
49#       }
50#     }
51#   }
52# }
class GoldBaseline(object):
  """
  Downloads the expected image hashes from Skia Gold once, at construction
  time, and matches locally generated hashes against them.
  """

  # Return values for MatchLocalResult().
  MATCH = 'match'
  MISMATCH = 'mismatch'
  NO_BASELINE = 'no_baseline'
  BASELINE_DOWNLOAD_FAILED = 'baseline_download_failed'

  def __init__(self, properties_str):
    """
    properties_str is a string with space separated key/value pairs that
               is used to find the cl number for which to baseline
    """
    self._properties = _ParseKeyValuePairs(properties_str)
    self._baselines = self._LoadSkiaGoldBaselines()

  def _LoadSkiaGoldBaselines(self):
    """
    Download the baseline json and return its 'master' section, a dict that
    maps each test case name to a dict whose keys are the expected hashes.
    An empty dict is returned if the json has no 'master' entry.

    If the properties contain an 'issue' entry, its value is appended to the
    request URL.

    Returns None if every download attempt fails or if the downloaded data is
    not valid json. Raises ValueError if the server answers with a content
    type other than 'application/json'.
    """
    GOLD_BASELINE_URL = 'https://pdfium-gold.skia.org/json/baseline'

    # If we have an issue number add it to the baseline URL
    cl_number_str = self._properties.get('issue', None)
    url = GOLD_BASELINE_URL + ('/' + cl_number_str if cl_number_str else '')

    json_data = ''
    MAX_TIMEOUT = 33  # 5 tries. (2, 4, 8, 16, 32)
    timeout = 2
    while True:
      try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
          c_type = response.headers.get('Content-type', '')
          EXPECTED_CONTENT_TYPE = 'application/json'
          if c_type != EXPECTED_CONTENT_TYPE:
            raise ValueError('Invalid content type. Got %s instead of %s' %
                             (c_type, EXPECTED_CONTENT_TYPE))
          json_data = response.read()
        finally:
          # Always release the connection, including when the read times out
          # or the content type is rejected.
          response.close()
        break  # If this line is reached, then no exception occurred.
      except (ssl.SSLError, urllib2.HTTPError, urllib2.URLError) as e:
        # Retry with a doubled timeout until the limit is exceeded.
        timeout *= 2
        if timeout < MAX_TIMEOUT:
          continue
        print('Error: Unable to read skia gold json from %s: %s' % (url, e))
        return None

    try:
      data = json.loads(json_data)
    except ValueError as e:
      # Call form of print, matching the download error above; it behaves the
      # same on Python 2 and is also valid syntax on Python 3.
      print('Error: Malformed json read from %s: %s' % (url, e))
      return None

    return data.get('master', {})

  def MatchLocalResult(self, test_name, md5_hash):
    """
    Match a locally generated hash of a test cases rendered image with the
    expected hashes downloaded in the baseline json.

    The baseline is a dict mapping the test case name to a dict with the
    expected hashes as keys. The test case name is looked up first, then the
    hash is matched against the keys of that dict.

    Returns MATCH if the md5 provided matches the ones in the baseline json,
    MISMATCH if it does not, NO_BASELINE if the test case has no baseline, or
    BASELINE_DOWNLOAD_FAILED if the baseline could not be downloaded and parsed.
    """
    if self._baselines is None:
      return GoldBaseline.BASELINE_DOWNLOAD_FAILED

    if test_name not in self._baselines:
      return GoldBaseline.NO_BASELINE

    if md5_hash in self._baselines[test_name]:
      return GoldBaseline.MATCH
    return GoldBaseline.MISMATCH
134
135
136# This module collects and writes output in a format expected by the
137# Gold baseline tool. Based on meta data provided explicitly and by
138# adding a series of test results it can be used to produce
139# a JSON file that is uploaded to Google Storage and ingested by Gold.
140#
# The output will look similar to this:
142#
143# {
144#    "build_number" : "2",
145#    "gitHash" : "a4a338179013b029d6dd55e737b5bd648a9fb68c",
146#    "key" : {
147#       "arch" : "arm64",
148#       "compiler" : "Clang",
149#    },
150#    "results" : [
151#       {
152#          "key" : {
153#             "config" : "vk",
154#             "name" : "yuv_nv12_to_rgb_effect",
155#             "source_type" : "gm"
156#          },
157#          "md5" : "7db34da246868d50ab9ddd776ce6d779",
158#          "options" : {
159#             "ext" : "png",
160#             "gamma_correct" : "no"
161#          }
162#       },
163#       {
164#          "key" : {
165#             "config" : "vk",
166#             "name" : "yuv_to_rgb_effect",
167#             "source_type" : "gm"
168#          },
169#          "md5" : "0b955f387740c66eb23bf0e253c80d64",
170#          "options" : {
171#             "ext" : "png",
172#             "gamma_correct" : "no"
173#          }
174#       }
175#    ],
176# }
177#
class GoldResults(object):
  """
  Collects test results and writes them, together with copies of the result
  images, into an output directory as dm.json and passfail.json.
  """

  def __init__(self, source_type, output_dir, properties_str, key_str,
               ignore_hashes_file):
    """
    source_type is the source_type (=corpus) field used for all results.
    output_dir is the directory where the resulting images are copied and
               the dm.json file is written. If the directory exists it will
               be removed and recreated.
    properties_str is a string with space separated key/value pairs that
               is used to set the top level fields in the output JSON file.
    key_str is a string with space separated key/value pairs that
               is used to set the 'key' field in the output JSON file.
    ignore_hashes_file is a file that contains a list of image hashes
               that should be ignored. May be empty/None for no ignore list.
    """
    self._source_type = source_type
    self._properties = _ParseKeyValuePairs(properties_str)
    self._properties['key'] = _ParseKeyValuePairs(key_str)
    self._results = []
    self._passfail = []
    self._output_dir = output_dir

    # make sure the output directory exists and is empty.
    if os.path.exists(output_dir):
      shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir)

    self._ignore_hashes = set()
    if ignore_hashes_file:
      with open(ignore_hashes_file, 'r') as ig_file:
        # One hash per line; surrounding whitespace and blank lines are
        # dropped.
        stripped_lines = (line.strip() for line in ig_file)
        self._ignore_hashes = set(h for h in stripped_lines if h)

  def AddTestResult(self, testName, md5Hash, outputImagePath, matchResult):
    """
    Record the result of one test case.

    testName is the name stored in the 'key' of the result entry.
    md5Hash is the hash of the rendered image.
    outputImagePath is the path of the rendered image; its extension is
               stored in the result entry.
    matchResult is stored next to testName in the pass/fail list.

    Raises ValueError if the image has to be copied but its path has no
    file extension.
    """
    # If the hash is in the list of hashes to ignore then we don't
    # make a copy, but add it to the result.
    imgExt = os.path.splitext(outputImagePath)[1].lstrip('.')
    if md5Hash not in self._ignore_hashes:
      # Copy the image to <output_dir>/<md5Hash>.<image_extension>
      if not imgExt:
        raise ValueError('File %s does not have an extension' % outputImagePath)
      newFilePath = os.path.join(self._output_dir, md5Hash + '.' + imgExt)
      shutil.copy2(outputImagePath, newFilePath)

    # Add an entry to the list of test results
    self._results.append({
        'key': {
            'name': testName,
            'source_type': self._source_type,
        },
        'md5': md5Hash,
        'options': {
            'ext': imgExt,
            'gamma_correct': 'no'
        }
    })

    self._passfail.append((testName, matchResult))

  def _WriteJsonFile(self, file_name, payload):
    """
    Serialize payload as indented JSON, followed by a newline, into
    file_name inside the output directory.
    """
    output_file_name = os.path.join(self._output_dir, file_name)
    # json.dump() produces text, so the file is opened in text mode; binary
    # mode only worked on Python 2 and raises TypeError on Python 3.
    with open(output_file_name, 'w') as outfile:
      json.dump(payload, outfile, indent=1)
      outfile.write('\n')

  def WriteResults(self):
    """
    Write dm.json (the properties plus all collected results) and
    passfail.json (the list of test name / match result pairs) into the
    output directory.
    """
    self._properties['results'] = self._results
    self._WriteJsonFile('dm.json', self._properties)
    self._WriteJsonFile('passfail.json', self._passfail)
250
251
252# Produce example output for manual testing.
def _Example():
  """
  Produce example output for manual testing.

  Creates ./testdirectory holding three empty 'image' files and an
  ignore-hashes file, then records three results and writes the output
  into ./output_directory.
  """
  # Create a test directory with three empty 'image' files.
  test_dir = './testdirectory'
  if not os.path.exists(test_dir):
    os.makedirs(test_dir)
  image_paths = [
      os.path.join(test_dir, 'image%d.png' % index) for index in (1, 2, 3)
  ]
  for image_path in image_paths:
    open(image_path, 'wb').close()

  # Create an instance and add results.
  prop_str = 'build_number 2 "builder name" Builder-Name gitHash ' \
      'a4a338179013b029d6dd55e737b5bd648a9fb68c'

  key_str = 'arch arm64 compiler Clang configuration Debug'

  hash_file = os.path.join(test_dir, 'ignore_hashes.txt')
  # The hash list is text, so write it in text mode; writing a str to a
  # binary-mode file only works on Python 2.
  with open(hash_file, 'w') as f:
    f.write('\n'.join(['hash-1', 'hash-4']) + '\n')

  output_dir = './output_directory'
  gr = GoldResults('pdfium', output_dir, prop_str, key_str, hash_file)
  gr.AddTestResult('test-1', 'hash-1', image_paths[0], GoldBaseline.MATCH)
  gr.AddTestResult('test-2', 'hash-2', image_paths[1], GoldBaseline.MATCH)
  gr.AddTestResult('test-3', 'hash-3', image_paths[2], GoldBaseline.MISMATCH)
  gr.WriteResults()
281
282
# When this file is run directly as a script, generate the example output.
if __name__ == '__main__':
  _Example()
285