# Copyright 2015 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import contextlib
import json
import os
import shlex
import shutil
import socket
import ssl

try:
  # Python 2.
  from urllib2 import HTTPError, URLError, urlopen
except ImportError:
  # Python 3.
  from urllib.error import HTTPError, URLError
  from urllib.request import urlopen


def _ParseKeyValuePairs(kv_str):
  """
  Parses a string of the type 'key1 value1 key2 value2' into a dict.

  The string is tokenized with shlex.split(), so a key or value that contains
  spaces can be quoted. If a key appears more than once the last value wins.

  Raises ValueError if the string holds an odd number of tokens.
  """
  kv_pairs = shlex.split(kv_str)
  if len(kv_pairs) % 2:
    raise ValueError('Uneven number of key/value pairs. Got %s' % kv_str)
  # Even positions are keys, odd positions are the matching values.
  return dict(zip(kv_pairs[::2], kv_pairs[1::2]))


def _WriteJsonFile(path, data):
  """
  Serializes data as json (indent=1, trailing newline) and writes it to path.

  The text is produced before the file is opened, so a serialization error
  does not leave a truncated file behind. The file is written in binary mode
  so the bytes on disk are the same on every platform and Python version.
  """
  payload = json.dumps(data, indent=1) + '\n'
  # json.dumps() returns bytes (str) on Python 2 and text (str) on Python 3.
  if not isinstance(payload, bytes):
    payload = payload.encode('utf-8')
  with open(path, 'wb') as outfile:
    outfile.write(payload)


# This module downloads a json provided by Skia Gold with the expected baselines
# for each test file.
#
# The expected format for the json is:
# {
#   "commit": {
#     "author": "John Doe (jdoe@chromium.org)",
#     "commit_time": 1510598123,
#     "hash": "cee39e6e90c219cc91f2c94a912a06977f4461a0"
#   },
#   "master": {
#     "abc.pdf.1": {
#       "0ec3d86f545052acd7c9a16fde8ca9d4": 1,
#       "80455b71673becc9fbc100d6da56ca65": 1,
#       "b68e2ecb80090b4502ec89ad1be2322c": 1
#      },
#     "defgh.pdf.0": {
#       "01e020cd4cd05c6738e479a46a506044": 1,
#       "b68e2ecb80090b4502ec89ad1be2322c": 1
#     }
#   },
#   "changeLists": {
#     "18499" : {
#       "abc.pdf.1": {
#         "d5dd649124cf1779152253dc8fb239c5": 1,
#         "42a270581930579cdb0f28674972fb1a": 1,
#       }
#     }
#   }
# }
class GoldBaseline(object):

  def __init__(self, properties_str):
    """
    properties_str is a string with space separated key/value pairs that
    is used to find the cl number for which to baseline

    The baseline json is downloaded as part of construction. If the download
    fails, every later MatchLocalResult() call returns
    BASELINE_DOWNLOAD_FAILED.
    """
    self._properties = _ParseKeyValuePairs(properties_str)
    self._baselines = self._LoadSkiaGoldBaselines()

  def _LoadSkiaGoldBaselines(self):
    """
    Download the baseline json and return its 'master' section: a dict that
    maps each test case name to a dict keyed by the accepted hashes.

    Returns an empty dict if the json has no 'master' section, and None if the
    json could not be downloaded or parsed (an error is printed in that case).

    Raises ValueError if the server answers with a content type other than
    application/json.

    NOTE(review): only 'master' is consulted; the 'changeLists' section shown
    in the format description is not read here. Presumably the per-issue URL
    already folds the CL baselines into 'master' - confirm against the server.
    """
    GOLD_BASELINE_URL = 'https://pdfium-gold.skia.org/json/baseline'
    EXPECTED_CONTENT_TYPE = 'application/json'
    MAX_TIMEOUT = 33  # 5 tries. (2, 4, 8, 16, 32)

    # If we have an issue number add it to the baseline URL
    cl_number_str = self._properties.get('issue', None)
    url = GOLD_BASELINE_URL + ('/' + cl_number_str if cl_number_str else '')

    timeout = 2
    while True:
      try:
        # closing() guarantees the connection is released on every path; the
        # Python 2 response object is not a context manager by itself.
        with contextlib.closing(urlopen(url, timeout=timeout)) as response:
          c_type = response.headers.get('Content-type', '')
          # Ignore media type parameters such as '; charset=utf-8'.
          media_type = c_type.split(';')[0].strip().lower()
          if media_type != EXPECTED_CONTENT_TYPE:
            raise ValueError('Invalid content type. Got %s instead of %s' %
                             (c_type, EXPECTED_CONTENT_TYPE))
          json_data = response.read()
        break  # If this line is reached, then no exception occurred.
      except (ssl.SSLError, socket.timeout, HTTPError, URLError) as e:
        # Retry with a doubled timeout until the budget is exhausted.
        timeout *= 2
        if timeout < MAX_TIMEOUT:
          continue
        print('Error: Unable to read skia gold json from %s: %s' % (url, e))
        return None

    try:
      data = json.loads(json_data)
    except ValueError as e:
      print('Error: Malformed json read from %s: %s' % (url, e))
      return None

    # Valid json that is not an object (e.g. a list) has no baselines to offer.
    if not isinstance(data, dict):
      print('Error: Malformed json read from %s: %s' %
            (url, 'top level value is not an object'))
      return None

    return data.get('master', {})

  # Return values for MatchLocalResult().
  MATCH = 'match'
  MISMATCH = 'mismatch'
  NO_BASELINE = 'no_baseline'
  BASELINE_DOWNLOAD_FAILED = 'baseline_download_failed'

  def MatchLocalResult(self, test_name, md5_hash):
    """
    Match a locally generated hash of a test case's rendered image with the
    expected hashes downloaded in the baseline json.

    The baseline is a dict mapping the test case name to a dict with the
    expected hashes as keys. The test case name is looked up first, then the
    hash is matched against the keys of that dict.

    Returns MATCH if the md5 provided matches the ones in the baseline json,
    MISMATCH if it does not, NO_BASELINE if the test case has no baseline, or
    BASELINE_DOWNLOAD_FAILED if the baseline could not be downloaded and parsed.
    """
    if self._baselines is None:
      return GoldBaseline.BASELINE_DOWNLOAD_FAILED

    if test_name not in self._baselines:
      return GoldBaseline.NO_BASELINE

    if md5_hash in self._baselines[test_name]:
      return GoldBaseline.MATCH

    return GoldBaseline.MISMATCH


# This module collects and writes output in a format expected by the
# Gold baseline tool. Based on meta data provided explicitly and by
# adding a series of test results it can be used to produce
# a JSON file that is uploaded to Google Storage and ingested by Gold.
#
# The output will look similar to this:
#
# {
#    "build_number" : "2",
#    "gitHash" : "a4a338179013b029d6dd55e737b5bd648a9fb68c",
#    "key" : {
#       "arch" : "arm64",
#       "compiler" : "Clang",
#    },
#    "results" : [
#       {
#          "key" : {
#             "config" : "vk",
#             "name" : "yuv_nv12_to_rgb_effect",
#             "source_type" : "gm"
#          },
#          "md5" : "7db34da246868d50ab9ddd776ce6d779",
#          "options" : {
#             "ext" : "png",
#             "gamma_correct" : "no"
#          }
#       },
#       {
#          "key" : {
#             "config" : "vk",
#             "name" : "yuv_to_rgb_effect",
#             "source_type" : "gm"
#          },
#          "md5" : "0b955f387740c66eb23bf0e253c80d64",
#          "options" : {
#             "ext" : "png",
#             "gamma_correct" : "no"
#          }
#       }
#    ],
# }
#
class GoldResults(object):

  def __init__(self, source_type, output_dir, properties_str, key_str,
               ignore_hashes_file):
    """
    source_type is the source_type (=corpus) field used for all results.
    output_dir is the directory where the resulting images are copied and
               the dm.json file is written. If the directory exists it will
               be removed and recreated.
    properties_str is a string with space separated key/value pairs that
               is used to set the top level fields in the output JSON file.
    key_str is a string with space separated key/value pairs that
               is used to set the 'key' field in the output JSON file.
    ignore_hashes_file is a file that contains a list of image hashes
               that should be ignored. One hash per line; blank lines are
               skipped. May be empty/None, in which case nothing is ignored.
    """
    self._source_type = source_type
    self._properties = _ParseKeyValuePairs(properties_str)
    self._properties['key'] = _ParseKeyValuePairs(key_str)
    self._results = []
    self._passfail = []
    self._output_dir = output_dir

    # make sure the output directory exists and is empty.
    if os.path.exists(output_dir):
      shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir)

    self._ignore_hashes = set()
    if ignore_hashes_file:
      with open(ignore_hashes_file, 'r') as ig_file:
        stripped = (line.strip() for line in ig_file)
        self._ignore_hashes = set(h for h in stripped if h)

  def AddTestResult(self, testName, md5Hash, outputImagePath, matchResult):
    """
    Records the result of one test case.

    testName is the name of the test case.
    md5Hash is the hash of the rendered image.
    outputImagePath is the path of the rendered image.
    matchResult is the outcome recorded for the test in passfail.json (the
               example in this file passes GoldBaseline.MATCH / MISMATCH).

    Raises ValueError if the image has to be copied but its path has no
    file extension.
    """
    # If the hash is in the list of hashes to ignore then we don't
    # make a copy, but add it to the result.
    imgExt = os.path.splitext(outputImagePath)[1].lstrip('.')
    if md5Hash not in self._ignore_hashes:
      # Copy the image to <output_dir>/<md5Hash>.<image_extension>
      if not imgExt:
        raise ValueError('File %s does not have an extension' % outputImagePath)
      newFilePath = os.path.join(self._output_dir, md5Hash + '.' + imgExt)
      shutil.copy2(outputImagePath, newFilePath)

    # Add an entry to the list of test results
    self._results.append({
        'key': {
            'name': testName,
            'source_type': self._source_type,
        },
        'md5': md5Hash,
        'options': {
            'ext': imgExt,
            'gamma_correct': 'no'
        }
    })

    self._passfail.append((testName, matchResult))

  def WriteResults(self):
    """
    Writes dm.json (properties, key and all results) and passfail.json (the
    list of [test name, match result] pairs) into the output directory.
    """
    self._properties.update({'results': self._results})

    _WriteJsonFile(os.path.join(self._output_dir, 'dm.json'), self._properties)
    _WriteJsonFile(
        os.path.join(self._output_dir, 'passfail.json'), self._passfail)


# Produce example output for manual testing.
def _Example():
  # Create a test directory with three empty 'image' files.
  test_dir = './testdirectory'
  if not os.path.exists(test_dir):
    os.makedirs(test_dir)
  open(os.path.join(test_dir, 'image1.png'), 'wb').close()
  open(os.path.join(test_dir, 'image2.png'), 'wb').close()
  open(os.path.join(test_dir, 'image3.png'), 'wb').close()

  # Create an instance and add results.
  prop_str = 'build_number 2 "builder name" Builder-Name gitHash ' \
      'a4a338179013b029d6dd55e737b5bd648a9fb68c'

  key_str = 'arch arm64 compiler Clang configuration Debug'

  # Text mode: the content is a str, which a binary file rejects on Python 3.
  hash_file = os.path.join(test_dir, 'ignore_hashes.txt')
  with open(hash_file, 'w') as f:
    f.write('\n'.join(['hash-1', 'hash-4']) + '\n')

  output_dir = './output_directory'
  gr = GoldResults('pdfium', output_dir, prop_str, key_str, hash_file)
  gr.AddTestResult('test-1', 'hash-1', os.path.join(test_dir, 'image1.png'),
                   GoldBaseline.MATCH)
  gr.AddTestResult('test-2', 'hash-2', os.path.join(test_dir, 'image2.png'),
                   GoldBaseline.MATCH)
  gr.AddTestResult('test-3', 'hash-3', os.path.join(test_dir, 'image3.png'),
                   GoldBaseline.MISMATCH)
  gr.WriteResults()


if __name__ == '__main__':
  _Example()