1# Copyright 2015 The PDFium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5 6import json 7import os 8import shlex 9import shutil 10import urllib2 11 12 13def _ParseKeyValuePairs(kv_str): 14 """ 15 Parses a string of the type 'key1 value1 key2 value2' into a dict. 16 """ 17 kv_pairs = shlex.split(kv_str) 18 if len(kv_pairs) % 2: 19 raise ValueError('Uneven number of key/value pairs. Got %s' % kv_str) 20 return { kv_pairs[i]:kv_pairs[i + 1] for i in xrange(0, len(kv_pairs), 2) } 21 22 23# This module downloads a json provided by Skia Gold with the expected baselines 24# for each test file. 25# 26# The expected format for the json is: 27# { 28# "commit": { 29# "author": "John Doe (jdoe@chromium.org)", 30# "commit_time": 1510598123, 31# "hash": "cee39e6e90c219cc91f2c94a912a06977f4461a0" 32# }, 33# "master": { 34# "abc.pdf.1": { 35# "0ec3d86f545052acd7c9a16fde8ca9d4": 1, 36# "80455b71673becc9fbc100d6da56ca65": 1, 37# "b68e2ecb80090b4502ec89ad1be2322c": 1 38# }, 39# "defgh.pdf.0": { 40# "01e020cd4cd05c6738e479a46a506044": 1, 41# "b68e2ecb80090b4502ec89ad1be2322c": 1 42# } 43# }, 44# "changeLists": { 45# "18499" : { 46# "abc.pdf.1": { 47# "d5dd649124cf1779152253dc8fb239c5": 1, 48# "42a270581930579cdb0f28674972fb1a": 1, 49# } 50# } 51# } 52# } 53class GoldBaseline(object): 54 55 def __init__(self, properties_str): 56 """ 57 properties_str is a string with space separated key/value pairs that 58 is used to find the cl number for which to baseline 59 """ 60 self._properties = _ParseKeyValuePairs(properties_str) 61 self._baselines = self._LoadSkiaGoldBaselines() 62 63 def _LoadSkiaGoldBaselines(self): 64 """ 65 Download the baseline json and return a list of the two baselines that 66 should be used to match hashes (master and cl#). 
67 """ 68 GOLD_BASELINE_URL = ('https://storage.googleapis.com/skia-infra-gm/' 69 'hash_files/gold-pdfium-baseline.json') 70 try: 71 response = urllib2.urlopen(GOLD_BASELINE_URL) 72 json_data = response.read() 73 except (urllib2.HTTPError, urllib2.URLError) as e: 74 print ('Error: Unable to read skia gold json from %s: %s' 75 % (GOLD_BASELINE_URL, e)) 76 return None 77 78 try: 79 data = json.loads(json_data) 80 except ValueError: 81 print 'Error: Malformed json read from %s: %s' % (GOLD_BASELINE_URL, e) 82 return None 83 84 try: 85 master_baseline = data['master'] 86 except (KeyError, TypeError): 87 print ('Error: "master" key not in json read from %s: %s' 88 % (GOLD_BASELINE_URL, e)) 89 return None 90 91 cl_number_str = self._properties.get('issue') 92 if cl_number_str is None: 93 return [master_baseline] 94 95 try: 96 cl_baseline = data['changeLists'][cl_number_str] 97 except KeyError: 98 return [master_baseline] 99 100 return [cl_baseline, master_baseline] 101 102 # Return values for MatchLocalResult(). 103 MATCH = 'match' 104 MISMATCH = 'mismatch' 105 NO_BASELINE = 'no_baseline' 106 BASELINE_DOWNLOAD_FAILED = 'baseline_download_failed' 107 108 def MatchLocalResult(self, test_name, md5_hash): 109 """ 110 Match a locally generated hash of a test cases rendered image with the 111 expected hashes downloaded in the baselines json. 112 113 Each baseline is a dict mapping the test case name to a dict with the 114 expected hashes as keys. Therefore, this list of baselines should be 115 searched until the test case name is found, then the hash should be matched 116 with the options in that dict. If the hashes don't match, it should be 117 considered a failure and we should not continue searching the baseline list. 118 119 Returns MATCH if the md5 provided matches the ones in the baseline json, 120 MISMATCH if it does not, NO_BASELINE if the test case has no baseline, or 121 BASELINE_DOWNLOAD_FAILED if the baseline could not be downloaded and parsed. 
122 """ 123 if self._baselines is None: 124 return GoldBaseline.BASELINE_DOWNLOAD_FAILED 125 126 found_test_case = False 127 for baseline in self._baselines: 128 if test_name in baseline: 129 found_test_case = True 130 if md5_hash in baseline[test_name]: 131 return GoldBaseline.MATCH 132 133 return (GoldBaseline.MISMATCH if found_test_case 134 else GoldBaseline.NO_BASELINE) 135 136 137# This module collects and writes output in a format expected by the 138# Gold baseline tool. Based on meta data provided explicitly and by 139# adding a series of test results it can be used to produce 140# a JSON file that is uploaded to Google Storage and ingested by Gold. 141# 142# The output will look similar this: 143# 144# { 145# "build_number" : "2", 146# "gitHash" : "a4a338179013b029d6dd55e737b5bd648a9fb68c", 147# "key" : { 148# "arch" : "arm64", 149# "compiler" : "Clang", 150# }, 151# "results" : [ 152# { 153# "key" : { 154# "config" : "vk", 155# "name" : "yuv_nv12_to_rgb_effect", 156# "source_type" : "gm" 157# }, 158# "md5" : "7db34da246868d50ab9ddd776ce6d779", 159# "options" : { 160# "ext" : "png", 161# "gamma_correct" : "no" 162# } 163# }, 164# { 165# "key" : { 166# "config" : "vk", 167# "name" : "yuv_to_rgb_effect", 168# "source_type" : "gm" 169# }, 170# "md5" : "0b955f387740c66eb23bf0e253c80d64", 171# "options" : { 172# "ext" : "png", 173# "gamma_correct" : "no" 174# } 175# } 176# ], 177# } 178# 179class GoldResults(object): 180 def __init__(self, source_type, outputDir, propertiesStr, keyStr, 181 ignore_hashes_file): 182 """ 183 source_type is the source_type (=corpus) field used for all results. 184 output_dir is the directory where the resulting images are copied and 185 the dm.json file is written. If the directory exists it will 186 be removed and recreated. 187 propertiesStr is a string with space separated key/value pairs that 188 is used to set the top level fields in the output JSON file. 
189 keyStr is a string with space separated key/value pairs that 190 is used to set the 'key' field in the output JSON file. 191 ignore_hashes_file is a file that contains a list of image hashes 192 that should be ignored. 193 """ 194 self._source_type = source_type 195 self._properties = _ParseKeyValuePairs(propertiesStr) 196 self._properties["key"] = _ParseKeyValuePairs(keyStr) 197 self._results = [] 198 self._outputDir = outputDir 199 200 # make sure the output directory exists and is empty. 201 if os.path.exists(outputDir): 202 shutil.rmtree(outputDir, ignore_errors=True) 203 os.makedirs(outputDir) 204 205 self._ignore_hashes = set() 206 if ignore_hashes_file: 207 with open(ignore_hashes_file, 'r') as ig_file: 208 hashes=[x.strip() for x in ig_file.readlines() if x.strip()] 209 self._ignore_hashes = set(hashes) 210 211 def AddTestResult(self, testName, md5Hash, outputImagePath): 212 # If the hash is in the list of hashes to ignore then we don'try 213 # make a copy, but add it to the result. 214 imgExt = os.path.splitext(outputImagePath)[1].lstrip(".") 215 if md5Hash not in self._ignore_hashes: 216 # Copy the image to <output_dir>/<md5Hash>.<image_extension> 217 if not imgExt: 218 raise ValueError("File %s does not have an extension" % outputImagePath) 219 newFilePath = os.path.join(self._outputDir, md5Hash + '.' + imgExt) 220 shutil.copy2(outputImagePath, newFilePath) 221 222 # Add an entry to the list of test results 223 self._results.append({ 224 "key": { 225 "name": testName, 226 "source_type": self._source_type, 227 }, 228 "md5": md5Hash, 229 "options": { 230 "ext": imgExt, 231 "gamma_correct": "no" 232 } 233 }) 234 235 def WriteResults(self): 236 self._properties.update({ 237 "results": self._results 238 }) 239 240 outputFileName = os.path.join(self._outputDir, "dm.json") 241 with open(outputFileName, 'wb') as outfile: 242 json.dump(self._properties, outfile, indent=1) 243 outfile.write("\n") 244 245# Produce example output for manual testing. 
if __name__ == "__main__":
  # Create a test directory with three empty 'image' files.
  testDir = "./testdirectory"
  if not os.path.exists(testDir):
    os.makedirs(testDir)
  for imageName in ("image1.png", "image2.png", "image3.png"):
    open(os.path.join(testDir, imageName), 'wb').close()

  # Create an instance and add results.
  propStr = ('build_number 2 "builder name" Builder-Name '
             'gitHash a4a338179013b029d6dd55e737b5bd648a9fb68c')

  keyStr = "arch arm64 compiler Clang configuration Debug"

  hash_file = os.path.join(testDir, "ignore_hashes.txt")
  with open(hash_file, 'w') as f:
    f.write("\n".join(["hash-1", "hash-4"]) + "\n")

  # GoldResults removes and recreates its output directory before it reads
  # the ignore file, so the output must go to a different directory than the
  # one holding the input images and the hash file.
  outputDir = "./testdirectory_output"

  gr = GoldResults("pdfium", outputDir, propStr, keyStr, hash_file)
  gr.AddTestResult("test-1", "hash-1", os.path.join(testDir, "image1.png"))
  gr.AddTestResult("test-2", "hash-2", os.path.join(testDir, "image2.png"))
  gr.AddTestResult("test-3", "hash-3", os.path.join(testDir, "image3.png"))
  gr.WriteResults()