1#!/usr/bin/env python 2# Copyright 2019 Google LLC. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6''' 7This tool compares the PDF output of Skia's DM tool of two commits. 8 9It relies on pdfium_test being in the PATH. To build: 10 11mkdir -p ~/src/pdfium 12cd ~/src/pdfium 13gclient config --unmanaged https://pdfium.googlesource.com/pdfium.git 14gclient sync 15cd pdfium 16gn gen out/default --args='pdf_enable_xfa=false pdf_enable_v8=false pdf_is_standalone=true' 17ninja -C out/default pdfium_test 18cp out/default/pdfium_test ~/bin/ 19''' 20 21import os 22import re 23import shutil 24import subprocess 25import sys 26import tempfile 27import threading 28 29EXTRA_GN_ARGS = os.environ.get('PDF_COMPARISON_GN_ARGS', '') 30 31REFERENCE_BACKEND = 'gl' if 'PDF_COMPARISON_NOGPU' not in os.environ else '8888' 32 33DPI = float(os.environ.get('PDF_COMPARISON_DPI', 72)) 34 35PDF_CONFIG = 'pdf' if 'PDF_COMPARISON_300DPI' not in os.environ else 'pdf300' 36 37BAD_TESTS = [ 38 'image-cacherator-from-picture', 39 'image-cacherator-from-raster', 40 'mixershader', 41 'shadermaskfilter_image', 42 'tilemode_decal', 43] 44 45NINJA = 'ninja' 46 47PDFIUM_TEST = 'pdfium_test' 48 49NUM_THREADS = int(os.environ.get('PDF_COMPARISON_THREADS', 40)) 50 51SOURCES = ['gm'] 52 53def test_exe(cmd): 54 with open(os.devnull, 'w') as o: 55 try: 56 subprocess.call([cmd], stdout=o, stderr=o) 57 except OSError: 58 return False 59 return True 60 61def print_cmd(cmd, o): 62 m = re.compile('[^A-Za-z0-9_./-]') 63 o.write('+ ') 64 for c in cmd: 65 if m.search(c) is not None: 66 o.write(repr(c) + ' ') 67 else: 68 o.write(c + ' ') 69 o.write('\n') 70 o.flush() 71 72def check_call(cmd, **kwargs): 73 print_cmd(cmd, sys.stdout) 74 return subprocess.check_call(cmd, **kwargs) 75 76def check_output(cmd, **kwargs): 77 print_cmd(cmd, sys.stdout) 78 return subprocess.check_output(cmd, **kwargs) 79 80def remove(*paths): 81 for path in paths: 82 os.remove(path) 83 
def timeout(deadline, cmd):
    '''Run `cmd` with its output discarded, terminating it after `deadline`.

    Args:
        deadline: number of seconds after which the process is terminated.
        cmd: command line as a list of arguments.

    Returns:
        The return code of the process.
    '''
    with open(os.devnull, 'w') as o:
        proc = subprocess.Popen(cmd, stdout=o, stderr=subprocess.STDOUT)
        timer = threading.Timer(deadline, proc.terminate)
        timer.start()
        try:
            proc.wait()
        finally:
            # Never leave the timer armed, even if wait() is interrupted.
            timer.cancel()
        return proc.returncode

def is_same(path1, path2):
    '''Return True if both files have identical contents.

    If either path is not a regular file, the result is True only when
    neither of them is.
    '''
    if not os.path.isfile(path1) or not os.path.isfile(path2):
        return os.path.isfile(path1) == os.path.isfile(path2)
    with open(path1, 'rb') as f1, open(path2, 'rb') as f2:
        while True:
            c1, c2 = f1.read(4096), f2.read(4096)
            if c1 != c2:
                return False
            if not c1:
                return True


def getfilesoftype(directory, ending):
    '''Yield paths, relative to `directory`, of files ending with `ending`.'''
    for dirpath, _, filenames in os.walk(directory):
        rp = os.path.normpath(os.path.relpath(dirpath, directory))
        for f in filenames:
            if f.endswith(ending):
                yield os.path.join(rp, f)

def get_common_paths(dirs, ext):
    '''Return the sorted relative paths ending in `ext` found in all `dirs`.'''
    return sorted(
        set.intersection(*(set(getfilesoftype(d, ext)) for d in dirs)))

def printable_path(d):
    '''Return `d` with a leading $TMPDIR directory abbreviated, if set.'''
    if 'TMPDIR' in os.environ:
        return d.replace(os.path.normpath(os.environ['TMPDIR']) + '/', '$TMPDIR/')
    return d

def spawn(cmd):
    '''Start `cmd` with its output discarded, without waiting for it.'''
    with open(os.devnull, 'w') as o:
        subprocess.Popen(cmd, stdout=o, stderr=o)

def sysopen(arg):
    '''Open `arg` with the platform's default handler.'''
    plat = sys.platform
    if plat.startswith('darwin'):
        spawn(["open", arg])
    elif plat.startswith('win'):
        # pylint: disable=no-member
        os.startfile(arg)
    else:
        spawn(["xdg-open", arg])

HTML_HEAD = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>DIFF</title>
<style>
body{
background-size:16px 16px;
background-color:rgb(230,230,230);
background-image:
linear-gradient(45deg,rgba(255,255,255,.2) 25%,transparent 25%,transparent 50%,
rgba(255,255,255,.2) 50%,rgba(255,255,255,.2) 75%,transparent 75%,transparent)}
div.r{position:relative;left:0;top:0}
table{table-layout:fixed;width:100%}
img.s{max-width:100%;max-height:320;left:0;top:0}
img.b{position:absolute;mix-blend-mode:difference}
</style>
<script>
function r(c,e,n,g){
t=document.getElementById("t");
function ce(t){return document.createElement(t);}
function ct(n){return document.createTextNode(n);}
function ac(u,v){u.appendChild(v);}
function cn(u,v){u.className=v;}
function it(s){ td=ce("td"); a=ce("a"); a.href=s; img=ce("img"); img.src=s;
 cn(img,"s"); ac(a,img); ac(td,a); return td; }
tr=ce("tr"); td=ce("td"); td.colSpan="4"; ac(td, ct(n)); ac(tr,td);
ac(t,tr); tr=ce("tr"); td=ce("td"); dv=ce("div"); cn(dv,"r");
img=ce("img"); img.src=c; cn(img,"s"); ac(dv,img); img=ce("img");
img.src=e; cn(img,"s b"); ac(dv,img); ac(td,dv); ac(tr,td);
ac(tr,it(c)); ac(tr,it(e)); ac(tr,it(g)); ac(t,tr); }
document.addEventListener('DOMContentLoaded',function(){
'''

HTML_TAIL = '''];
for(i=0;i<z.length;i++){
r(c+z[i][0],e+z[i][0],z[i][2],c+z[i][1]);}},false);
</script></head><body><table id="t">
<tr><th>BEFORE-AFTER DIFF</th>
<th>BEFORE</th><th>AFTER</th>
<th>REFERENCE</th></tr>
</table></body></html>'''

def shard(fn, arglist):
    '''Call fn(arg) for every arg in `arglist`, spread over worker threads.

    The arguments are dealt round-robin into NUM_THREADS shards and each
    non-empty shard is processed by its own thread.

    Returns:
        A list with one result per argument.  Results are appended as the
        threads produce them, so their order need not match `arglist`.
    '''
    arglist = list(arglist)
    jobs = [arglist[i::NUM_THREADS] for i in range(NUM_THREADS)]
    results = []
    def do_shard(*args):
        for arg in args:
            results.append(fn(arg))
    thread_list = []
    for job in jobs:
        if not job:
            continue  # Nothing to do; do not start an idle thread.
        t = threading.Thread(target=do_shard, args=job)
        t.start()
        thread_list.append(t)
    for t in thread_list:
        t.join()
    return results

def shardsum(fn, arglist):
    'return the number of True results returned by fn(arg) for arg in arglist.'
    return sum(1 for result in shard(fn, arglist) if result)

def checkout_worktree(checkoutable):
    '''Check out `checkoutable` in a git worktree under the temp directory.

    An existing 'skpdf_control_tree' directory is reused when the commit can
    be checked out in it; if that checkout fails the directory is deleted and
    the worktree is added again.

    Returns:
        The path of the worktree directory.
    '''
    directory = os.path.join(tempfile.gettempdir(), 'skpdf_control_tree')
    commit = check_output(['git', 'rev-parse', checkoutable]).strip()
    if not isinstance(commit, str):
        # Python 3: check_output() returns bytes; argv and the command echo
        # expect text.
        commit = commit.decode('ascii')
    if os.path.isdir(directory):
        try:
            check_call(['git', 'checkout', commit], cwd=directory)
            return directory
        except subprocess.CalledProcessError:
            shutil.rmtree(directory)
    check_call(['git', 'worktree', 'add', '-f', directory, commit])
    return directory

def _gn_args_flag(extra_args, use_ccache):
    '''Build the `--args=...` flag passed to `gn gen`.'''
    parts = ['is_debug=false']
    if use_ccache:
        parts.append('cc_wrapper="ccache"')
    extra_args = extra_args.strip()
    if extra_args:
        # Joined with a space so user-supplied arguments never fuse with the
        # preceding one.
        parts.append(extra_args)
    return '--args=' + ' '.join(parts)

def build_skia(directory, executable):
    '''Build `executable` in `directory`/out/pdftest and return its path.

    Runs bin/sync, `gn gen` (release build, using ccache when it is
    available, plus EXTRA_GN_ARGS) and ninja.
    '''
    args = _gn_args_flag(EXTRA_GN_ARGS, test_exe('ccache'))
    build_dir = directory + '/out/pdftest'
    check_call([sys.executable, 'bin/sync'], cwd=directory)
    check_call([directory + '/bin/gn', 'gen', 'out/pdftest', args],
               cwd=directory)
    check_call([NINJA, executable], cwd=build_dir)
    return os.path.join(build_dir, executable)

def build_and_run_dm(directory, data_dir):
    '''Build dm in `directory` and run it, writing PDFs below `data_dir`.

    Returns:
        The path of the dm executable that was built.
    '''
    dm = build_skia(directory, 'dm')
    for source in SOURCES:
        os.makedirs(os.path.join(data_dir, PDF_CONFIG, source))
    dm_args = [dm, '--src'] + SOURCES + ['--config', PDF_CONFIG, '-w', data_dir]
    if BAD_TESTS:
        dm_args += ['-m'] + ['~^%s$' % x for x in BAD_TESTS]
    check_call(dm_args, cwd=directory)
    return dm

def rasterize(path):
    '''Render the PDF at `path` to PNG with pdfium_test (30 second limit).

    A non-zero exit status is reported on stdout; otherwise the file
    `path` + '.0.png' is asserted to exist.
    '''
    ret = timeout(30, [PDFIUM_TEST, '--png', '--scale=%g' % (DPI / 72.0), path])
    if ret != 0:
        sys.stdout.write(
            '\nTIMEOUT OR ERROR [%d] "%s"\n' % (ret, printable_path(path)))
        return
    assert os.path.isfile(path + '.0.png')

def _skia_root(script_path):
    '''Return the directory two levels above the one holding `script_path`.'''
    # dirname() is '' when the script is started from its own directory;
    # without the fallback the result would be '/../..', i.e. the root.
    return (os.path.dirname(script_path) or os.curdir) + '/../..'

def main(control_commitish):
    '''Compare the PDFs of the current tree with those of `control_commitish`.

    Steps: build and run dm in both trees, drop byte-identical PDFs,
    rasterize the remaining ones, drop identical rasterizations, score the
    rest with image_diff_metric, render reference images for them and write
    and open an HTML report.  Exits with status 0 as soon as nothing differs.
    '''
    assert os.pardir == '..' and '/' in [os.sep, os.altsep]
    assert test_exe(NINJA)
    assert test_exe(PDFIUM_TEST)
    os.chdir(_skia_root(__file__))
    control_worktree = checkout_worktree(control_commitish)
    tmpdir = tempfile.mkdtemp(prefix='skpdf_')
    exp = tmpdir + '/experim'
    con = tmpdir + '/control'
    build_and_run_dm(os.curdir, exp)
    dm = build_and_run_dm(control_worktree, con)
    image_diff_metric = build_skia(control_worktree, 'image_diff_metric')

    out = sys.stdout
    common_paths = get_common_paths([con, exp], '.pdf')
    out.write('\nNumber of PDFs: %d\n\n' % len(common_paths))
    def compare_identical(path):
        # Identical pairs are deleted so that later scans only see files
        # that still differ.
        cpath, epath = (os.path.join(x, path) for x in (con, exp))
        if is_same(cpath, epath):
            remove(cpath, epath)
            return True
        return False
    identical_count = shardsum(compare_identical, common_paths)
    out.write('Number of identical PDFs: %d\n\n' % identical_count)

    differing_paths = get_common_paths([con, exp], '.pdf')
    if not differing_paths:
        out.write('All PDFs are the same!\n')
        sys.exit(0)
    out.write('Number of differing PDFs: %d\n' % len(differing_paths))
    for p in differing_paths:
        out.write(' %s\n' % printable_path(tmpdir + '/*/' + p))
    out.write('\n')
    shard(rasterize,
          [os.path.join(x, p) for p in differing_paths for x in [con, exp]])

    common_pngs = get_common_paths([con, exp], '.pdf.0.png')
    identical_count = shardsum(compare_identical, common_pngs)
    out.write('Number of PDFs that rasterize the same: %d\n\n'
              % identical_count)

    differing_pngs = get_common_paths([con, exp], '.pdf.0.png')
    if not differing_pngs:
        out.write('All PDFs rasterize the same!\n')
        sys.exit(0)
    out.write('Number of PDFs that rasterize differently: %d\n'
              % len(differing_pngs))
    for p in differing_pngs:
        out.write(' %s\n' % printable_path(tmpdir + '/*/' + p))
    out.write('\n')

    scores = dict()
    def compare_differing_pngs(path):
        cpath, epath = (os.path.join(x, path) for x in (con, exp))
        s = float(subprocess.check_output([image_diff_metric, cpath, epath]))
        # One progress character per image: '.', ':' or '!' by score.
        indicator = '.' if s < 0.001 else ':' if s < 0.01 else '!'
        sys.stdout.write(indicator)
        sys.stdout.flush()
        scores[path] = s
    shard(compare_differing_pngs, differing_pngs)
    # Highest score first.  Iterating the dict directly works on both
    # Python 2 and 3 (dict.iterkeys() exists only on Python 2).
    paths = sorted(scores, key=lambda p: -scores[p])
    out.write('\n\n')
    for p in paths:
        pdfpath = printable_path(tmpdir + '/*/' + p.replace('.0.png', ''))
        out.write(' %6.4f %s\n' % (scores[p], pdfpath))
    out.write('\n')

    errors = []
    rc = re.compile('^' + PDF_CONFIG + r'/([^/]*)/([^/]*)\.pdf\.0\.png$')
    for p in paths:
        m = rc.match(p)
        assert m
        source, name = m.groups()
        errors.append((source, name, scores[p]))

    # Render reference images, with the control build, for exactly the
    # tests whose rasterizations differ.
    for source in SOURCES:
        os.makedirs(os.path.join(con, REFERENCE_BACKEND, source))
    dm_args = [dm, '--src'] + SOURCES + [
        '--config', REFERENCE_BACKEND, '-w', con, '-m'] + [
        '^%s$' % name for _, name, _ in errors]
    check_call(dm_args, cwd=control_worktree)

    report = tmpdir + '/report.html'
    with open(report, 'w') as o:
        o.write(HTML_HEAD)
        o.write('c="%s/";\n' % os.path.relpath(con, tmpdir))
        o.write('e="%s/";\n' % os.path.relpath(exp, tmpdir))
        o.write('z=[\n')
        for source, name, score in errors:
            gt = REFERENCE_BACKEND + '/' + source + '/' + name + '.png'
            p = '%s/%s/%s.pdf.0.png' % (PDF_CONFIG, source, name)
            desc = '%s | %s | %g' % (source, name, score)
            o.write('["%s","%s","%s"],\n' % (p, gt, desc))
        o.write(HTML_TAIL)
    out.write(printable_path(report) + '\n')
    sysopen(report)

if __name__ == '__main__':
    if len(sys.argv) != 2:
        USAGE = ('\nusage:\n {0} COMMIT_OR_BRANCH_TO_COMPARE_TO\n\n'
                 'e.g.:\n {0} HEAD\nor\n {0} HEAD~1\n\n')
        sys.stderr.write(USAGE.format(sys.argv[0]))
        sys.exit(1)
    main(sys.argv[1])