1#!/usr/bin/env python 2# Copyright 2019 Google LLC. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6''' 7This tool compares the PDF output of Skia's DM tool of two commits. 8 9It relies on pdfium_test being in the PATH. To build: 10 11mkdir -p ~/src/pdfium 12cd ~/src/pdfium 13gclient config --unmanaged https://pdfium.googlesource.com/pdfium.git 14gclient sync 15cd pdfium 16gn gen out/default --args='pdf_enable_xfa=false pdf_enable_v8=false pdf_is_standalone=true' 17ninja -C out/default pdfium_test 18cp out/default/pdfium_test ~/bin/ 19''' 20 21import os 22import re 23import shutil 24import subprocess 25import sys 26import tempfile 27import threading 28 29EXTRA_GN_ARGS = os.environ.get('PDF_COMPARISON_GN_ARGS', '') 30 31REFERENCE_BACKEND = 'gl' if 'PDF_COMPARISON_NOGPU' not in os.environ else '8888' 32 33DPI = float(os.environ.get('PDF_COMPARISON_DPI', 72)) 34 35PDF_CONFIG = 'pdf' if 'PDF_COMPARISON_300DPI' not in os.environ else 'pdf300' 36 37BAD_TESTS = [ 38 'image-cacherator-from-picture', 39 'image-cacherator-from-raster', 40 'mixershader', 41 'shadermaskfilter_image', 42 'tilemode_decal', 43] 44 45NINJA = 'ninja' 46 47PDFIUM_TEST = 'pdfium_test' 48 49NUM_THREADS = int(os.environ.get('PDF_COMPARISON_THREADS', 40)) 50 51SOURCES = ['gm'] 52 53def test_exe(cmd): 54 with open(os.devnull, 'w') as o: 55 try: 56 subprocess.call([cmd], stdout=o, stderr=o) 57 except OSError: 58 return False 59 return True 60 61def print_cmd(cmd, o): 62 m = re.compile('[^A-Za-z0-9_./-]') 63 o.write('+ ') 64 for c in cmd: 65 if m.search(c) is not None: 66 o.write(repr(c) + ' ') 67 else: 68 o.write(c + ' ') 69 o.write('\n') 70 o.flush() 71 72def check_call(cmd, **kwargs): 73 print_cmd(cmd, sys.stdout) 74 return subprocess.check_call(cmd, **kwargs) 75 76def check_output(cmd, **kwargs): 77 print_cmd(cmd, sys.stdout) 78 return subprocess.check_output(cmd, **kwargs) 79 80def remove(*paths): 81 for path in paths: 82 os.remove(path) 83 
def timeout(deadline, cmd):
    '''Run `cmd` with output discarded, terminating it after `deadline` seconds.

    Returns the process's return code (whatever Popen reports, including
    the code that results from being terminated by the timer).
    '''
    with open(os.devnull, 'w') as o:
        proc = subprocess.Popen(cmd, stdout=o, stderr=subprocess.STDOUT)
        timer = threading.Timer(deadline, proc.terminate)
        timer.start()
        try:
            proc.wait()
        finally:
            # Never leave the timer armed, even if wait() is interrupted.
            timer.cancel()
        return proc.returncode


def is_same(path1, path2):
    '''Return True if the two paths have byte-identical contents.

    If either path is not a regular file, the result is True only when
    neither of them is.
    '''
    exists1, exists2 = os.path.isfile(path1), os.path.isfile(path2)
    if not exists1 or not exists2:
        return exists1 == exists2
    with open(path1, 'rb') as f1:
        with open(path2, 'rb') as f2:
            while True:
                c1, c2 = f1.read(4096), f2.read(4096)
                if c1 != c2:
                    return False
                if not c1:
                    # Both files hit EOF at the same time.
                    return True


def getfilesoftype(directory, ending):
    '''Yield paths, relative to `directory`, of files whose names end in `ending`.'''
    for dirpath, _, filenames in os.walk(directory):
        rp = os.path.normpath(os.path.relpath(dirpath, directory))
        for f in filenames:
            if f.endswith(ending):
                yield os.path.join(rp, f)


def get_common_paths(dirs, ext):
    '''Return the sorted relative paths ending in `ext` present in every dir.'''
    per_dir = [set(getfilesoftype(d, ext)) for d in dirs]
    return sorted(set.intersection(*per_dir))


def printable_path(d):
    '''Return `d` with the $TMPDIR directory prefix abbreviated to "$TMPDIR/".

    The path is returned unchanged when TMPDIR is unset or empty.
    '''
    tmp = os.environ.get('TMPDIR')
    if tmp:
        # An empty TMPDIR would normalize to '.' and rewrite every './'.
        return d.replace(os.path.normpath(tmp) + '/', '$TMPDIR/')
    return d


def spawn(cmd):
    '''Start `cmd` with output discarded and return without waiting for it.'''
    with open(os.devnull, 'w') as o:
        subprocess.Popen(cmd, stdout=o, stderr=o)


def sysopen(arg):
    '''Open `arg` with the platform's default handler.'''
    plat = sys.platform
    if plat.startswith('darwin'):
        spawn(["open", arg])
    elif plat.startswith('win'):
        # pylint: disable=no-member
        os.startfile(arg)
    else:
        spawn(["xdg-open", arg])


# Start of the HTML report, up to the opening of the DOMContentLoaded handler.
# main() writes the JavaScript assignments of `c`, `e` and `z` between
# HTML_HEAD and HTML_TAIL.
HTML_HEAD = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>DIFF</title>
<style>
body{
background-size:16px 16px;
background-color:rgb(230,230,230);
background-image:
linear-gradient(45deg,rgba(255,255,255,.2) 25%,transparent 25%,transparent 50%,
rgba(255,255,255,.2) 50%,rgba(255,255,255,.2) 75%,transparent 75%,transparent)}
div.r{position:relative;left:0;top:0}
table{table-layout:fixed;width:100%}
img.s{max-width:100%;max-height:320;left:0;top:0}
img.b{position:absolute;mix-blend-mode:difference}
</style>
<script>
function r(c,e,n,g){
t=document.getElementById("t");
function ce(t){return document.createElement(t);}
function ct(n){return document.createTextNode(n);}
function ac(u,v){u.appendChild(v);}
function cn(u,v){u.className=v;}
function it(s){ td=ce("td"); a=ce("a"); a.href=s; img=ce("img"); img.src=s;
        cn(img,"s"); ac(a,img); ac(td,a); return td; }
tr=ce("tr"); td=ce("td"); td.colSpan="4"; ac(td, ct(n)); ac(tr,td);
ac(t,tr); tr=ce("tr"); td=ce("td"); dv=ce("div"); cn(dv,"r");
img=ce("img"); img.src=c; cn(img,"s"); ac(dv,img); img=ce("img");
img.src=e; cn(img,"s b"); ac(dv,img); ac(td,dv); ac(tr,td);
ac(tr,it(c)); ac(tr,it(e)); ac(tr,it(g)); ac(t,tr); }
document.addEventListener('DOMContentLoaded',function(){
'''

# End of the HTML report: closes the `z` array, adds one table row per entry,
# and closes the document.
HTML_TAIL = '''];
for(i=0;i<z.length;i++){
r(c+z[i][0],e+z[i][0],z[i][2],c+z[i][1]);}},false);
</script></head><body><table id="t">
<tr><th>BEFORE-AFTER DIFF</th>
<th>BEFORE</th><th>AFTER</th>
<th>REFERENCE</th></tr>
</table></body></html>'''


def shard(fn, arglist):
    '''Call fn(arg) for every arg in `arglist`, spread over NUM_THREADS threads.

    Returns the list of results in the order the calls finished, which is
    not necessarily the order of `arglist`.
    '''
    items = list(arglist)
    # Round-robin partition: thread i handles items i, i+N, i+2N, ...
    jobs = [items[i::NUM_THREADS] for i in range(NUM_THREADS)]
    results = []

    def do_shard(*args):
        for arg in args:
            results.append(fn(arg))

    thread_list = []
    for job in jobs:
        t = threading.Thread(target=do_shard, args=job)
        t.start()
        thread_list.append(t)
    for t in thread_list:
        t.join()
    return results


def shardsum(fn, arglist):
    'return the number of True results returned by fn(arg) for arg in arglist.'
    return sum(1 for result in shard(fn, arglist) if result)


def checkout_worktree(checkoutable):
    '''Check out `checkoutable` in a git worktree under the temp directory.

    An existing worktree directory is reused when `git checkout` succeeds
    in it; otherwise it is deleted and re-created with `git worktree add`.
    Returns the worktree directory.
    '''
    directory = os.path.join(tempfile.gettempdir(), 'skpdf_control_tree')
    # universal_newlines makes the commit hash text (not bytes) on Python 3,
    # which print_cmd() and the following git invocations require.
    commit = check_output(['git', 'rev-parse', checkoutable],
                          universal_newlines=True).strip()
    if os.path.isdir(directory):
        try:
            check_call(['git', 'checkout', commit], cwd=directory)
            return directory
        except subprocess.CalledProcessError:
            shutil.rmtree(directory)
    check_call(['git', 'worktree', 'add', '-f', directory, commit])
    return directory


def build_skia(directory, executable):
    '''Build `executable` in `directory`/out/pdftest and return its path.

    Runs bin/sync, `gn gen` and ninja inside `directory`.  ccache is used
    as the compiler wrapper when it can be executed, and EXTRA_GN_ARGS is
    appended to the GN arguments.
    '''
    args = ('--args=is_debug=false'
            ' extra_cflags=["-DSK_PDF_LESS_COMPRESSION",'
            ' "-DSK_PDF_BASE85_BINARY"] ')
    if test_exe('ccache'):
        args += ' cc_wrapper="ccache"'
    # Separate with a space so user args never fuse with the previous one.
    args += ' ' + EXTRA_GN_ARGS
    build_dir = directory + '/out/pdftest'
    check_call([sys.executable, 'bin/sync'], cwd=directory)
    check_call([directory + '/bin/gn', 'gen', 'out/pdftest', args],
               cwd=directory)
    check_call([NINJA, executable], cwd=build_dir)
    return os.path.join(build_dir, executable)


def build_and_run_dm(directory, data_dir):
    '''Build dm in `directory` and run it, writing PDF output into `data_dir`.

    Returns the path of the dm executable that was built.
    '''
    dm = build_skia(directory, 'dm')
    for source in SOURCES:
        os.makedirs(os.path.join(data_dir, PDF_CONFIG, source))
    dm_args = [dm, '--src'] + SOURCES + ['--config', PDF_CONFIG, '-w', data_dir]
    if BAD_TESTS:
        dm_args += ['-m'] + ['~^%s$' % x for x in BAD_TESTS]
    check_call(dm_args, cwd=directory)
    return dm


def rasterize(path):
    '''Render the PDF at `path` to PNG with pdfium_test (30 second limit).

    A non-zero exit status is reported on stdout and otherwise ignored; on
    success the file `path` + '.0.png' is asserted to exist.
    '''
    ret = timeout(30, [PDFIUM_TEST, '--png', '--scale=%g' % (DPI / 72.0), path])
    if ret != 0:
        sys.stdout.write(
            '\nTIMEOUT OR ERROR [%d] "%s"\n' % (ret, printable_path(path)))
        return
    assert os.path.isfile(path + '.0.png')


def main(control_commitish):
    '''Compare DM's PDF output at the current tree against `control_commitish`.

    Builds and runs dm for both trees, drops byte-identical PDFs, rasterizes
    the rest, drops identical rasterizations, scores the remaining pairs with
    image_diff_metric, and writes and opens an HTML report.  Exits with
    status 0 early when nothing differs.
    '''
    assert os.pardir == '..' and '/' in [os.sep, os.altsep]
    assert test_exe(NINJA), 'could not execute ' + NINJA
    assert test_exe(PDFIUM_TEST), 'could not execute ' + PDFIUM_TEST
    # abspath: dirname(__file__) is '' when the script is run by bare file
    # name, and '' + '/../..' would resolve to the filesystem root.
    os.chdir(os.path.dirname(os.path.abspath(__file__)) + '/../..')
    control_worktree = checkout_worktree(control_commitish)
    tmpdir = tempfile.mkdtemp(prefix='skpdf_')
    exp = tmpdir + '/experim'
    con = tmpdir + '/control'
    build_and_run_dm(os.curdir, exp)
    dm = build_and_run_dm(control_worktree, con)
    image_diff_metric = build_skia(control_worktree, 'image_diff_metric')

    out = sys.stdout
    common_paths = get_common_paths([con, exp], '.pdf')
    out.write('\nNumber of PDFs: %d\n\n' % len(common_paths))

    def compare_identical(path):
        # Identical pairs are deleted so that later directory scans only
        # see the files that still differ.
        cpath, epath = (os.path.join(x, path) for x in (con, exp))
        if is_same(cpath, epath):
            remove(cpath, epath)
            return True
        return False

    identical_count = shardsum(compare_identical, common_paths)
    out.write('Number of identical PDFs: %d\n\n' % identical_count)

    differing_paths = get_common_paths([con, exp], '.pdf')
    if not differing_paths:
        out.write('All PDFs are the same!\n')
        sys.exit(0)
    out.write('Number of differing PDFs: %d\n' % len(differing_paths))
    for p in differing_paths:
        out.write('  %s\n' % printable_path(tmpdir + '/*/' + p))
    out.write('\n')
    shard(rasterize,
          [os.path.join(x, p) for p in differing_paths for x in [con, exp]])

    common_pngs = get_common_paths([con, exp], '.pdf.0.png')
    identical_count = shardsum(compare_identical, common_pngs)
    out.write('Number of PDFs that rasterize the same: %d\n\n'
              % identical_count)

    differing_pngs = get_common_paths([con, exp], '.pdf.0.png')
    if not differing_pngs:
        out.write('All PDFs rasterize the same!\n')
        sys.exit(0)
    out.write('Number of PDFs that rasterize differently: %d\n'
              % len(differing_pngs))
    for p in differing_pngs:
        out.write('  %s\n' % printable_path(tmpdir + '/*/' + p))
    out.write('\n')

    scores = dict()

    def compare_differing_pngs(path):
        cpath, epath = (os.path.join(x, path) for x in (con, exp))
        s = float(subprocess.check_output([image_diff_metric, cpath, epath]))
        # One progress character per image, coarser for larger differences.
        indicator = '.' if s < 0.001 else ':' if s < 0.01 else '!'
        sys.stdout.write(indicator)
        sys.stdout.flush()
        scores[path] = s

    shard(compare_differing_pngs, differing_pngs)
    # Largest score first.  Iterating the dict directly works on both
    # Python 2 and 3 (dict.iterkeys() exists only on Python 2).
    paths = sorted(scores, key=lambda p: -scores[p])
    out.write('\n\n')
    for p in paths:
        pdfpath = printable_path(tmpdir + '/*/' + p.replace('.0.png', ''))
        out.write('  %6.4f  %s\n' % (scores[p], pdfpath))
    out.write('\n')

    errors = []
    rc = re.compile('^' + PDF_CONFIG + r'/([^/]*)/([^/]*)\.pdf\.0\.png$')
    for p in paths:
        m = rc.match(p)
        assert(m)
        source, name = m.groups()
        errors.append((source, name, scores[p]))

    # Render reference images for just the tests that differ.
    for source in SOURCES:
        os.makedirs(os.path.join(con, REFERENCE_BACKEND, source))
    dm_args = [dm, '--src'] + SOURCES + [
        '--config', REFERENCE_BACKEND, '-w', con, '-m'] + [
        '^%s$' % name for _, name, _ in errors]
    check_call(dm_args, cwd=control_worktree)

    report = tmpdir + '/report.html'
    with open(report, 'w') as o:
        o.write(HTML_HEAD)
        o.write('c="%s/";\n' % os.path.relpath(con, tmpdir))
        o.write('e="%s/";\n' % os.path.relpath(exp, tmpdir))
        o.write('z=[\n')
        for source, name, score in errors:
            gt = REFERENCE_BACKEND + '/' + source + '/' + name + '.png'
            p = '%s/%s/%s.pdf.0.png' % (PDF_CONFIG, source, name)
            desc = '%s | %s | %g' % (source, name, score)
            o.write('["%s","%s","%s"],\n' % (p, gt, desc))
        o.write(HTML_TAIL)
    out.write(printable_path(report) + '\n')
    sysopen(report)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        USAGE = ('\nusage:\n {0} COMMIT_OR_BRANCH_TO_COMPARE_TO\n\n'
                 'e.g.:\n {0} HEAD\nor\n {0} HEAD~1\n\n')
        sys.stderr.write(USAGE.format(sys.argv[0]))
        sys.exit(1)
    main(sys.argv[1])