1# Copyright 2017 The PDFium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4"""Compares pairs of page images and generates an HTML to look at differences. 5""" 6 7import functools 8import glob 9import multiprocessing 10import os 11import re 12import subprocess 13import sys 14import webbrowser 15 16# pylint: disable=relative-import 17from common import DirectoryFinder 18 19 20def GenerateOneDiffParallel(image_comparison, image): 21 return image_comparison.GenerateOneDiff(image) 22 23 24class ImageComparison(object): 25 """Compares pairs of page images and generates an HTML to look at differences. 26 27 The images are all assumed to have the same name and be in two directories: 28 [output_path]/[two_labels[0]] and [output_path]/[two_labels[1]]. For example, 29 if output_path is "/tmp/images" and two_labels is ("before", "after"), 30 images in /tmp/images/before will be compared to /tmp/images/after. The HTML 31 produced will be in /tmp/images/compare.html and have relative links to these 32 images, so /tmp/images is self-contained and can be moved around or shared. 33 """ 34 35 def __init__(self, build_dir, output_path, two_labels, num_workers, 36 threshold_fraction): 37 """Constructor. 38 39 Args: 40 build_dir: Path to the build directory. 41 output_path: Path with the pngs and where the html will be created. 42 two_labels: Tuple of two strings that name the subdirectories in 43 output_path containing the images. 44 num_workers: Number of worker threads to start. 45 threshold_fraction: Minimum percentage (0.0 to 1.0) of pixels below which 46 an image is considered to have only small changes. They will not be 47 displayed on the HTML, only listed. 48 """ 49 self.build_dir = build_dir 50 self.output_path = output_path 51 self.two_labels = two_labels 52 self.num_workers = num_workers 53 self.threshold = threshold_fraction * 100 54 55 def Run(self, open_in_browser): 56 """Runs the comparison and generates an HTML with the results. 57 58 Returns: 59 Exit status. 60 """ 61 62 # Running a test defines a number of attributes on the fly. 63 # pylint: disable=attribute-defined-outside-init 64 65 if len(self.two_labels) != 2: 66 print >> sys.stderr, 'two_labels must be a tuple of length 2' 67 return 1 68 69 finder = DirectoryFinder(self.build_dir) 70 self.img_diff_bin = finder.ExecutablePath('pdfium_diff') 71 72 html_path = os.path.join(self.output_path, 'compare.html') 73 74 self.diff_path = os.path.join(self.output_path, 'diff') 75 if not os.path.exists(self.diff_path): 76 os.makedirs(self.diff_path) 77 78 self.image_locations = ImageLocations(self.output_path, self.diff_path, 79 self.two_labels) 80 81 difference = self._GenerateDiffs() 82 83 small_changes = [] 84 85 with open(html_path, 'w') as f: 86 f.write('<html><body>') 87 f.write('<table>') 88 for image in self.image_locations.Images(): 89 diff = difference[image] 90 if diff is None: 91 print >> sys.stderr, 'Failed to compare image %s' % image 92 elif diff > self.threshold: 93 self._WriteImageRows(f, image, diff) 94 else: 95 small_changes.append((image, diff)) 96 self._WriteSmallChanges(f, small_changes) 97 f.write('</table>') 98 f.write('</body></html>') 99 100 if open_in_browser: 101 webbrowser.open(html_path) 102 103 return 0 104 105 def _GenerateDiffs(self): 106 """Runs a diff over all pairs of page images, producing diff images. 107 108 As a side effect, the diff images will be saved to [output_path]/diff 109 with the same image name. 110 111 Returns: 112 A dict mapping image names to percentage of pixels changes. 113 """ 114 difference = {} 115 pool = multiprocessing.Pool(self.num_workers) 116 worker_func = functools.partial(GenerateOneDiffParallel, self) 117 118 try: 119 # The timeout is a workaround for http://bugs.python.org/issue8296 120 # which prevents KeyboardInterrupt from working. 121 one_year_in_seconds = 3600 * 24 * 365 122 worker_results = ( 123 pool.map_async( 124 worker_func, 125 self.image_locations.Images()).get(one_year_in_seconds)) 126 for worker_result in worker_results: 127 image, result = worker_result 128 difference[image] = result 129 except KeyboardInterrupt: 130 pool.terminate() 131 sys.exit(1) 132 else: 133 pool.close() 134 135 pool.join() 136 137 return difference 138 139 def GenerateOneDiff(self, image): 140 """Runs a diff over one pair of images, producing a diff image. 141 142 As a side effect, the diff image will be saved to [output_path]/diff 143 with the same image name. 144 145 Args: 146 image: Page image to compare. 147 148 Returns: 149 A tuple (image, diff), where image is the parameter and diff is the 150 percentage of pixels changed. 151 """ 152 try: 153 subprocess.check_output([ 154 self.img_diff_bin, 155 self.image_locations.Left(image), 156 self.image_locations.Right(image) 157 ]) 158 except subprocess.CalledProcessError as e: 159 percentage_change = float(re.findall(r'\d+\.\d+', e.output)[0]) 160 else: 161 return image, 0 162 163 try: 164 subprocess.check_output([ 165 self.img_diff_bin, '--diff', 166 self.image_locations.Left(image), 167 self.image_locations.Right(image), 168 self.image_locations.Diff(image) 169 ]) 170 except subprocess.CalledProcessError as e: 171 return image, percentage_change 172 else: 173 print >> sys.stderr, 'Warning: Should have failed the previous diff.' 174 return image, 0 175 176 def _GetRelativePath(self, absolute_path): 177 return os.path.relpath(absolute_path, start=self.output_path) 178 179 def _WriteImageRows(self, f, image, diff): 180 """Write table rows for a page image comparing its two versions. 181 182 Args: 183 f: Open HTML file to write to. 184 image: Image file name. 185 diff: Percentage of different pixels. 186 """ 187 f.write('<tr><td colspan="2">') 188 f.write('%s (%.4f%% changed)' % (image, diff)) 189 f.write('</td></tr>') 190 191 f.write('<tr>') 192 self._WritePageCompareTd( 193 f, self._GetRelativePath(self.image_locations.Left(image)), 194 self._GetRelativePath(self.image_locations.Right(image))) 195 self._WritePageTd(f, self._GetRelativePath( 196 self.image_locations.Diff(image))) 197 f.write('</tr>') 198 199 def _WritePageTd(self, f, image_path): 200 """Write table column with a single image. 201 202 Args: 203 f: Open HTML file to write to. 204 image_path: Path to image file. 205 """ 206 f.write('<td>') 207 f.write('<img src="%s">' % image_path) 208 f.write('</td>') 209 210 def _WritePageCompareTd(self, f, normal_image_path, hover_image_path): 211 """Write table column for an image comparing its two versions. 212 213 Args: 214 f: Open HTML file to write to. 215 normal_image_path: Path to image to be used in the "normal" state. 216 hover_image_path: Path to image to be used in the "hover" state. 217 """ 218 f.write('<td>') 219 f.write('<img src="%s" ' 220 'onmouseover="this.src=\'%s\';" ' 221 'onmouseout="this.src=\'%s\';">' % 222 (normal_image_path, hover_image_path, normal_image_path)) 223 f.write('</td>') 224 225 def _WriteSmallChanges(self, f, small_changes): 226 """Write table rows for all images considered to have only small changes. 227 228 Args: 229 f: Open HTML file to write to. 230 small_changes: List of (image, change) tuples, where image is the page 231 image and change is the percentage of pixels changed. 232 """ 233 for image, change in small_changes: 234 f.write('<tr><td colspan="2">') 235 if not change: 236 f.write('No change for: %s' % image) 237 else: 238 f.write('Small change of %.4f%% for: %s' % (change, image)) 239 f.write('</td></tr>') 240 241 242class ImageLocations(object): 243 """Contains the locations of input and output image files. 244 """ 245 246 def __init__(self, output_path, diff_path, two_labels): 247 """Constructor. 248 249 Args: 250 output_path: Path to directory with the pngs. 251 diff_path: Path to directory where the diffs will be generated. 252 two_labels: Tuple of two strings that name the subdirectories in 253 output_path containing the images. 254 """ 255 self.output_path = output_path 256 self.diff_path = diff_path 257 self.two_labels = two_labels 258 259 self.left = self._FindImages(self.two_labels[0]) 260 self.right = self._FindImages(self.two_labels[1]) 261 262 self.images = list(self.left.viewkeys() & self.right.viewkeys()) 263 264 # Sort by pdf filename, then page number 265 def KeyFn(s): 266 pieces = s.rsplit('.', 2) 267 return (pieces[0], int(pieces[1])) 268 269 self.images.sort(key=KeyFn) 270 self.diff = { 271 image: os.path.join(self.diff_path, image) for image in self.images 272 } 273 274 def _FindImages(self, label): 275 """Traverses a dir and builds a dict of all page images to compare in it. 276 277 Args: 278 label: name of subdirectory of output_path to traverse. 279 280 Returns: 281 Dict mapping page image names to the path of the image file. 282 """ 283 image_path_matcher = os.path.join(self.output_path, label, '*.*.png') 284 image_paths = glob.glob(image_path_matcher) 285 286 image_dict = { 287 os.path.split(image_path)[1]: image_path for image_path in image_paths 288 } 289 290 return image_dict 291 292 def Images(self): 293 """Returns a list of all page images present in both directories.""" 294 return self.images 295 296 def Left(self, test_case): 297 """Returns the path for a page image in the first subdirectory.""" 298 return self.left[test_case] 299 300 def Right(self, test_case): 301 """Returns the path for a page image in the second subdirectory.""" 302 return self.right[test_case] 303 304 def Diff(self, test_case): 305 """Returns the path for a page diff image.""" 306 return self.diff[test_case] 307