# Copyright 2017 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Compares pairs of page images and generates an HTML to look at differences.
"""

import functools
import glob
import multiprocessing
import os
import re
import subprocess
import sys
import webbrowser

from common import DirectoryFinder


def GenerateOneDiffParallel(image_comparison, image):
  """Module-level wrapper used as the multiprocessing worker function.

  Args:
    image_comparison: ImageComparison instance that performs the diff.
    image: Page image name to compare.

  Returns:
    Whatever image_comparison.GenerateOneDiff(image) returns.
  """
  return image_comparison.GenerateOneDiff(image)


class ImageComparison:
  """Compares pairs of page images and generates an HTML to look at differences.

  The images are all assumed to have the same name and be in two directories:
  [output_path]/[two_labels[0]] and [output_path]/[two_labels[1]]. For example,
  if output_path is "/tmp/images" and two_labels is ("before", "after"),
  images in /tmp/images/before will be compared to /tmp/images/after. The HTML
  produced will be in /tmp/images/compare.html and have relative links to these
  images, so /tmp/images is self-contained and can be moved around or shared.
  """

  def __init__(self, build_dir, output_path, two_labels, num_workers,
               threshold_fraction):
    """Constructor.

    Args:
      build_dir: Path to the build directory.
      output_path: Path with the pngs and where the html will be created.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
      num_workers: Number of worker processes to start.
      threshold_fraction: Fraction (0.0 to 1.0) of changed pixels at or below
          which an image is considered to have only small changes. Such images
          will not be displayed on the HTML, only listed.
    """
    self.build_dir = build_dir
    self.output_path = output_path
    self.two_labels = two_labels
    self.num_workers = num_workers
    # Diffs are reported as percentages, so store the threshold the same way.
    self.threshold = threshold_fraction * 100

  def Run(self, open_in_browser):
    """Runs the comparison and generates an HTML with the results.

    Args:
      open_in_browser: If true, open the generated HTML in a web browser.

    Returns:
      Exit status.
    """

    # Running a test defines a number of attributes on the fly.
    # pylint: disable=attribute-defined-outside-init

    if len(self.two_labels) != 2:
      print('two_labels must be a tuple of length 2', file=sys.stderr)
      return 1

    finder = DirectoryFinder(self.build_dir)
    self.img_diff_bin = finder.ExecutablePath('pdfium_diff')

    html_path = os.path.join(self.output_path, 'compare.html')

    self.diff_path = os.path.join(self.output_path, 'diff')
    if not os.path.exists(self.diff_path):
      os.makedirs(self.diff_path)

    self.image_locations = ImageLocations(self.output_path, self.diff_path,
                                          self.two_labels)

    difference = self._GenerateDiffs()

    small_changes = []

    with open(html_path, 'w') as f:
      f.write('<html><body>')
      f.write('<table>')
      for image in self.image_locations.Images():
        diff = difference[image]
        if diff is None:
          # GenerateOneDiff() could not obtain a percentage for this image.
          print('Failed to compare image %s' % image, file=sys.stderr)
        elif diff > self.threshold:
          self._WriteImageRows(f, image, diff)
        else:
          small_changes.append((image, diff))
      self._WriteSmallChanges(f, small_changes)
      f.write('</table>')
      f.write('</body></html>')

    if open_in_browser:
      webbrowser.open(html_path)

    return 0

  def _GenerateDiffs(self):
    """Runs a diff over all pairs of page images, producing diff images.

    As a side effect, the diff images will be saved to [output_path]/diff
    with the same image name.

    Returns:
      A dict mapping image names to percentage of pixels changed (or None
      for images whose comparison failed).
    """
    difference = {}
    pool = multiprocessing.Pool(self.num_workers)
    worker_func = functools.partial(GenerateOneDiffParallel, self)

    try:
      # The timeout is a workaround for http://bugs.python.org/issue8296
      # which prevents KeyboardInterrupt from working.
      one_year_in_seconds = 3600 * 24 * 365
      worker_results = (
          pool.map_async(
              worker_func,
              self.image_locations.Images()).get(one_year_in_seconds))
      for image, result in worker_results:
        difference[image] = result
    except KeyboardInterrupt:
      pool.terminate()
      sys.exit(1)
    else:
      pool.close()

    pool.join()

    return difference

  def GenerateOneDiff(self, image):
    """Runs a diff over one pair of images, producing a diff image.

    As a side effect, the diff image will be saved to [output_path]/diff
    with the same image name.

    Args:
      image: Page image to compare.

    Returns:
      A tuple (image, diff), where image is the parameter and diff is the
      percentage of pixels changed, or None if the diff tool reported a
      difference but no percentage could be parsed from its output.
    """
    left_path = self.image_locations.Left(image)
    right_path = self.image_locations.Right(image)

    try:
      subprocess.check_output([self.img_diff_bin, left_path, right_path])
    except subprocess.CalledProcessError as e:
      # A non-zero exit status means the images differ; the percentage is
      # read from the tool's output.
      output = e.output
      if isinstance(output, bytes):
        # check_output() captures bytes; the str pattern below needs text.
        output = output.decode('utf-8', errors='replace')
      matches = re.findall(r'\d+\.\d+', output)
      if not matches:
        return image, None
      percentage_change = float(matches[0])
    else:
      return image, 0

    try:
      subprocess.check_output([
          self.img_diff_bin, '--diff', left_path, right_path,
          self.image_locations.Diff(image)
      ])
    except subprocess.CalledProcessError:
      # Expected path: the images differ, so the tool exits non-zero again.
      return image, percentage_change
    else:
      print('Warning: Should have failed the previous diff.', file=sys.stderr)
      return image, 0

  def _GetRelativePath(self, absolute_path):
    """Returns absolute_path expressed relative to output_path."""
    return os.path.relpath(absolute_path, start=self.output_path)

  def _WriteImageRows(self, f, image, diff):
    """Write table rows for a page image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      image: Image file name.
      diff: Percentage of different pixels.
    """
    f.write('<tr><td colspan="2">')
    f.write('%s (%.4f%% changed)' % (image, diff))
    f.write('</td></tr>')

    f.write('<tr>')
    self._WritePageCompareTd(
        f, self._GetRelativePath(self.image_locations.Left(image)),
        self._GetRelativePath(self.image_locations.Right(image)))
    self._WritePageTd(f, self._GetRelativePath(
        self.image_locations.Diff(image)))
    f.write('</tr>')

  def _WritePageTd(self, f, image_path):
    """Write table column with a single image.

    Args:
      f: Open HTML file to write to.
      image_path: Path to image file.
    """
    f.write('<td>')
    f.write('<img src="%s">' % image_path)
    f.write('</td>')

  def _WritePageCompareTd(self, f, normal_image_path, hover_image_path):
    """Write table column for an image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      normal_image_path: Path to image to be used in the "normal" state.
      hover_image_path: Path to image to be used in the "hover" state.
    """
    f.write('<td>')
    f.write('<img src="%s" '
            'onmouseover="this.src=\'%s\';" '
            'onmouseout="this.src=\'%s\';">' %
            (normal_image_path, hover_image_path, normal_image_path))
    f.write('</td>')

  def _WriteSmallChanges(self, f, small_changes):
    """Write table rows for all images considered to have only small changes.

    Args:
      f: Open HTML file to write to.
      small_changes: List of (image, change) tuples, where image is the page
          image and change is the percentage of pixels changed.
    """
    for image, change in small_changes:
      f.write('<tr><td colspan="2">')
      if not change:
        f.write('No change for: %s' % image)
      else:
        f.write('Small change of %.4f%% for: %s' % (change, image))
      f.write('</td></tr>')


class ImageLocations:
  """Contains the locations of input and output image files.
  """

  def __init__(self, output_path, diff_path, two_labels):
    """Constructor.

    Args:
      output_path: Path to directory with the pngs.
      diff_path: Path to directory where the diffs will be generated.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
    """
    self.output_path = output_path
    self.diff_path = diff_path
    self.two_labels = two_labels

    self.left = self._FindImages(self.two_labels[0])
    self.right = self._FindImages(self.two_labels[1])

    # Only images present in both directories can be compared. dict.keys()
    # returns a set-like view, so "&" yields the common names.
    self.images = list(self.left.keys() & self.right.keys())

    # Sort by pdf filename, then page number
    def KeyFn(s):
      pieces = s.rsplit('.', 2)
      return (pieces[0], int(pieces[1]))

    self.images.sort(key=KeyFn)
    self.diff = {
        image: os.path.join(self.diff_path, image) for image in self.images
    }

  def _FindImages(self, label):
    """Traverses a dir and builds a dict of all page images to compare in it.

    Args:
      label: name of subdirectory of output_path to traverse.

    Returns:
      Dict mapping page image names to the path of the image file.
    """
    image_path_matcher = os.path.join(self.output_path, label, '*.*.png')
    image_paths = glob.glob(image_path_matcher)

    image_dict = {
        os.path.split(image_path)[1]: image_path for image_path in image_paths
    }

    return image_dict

  def Images(self):
    """Returns a list of all page images present in both directories."""
    return self.images

  def Left(self, test_case):
    """Returns the path for a page image in the first subdirectory."""
    return self.left[test_case]

  def Right(self, test_case):
    """Returns the path for a page image in the second subdirectory."""
    return self.right[test_case]

  def Diff(self, test_case):
    """Returns the path for a page diff image."""
    return self.diff[test_case]