• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2017 The PDFium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Compares pairs of page images and generates an HTML to look at differences.
5"""
6
7import functools
8import glob
9import multiprocessing
10import os
11import re
12import subprocess
13import sys
14import webbrowser
15
16from common import DirectoryFinder
17
18
def GenerateOneDiffParallel(image_comparison, image):
  """Forwards one image to image_comparison.GenerateOneDiff().

  This is a module-level function so that it can be bound with
  functools.partial and handed to a multiprocessing pool.

  Args:
    image_comparison: Object exposing a GenerateOneDiff(image) method.
    image: Page image name to compare.

  Returns:
    Whatever image_comparison.GenerateOneDiff(image) returns.
  """
  one_diff_result = image_comparison.GenerateOneDiff(image)
  return one_diff_result
21
22
class ImageComparison:
  """Compares pairs of page images and generates an HTML to look at differences.

  The images are all assumed to have the same name and be in two directories:
  [output_path]/[two_labels[0]] and [output_path]/[two_labels[1]]. For example,
  if output_path is "/tmp/images" and two_labels is ("before", "after"),
  images in /tmp/images/before will be compared to /tmp/images/after. The HTML
  produced will be in /tmp/images/compare.html and have relative links to these
  images, so /tmp/images is self-contained and can be moved around or shared.
  """

  def __init__(self, build_dir, output_path, two_labels, num_workers,
               threshold_fraction):
    """Constructor.

    Args:
      build_dir: Path to the build directory.
      output_path: Path with the pngs and where the html will be created.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
      num_workers: Number of worker processes to start.
      threshold_fraction: Fraction (0.0 to 1.0) of changed pixels at or below
          which an image is considered to have only small changes. Such
          images will not be displayed on the HTML, only listed.
    """
    self.build_dir = build_dir
    self.output_path = output_path
    self.two_labels = two_labels
    self.num_workers = num_workers
    # Diffs are reported as percentages, so store the threshold as one too.
    self.threshold = threshold_fraction * 100

  def Run(self, open_in_browser):
    """Runs the comparison and generates an HTML with the results.

    Args:
      open_in_browser: If true, open the generated HTML in a web browser.

    Returns:
        Exit status.
    """

    # Running a test defines a number of attributes on the fly.
    # pylint: disable=attribute-defined-outside-init

    if len(self.two_labels) != 2:
      print('two_labels must be a tuple of length 2', file=sys.stderr)
      return 1

    finder = DirectoryFinder(self.build_dir)
    self.img_diff_bin = finder.ExecutablePath('pdfium_diff')

    html_path = os.path.join(self.output_path, 'compare.html')

    self.diff_path = os.path.join(self.output_path, 'diff')
    if not os.path.exists(self.diff_path):
      os.makedirs(self.diff_path)

    self.image_locations = ImageLocations(self.output_path, self.diff_path,
                                          self.two_labels)

    difference = self._GenerateDiffs()

    small_changes = []

    with open(html_path, 'w') as f:
      f.write('<html><body>')
      f.write('<table>')
      for image in self.image_locations.Images():
        diff = difference[image]
        if diff is None:
          # GenerateOneDiff() could not determine how much the image changed.
          print('Failed to compare image %s' % image, file=sys.stderr)
        elif diff > self.threshold:
          self._WriteImageRows(f, image, diff)
        else:
          small_changes.append((image, diff))
      self._WriteSmallChanges(f, small_changes)
      f.write('</table>')
      f.write('</body></html>')

    if open_in_browser:
      webbrowser.open(html_path)

    return 0

  def _GenerateDiffs(self):
    """Runs a diff over all pairs of page images, producing diff images.

    As a side effect, the diff images will be saved to [output_path]/diff
    with the same image name.

    Returns:
      A dict mapping image names to percentage of pixels changed, or to None
      for images whose change could not be determined.
    """
    pool = multiprocessing.Pool(self.num_workers)
    worker_func = functools.partial(GenerateOneDiffParallel, self)

    try:
      # The timeout is a workaround for http://bugs.python.org/issue8296
      # which prevents KeyboardInterrupt from working.
      one_year_in_seconds = 3600 * 24 * 365
      worker_results = (
          pool.map_async(
              worker_func,
              self.image_locations.Images()).get(one_year_in_seconds))
    except KeyboardInterrupt:
      pool.terminate()
      sys.exit(1)
    except Exception:
      # Do not leave worker processes behind when a worker raised.
      pool.terminate()
      raise
    else:
      pool.close()

    pool.join()

    # Each worker result is an (image, diff) tuple.
    return dict(worker_results)

  @staticmethod
  def _ParsePercentage(output):
    """Extracts the first decimal number from the diff tool output.

    Args:
      output: Captured output of the diff tool, as bytes or str, or None.

    Returns:
      The first number of the form "digits.digits" as a float, or None if the
      output is None or contains no such number.
    """
    if output is None:
      return None
    if isinstance(output, bytes):
      # subprocess.check_output() captures bytes unless text mode is
      # requested; a str regex cannot be applied to bytes.
      output = output.decode('utf-8', errors='replace')
    match = re.search(r'\d+\.\d+', output)
    if match is None:
      return None
    return float(match.group(0))

  def GenerateOneDiff(self, image):
    """Runs a diff over one pair of images, producing a diff image.

    As a side effect, the diff image will be saved to [output_path]/diff
    with the same image name.

    Args:
      image: Page image to compare.

    Returns:
      A tuple (image, diff), where image is the parameter and diff is the
      percentage of pixels changed. diff is 0 when the diff tool exits
      successfully, and None when the tool reported a failure but no
      percentage could be read from its output.
    """
    left_path = self.image_locations.Left(image)
    right_path = self.image_locations.Right(image)

    # First pass: a non-zero exit status signals a difference; the tool's
    # output is expected to carry the percentage of changed pixels.
    try:
      subprocess.check_output([self.img_diff_bin, left_path, right_path])
    except subprocess.CalledProcessError as e:
      percentage_change = self._ParsePercentage(e.output)
    else:
      return image, 0

    if percentage_change is None:
      return image, None

    # Second pass: same comparison, this time writing the diff image.
    try:
      subprocess.check_output([
          self.img_diff_bin, '--diff', left_path, right_path,
          self.image_locations.Diff(image)
      ])
    except subprocess.CalledProcessError:
      return image, percentage_change
    else:
      print('Warning: Should have failed the previous diff.', file=sys.stderr)
      return image, 0

  def _GetRelativePath(self, absolute_path):
    """Returns absolute_path expressed relative to output_path."""
    return os.path.relpath(absolute_path, start=self.output_path)

  def _WriteImageRows(self, f, image, diff):
    """Write table rows for a page image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      image: Image file name.
      diff: Percentage of different pixels.
    """
    f.write('<tr><td colspan="2">')
    f.write('%s (%.4f%% changed)' % (image, diff))
    f.write('</td></tr>')

    f.write('<tr>')
    self._WritePageCompareTd(
        f, self._GetRelativePath(self.image_locations.Left(image)),
        self._GetRelativePath(self.image_locations.Right(image)))
    self._WritePageTd(f, self._GetRelativePath(
        self.image_locations.Diff(image)))
    f.write('</tr>')

  def _WritePageTd(self, f, image_path):
    """Write table column with a single image.

    Args:
      f: Open HTML file to write to.
      image_path: Path to image file.
    """
    f.write('<td>')
    f.write('<img src="%s">' % image_path)
    f.write('</td>')

  def _WritePageCompareTd(self, f, normal_image_path, hover_image_path):
    """Write table column for an image comparing its two versions.

    Args:
      f: Open HTML file to write to.
      normal_image_path: Path to image to be used in the "normal" state.
      hover_image_path: Path to image to be used in the "hover" state.
    """
    f.write('<td>')
    f.write('<img src="%s" '
            'onmouseover="this.src=\'%s\';" '
            'onmouseout="this.src=\'%s\';">' %
            (normal_image_path, hover_image_path, normal_image_path))
    f.write('</td>')

  def _WriteSmallChanges(self, f, small_changes):
    """Write table rows for all images considered to have only small changes.

    Args:
      f: Open HTML file to write to.
      small_changes: List of (image, change) tuples, where image is the page
          image and change is the percentage of pixels changed.
    """
    for image, change in small_changes:
      f.write('<tr><td colspan="2">')
      if not change:
        f.write('No change for: %s' % image)
      else:
        f.write('Small change of %.4f%% for: %s' % (change, image))
      f.write('</td></tr>')
239
240
class ImageLocations:
  """Contains the locations of input and output image files.

  Only images whose file name is present in both labelled subdirectories are
  tracked.
  """

  def __init__(self, output_path, diff_path, two_labels):
    """Constructor.

    Args:
      output_path: Path to directory with the pngs.
      diff_path: Path to directory where the diffs will be generated.
      two_labels: Tuple of two strings that name the subdirectories in
          output_path containing the images.
    """
    self.output_path = output_path
    self.diff_path = diff_path
    self.two_labels = two_labels

    self.left = self._FindImages(self.two_labels[0])
    self.right = self._FindImages(self.two_labels[1])

    # dict.keys() is a set-like view in Python 3, so it supports
    # intersection directly (viewkeys() only exists in Python 2).
    # Sort by pdf filename, then page number.
    self.images = sorted(
        self.left.keys() & self.right.keys(), key=self._SortKey)
    self.diff = {
        image: os.path.join(self.diff_path, image) for image in self.images
    }

  @staticmethod
  def _SortKey(image):
    """Returns the sort key for an image name shaped like [name].[page].png.

    Names whose page piece is an integer sort by (name, page number). Names
    whose page piece is not an integer sort after those with the same name
    instead of raising. The raw page text is the final tie-breaker so the
    order is deterministic.
    """
    pieces = image.rsplit('.', 2)
    try:
      return (pieces[0], 0, int(pieces[1]), pieces[1])
    except ValueError:
      return (pieces[0], 1, 0, pieces[1])

  def _FindImages(self, label):
    """Traverses a dir and builds a dict of all page images to compare in it.

    Args:
      label: name of subdirectory of output_path to traverse.

    Returns:
      Dict mapping page image names to the path of the image file.
    """
    image_path_matcher = os.path.join(self.output_path, label, '*.*.png')
    image_paths = glob.glob(image_path_matcher)

    image_dict = {
        os.path.split(image_path)[1]: image_path for image_path in image_paths
    }

    return image_dict

  def Images(self):
    """Returns a list of all page images present in both directories."""
    return self.images

  def Left(self, test_case):
    """Returns the path for a page image in the first subdirectory."""
    return self.left[test_case]

  def Right(self, test_case):
    """Returns the path for a page image in the second subdirectory."""
    return self.right[test_case]

  def Diff(self, test_case):
    """Returns the path for a page diff image."""
    return self.diff[test_case]
306