• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright 2015 The PDFium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
from dataclasses import dataclass
import itertools
import os
import shutil
import subprocess
import sys
from typing import Optional
12
# Image matching algorithm names accepted by the differ.
EXACT_MATCHING = 'exact'
FUZZY_MATCHING = 'fuzzy'

# Executable used to optimize regenerated expectation images.
_PNG_OPTIMIZER = 'optipng'

# Expectation file name suffixes, listed from most to least specific. `{os}`
# is a `str.format()` placeholder for the OS name.
_COMMON_SUFFIX_ORDER = ('_{os}', '')
_AGG_SUFFIX_ORDER = ('_agg_{os}', '_agg', *_COMMON_SUFFIX_ORDER)
_SKIA_SUFFIX_ORDER = ('_skia_{os}', '_skia', *_COMMON_SUFFIX_ORDER)
21
22
@dataclass
class ImageDiff:
  """Details about an image diff.

  Attributes:
    actual_path: Path to the actual image file.
    expected_path: Path to the expected image file, or `None` if no matches.
    diff_path: Path to the diff image file, or `None` if no diff.
    reason: Optional reason for the diff, or `None` if none was recorded.
  """
  actual_path: str
  # These fields default to `None`, so they are annotated as `Optional`
  # explicitly rather than relying on an implicit optional.
  expected_path: Optional[str] = None
  diff_path: Optional[str] = None
  reason: Optional[str] = None
37
class PNGDiffer:
  """Compares actual page images against expected images with `pdfium_diff`.

  Expected images are looked up through a renderer-specific suffix order (Skia
  or AGG), which is handed to `_PathTemplates` together with the OS name.
  """

  def __init__(self, finder, features, reverse_byte_order):
    """Initializes the differ.

    Args:
      finder: Object providing `ExecutablePath()` and an `os_name` attribute.
      features: Collection of feature names. If it contains 'SKIA', the Skia
        suffix order is used; otherwise the AGG suffix order is used.
      reverse_byte_order: If true, `--reverse-byte-order` is passed to
        `pdfium_diff` when comparing images.
    """
    self.pdfium_diff_path = finder.ExecutablePath('pdfium_diff')
    self.os_name = finder.os_name
    self.reverse_byte_order = reverse_byte_order
    if 'SKIA' in features:
      self.suffix_order = _SKIA_SUFFIX_ORDER
    else:
      self.suffix_order = _AGG_SUFFIX_ORDER

  def CheckMissingTools(self, regenerate_expected):
    """Checks for tools required by the requested mode.

    Args:
      regenerate_expected: Whether expected images are going to be regenerated.

    Returns:
      An error message if regeneration was requested but the PNG optimizer
      cannot be found on the search path, otherwise `None`.
    """
    if regenerate_expected and not shutil.which(_PNG_OPTIMIZER):
      return f'Please install "{_PNG_OPTIMIZER}" to regenerate expected images.'
    return None

  def GetActualFiles(self, input_filename, source_dir, working_dir):
    """Lists the actual image paths for pages that have an expectation.

    Pages are visited from 0 upward; the walk stops at the first page for
    which no expected image exists.

    Returns:
      A list of actual image paths, in page order.
    """
    actual_paths = []
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)

    for page in itertools.count():
      actual_path = path_templates.GetActualPath(page)
      if path_templates.GetExpectedPath(page, default_to_base=False):
        actual_paths.append(actual_path)
      else:
        break
    return actual_paths

  def _RunCommand(self, cmd):
    """Runs `cmd`, capturing its output.

    Returns:
      `None` if the command exits with status 0, otherwise the
      `subprocess.CalledProcessError` describing the failure. Other errors,
      such as a missing executable, are not caught.
    """
    try:
      subprocess.run(cmd, capture_output=True, check=True)
      return None
    except subprocess.CalledProcessError as e:
      return e

  def _RunImageCompareCommand(self, image_diff, image_matching_algorithm):
    """Compares `image_diff.actual_path` with `image_diff.expected_path`.

    Returns:
      The result of `_RunCommand()`: `None` when `pdfium_diff` exits cleanly,
      otherwise the error.
    """
    cmd = [self.pdfium_diff_path]
    if self.reverse_byte_order:
      cmd.append('--reverse-byte-order')
    if image_matching_algorithm == FUZZY_MATCHING:
      cmd.append('--fuzzy')
    cmd.extend([image_diff.actual_path, image_diff.expected_path])
    return self._RunCommand(cmd)

  def _RunImageDiffCommand(self, image_diff):
    """Runs `pdfium_diff --subtract`, targeting `image_diff.diff_path`.

    Returns:
      The result of `_RunCommand()`.
    """
    # TODO(crbug.com/pdfium/1925): Diff mode ignores --reverse-byte-order.
    return self._RunCommand([
        self.pdfium_diff_path, '--subtract', image_diff.actual_path,
        image_diff.expected_path, image_diff.diff_path
    ])

  @staticmethod
  def _BackUpFiles(paths):
    """Renames each path in `paths` to the same path with `.bak` appended.

    Uses `os.replace()` so that a backup left over from an earlier run is
    overwritten on every platform; `os.rename()` fails on Windows when the
    destination already exists.
    """
    for path in paths:
      os.replace(path, path + '.bak')

  def ComputeDifferences(self, input_filename, source_dir, working_dir,
                         image_matching_algorithm):
    """Computes differences between actual and expected image files.

    Pages are visited from 0 upward until an actual image is missing.

    Returns:
      A list of `ImageDiff` instances, one per page that has a `reason`: the
      comparison failed, the expected image does not exist, or the actual image
      also matches another existing expectation.
    """
    image_diffs = []

    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if not os.path.exists(page_diff.actual_path):
        # No more actual pages.
        break

      expected_path = path_templates.GetExpectedPath(page)
      if os.path.exists(expected_path):
        page_diff.expected_path = expected_path

        compare_error = self._RunImageCompareCommand(page_diff,
                                                     image_matching_algorithm)
        if compare_error:
          page_diff.reason = str(compare_error)

          # TODO(crbug.com/pdfium/1925): Compare and diff simultaneously.
          page_diff.diff_path = path_templates.GetDiffPath(page)
          # A clean exit from the diff command is treated as "no diff image".
          if not self._RunImageDiffCommand(page_diff):
            print(f'WARNING: No diff for {page_diff.actual_path}')
            page_diff.diff_path = None
        else:
          # Validate that no other paths match.
          for unexpected_path in path_templates.GetExpectedPaths(page)[1:]:
            page_diff.expected_path = unexpected_path
            if not self._RunImageCompareCommand(page_diff,
                                                image_matching_algorithm):
              page_diff.reason = f'Also matches {unexpected_path}'
              break
          # Restore the expectation that was originally compared against.
          page_diff.expected_path = expected_path
      else:
        if page == 0:
          print(f'WARNING: no expected results files for {input_filename}')
        page_diff.reason = f'{expected_path} does not exist'

      if page_diff.reason:
        image_diffs.append(page_diff)

    return image_diffs

  def Regenerate(self, input_filename, source_dir, working_dir,
                 image_matching_algorithm):
    """Regenerates expected images from the actual images, page by page.

    For each page, starting at 0:
      * If the actual image matches the first existing expectation, nothing is
        done.
      * If it matches a later expectation, the expectations before the last
        match are renamed to `.bak` backups.
      * If there is no actual image but expectations exist, all of them are
        renamed to `.bak` backups.
      * If nothing matches, the actual image is copied over the path returned
        by `GetExpectedPath()` and the PNG optimizer is run on it.

    The loop ends at the first page with neither an actual image nor any
    expectations.
    """
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      expected_paths = path_templates.GetExpectedPaths(page)

      first_match = None
      last_match = None
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if os.path.exists(page_diff.actual_path):
        # Match against all expected page images.
        for index, expected_path in enumerate(expected_paths):
          page_diff.expected_path = expected_path
          if not self._RunImageCompareCommand(page_diff,
                                              image_matching_algorithm):
            if first_match is None:
              first_match = index
            last_match = index

        if last_match == 0:
          # Regeneration not needed. This case may be reached if only some, but
          # not all, pages need to be regenerated.
          continue
      elif expected_paths:
        # Remove all expected page images.
        print(f'WARNING: {input_filename} has extra expected page {page}')
        first_match = 0
        last_match = len(expected_paths)
      else:
        # No more expected or actual pages.
        break

      # Try to reuse expectations by removing intervening non-matches.
      #
      # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of
      # global knowledge about other test configurations, which is why it just
      # creates backup files rather than immediately removing files.
      if last_match is not None:
        if first_match > 1:
          print(f'WARNING: {input_filename}.{page} has non-adjacent match')
        if first_match != last_match:
          print(f'WARNING: {input_filename}.{page} has redundant matches')

        self._BackUpFiles(expected_paths[:last_match])
        continue

      # Regenerate the most specific expected path that exists. If there are no
      # existing expectations, regenerate the base case.
      expected_path = path_templates.GetExpectedPath(page)
      shutil.copyfile(page_diff.actual_path, expected_path)
      optimize_error = self._RunCommand([_PNG_OPTIMIZER, expected_path])
      if optimize_error:
        # The copied image is still usable, so report the failure and go on.
        print(f'WARNING: Failed to optimize {expected_path}: {optimize_error}')
193
194
# File name templates appended to the input file's root name. `%d` is replaced
# with the page number using `%` formatting.
_ACTUAL_TEMPLATE = '.pdf.%d.png'
_DIFF_TEMPLATE = '.pdf.%d.diff.png'


class _PathTemplates:
  """Builds actual, diff, and expected image paths for the pages of one input.

  Attributes:
    actual_path_template: `%`-format template for actual image paths.
    diff_path_template: `%`-format template for diff image paths.
    expected_templates: `%`-format templates for expected image paths, ordered
      like `suffix_order` (most to least specific).
  """

  def __init__(self, input_filename, source_dir, working_dir, os_name,
               suffix_order):
    """Pre-computes the path templates.

    Args:
      input_filename: Input file name; its extension is stripped.
      source_dir: Directory containing the expected images.
      working_dir: Directory containing the actual and diff images.
      os_name: Value substituted for `{os}` in each suffix.
      suffix_order: Expectation suffixes, from most to least specific.
    """
    input_root, _ = os.path.splitext(input_filename)

    # The templates are later expanded with `%`, so any literal percent sign in
    # a caller-supplied component has to be doubled to survive formatting.
    escaped_root = self._EscapePercent(input_root)
    escaped_working_dir = self._EscapePercent(working_dir)
    escaped_source_dir = self._EscapePercent(source_dir)

    self.actual_path_template = os.path.join(escaped_working_dir,
                                             escaped_root + _ACTUAL_TEMPLATE)
    self.diff_path_template = os.path.join(escaped_working_dir,
                                           escaped_root + _DIFF_TEMPLATE)

    # Pre-create the available templates from most to least specific. We
    # generally expect the most specific case to match first.
    self.expected_templates = []
    for suffix in suffix_order:
      formatted_suffix = self._EscapePercent(suffix.format(os=os_name))
      self.expected_templates.append(
          os.path.join(
              escaped_source_dir,
              f'{escaped_root}_expected{formatted_suffix}{_ACTUAL_TEMPLATE}'))
    assert self.expected_templates

  @staticmethod
  def _EscapePercent(component):
    """Returns `component` as a string with every `%` doubled."""
    return os.fspath(component).replace('%', '%%')

  def GetActualPath(self, page):
    """Returns the actual image path for `page`."""
    return self.actual_path_template % page

  def GetDiffPath(self, page):
    """Returns the diff image path for `page`."""
    return self.diff_path_template % page

  def _GetPossibleExpectedPaths(self, page):
    """Returns every candidate expected path for `page`, existing or not."""
    return [template % page for template in self.expected_templates]

  def GetExpectedPaths(self, page):
    """Returns the candidate expected paths for `page` that exist on disk."""
    return [
        path for path in self._GetPossibleExpectedPaths(page)
        if os.path.exists(path)
    ]

  def GetExpectedPath(self, page, default_to_base=True):
    """Returns the most specific expected path that exists.

    Args:
      page: Page number.
      default_to_base: Controls the result when no candidate exists. If true,
        the last (least specific) candidate path is returned even though it
        does not exist; otherwise `None` is returned.
    """
    possible_paths = self._GetPossibleExpectedPaths(page)
    for expected_path in possible_paths:
      if os.path.exists(expected_path):
        return expected_path
    if default_to_base and possible_paths:
      return possible_paths[-1]
    return None
240