• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright 2015 The PDFium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
from dataclasses import dataclass
import itertools
import os
import shutil
import subprocess
import sys
from typing import Optional
12
# Names of the image matching algorithms a caller can request.
EXACT_MATCHING = 'exact'
FUZZY_MATCHING = 'fuzzy'

# Command-line tool that is run on regenerated expected images.
_PNG_OPTIMIZER = 'optipng'

# Every suffix order lists expectation-file suffixes from the most specific
# (a leaf of the tree) down to the least specific (the root, i.e. no suffix).
# `{os}` is a placeholder that is filled in with the OS name later.
_COMMON_SUFFIX_ORDER = ('_{os}', '')
_AGG_SUFFIX_ORDER = ('_agg_{os}', '_agg', *_COMMON_SUFFIX_ORDER)
_GDI_AGG_SUFFIX_ORDER = (
    '_gdi_agg_{os}',
    '_gdi_agg',
    '_gdi_{os}',
    '_gdi',
    *_COMMON_SUFFIX_ORDER,
)
_GDI_SKIA_SUFFIX_ORDER = (
    '_gdi_skia_{os}',
    '_gdi_skia',
    '_gdi_{os}',
    '_gdi',
    *_COMMON_SUFFIX_ORDER,
)
_SKIA_SUFFIX_ORDER = ('_skia_{os}', '_skia', *_COMMON_SUFFIX_ORDER)
27
28
@dataclass
class ImageDiff:
  """Details about an image diff.

  Only `actual_path` is required; the remaining fields default to `None` and
  are filled in as more is learned about the page.

  Attributes:
    actual_path: Path to the actual image file.
    expected_path: Path to the expected image file, or `None` if no matches.
    diff_path: Path to the diff image file, or `None` if no diff.
    reason: Optional reason for the diff.
  """
  actual_path: str
  # The fields below may legitimately be `None`, so they are `Optional`.
  expected_path: Optional[str] = None
  diff_path: Optional[str] = None
  reason: Optional[str] = None
43
class PNGDiffer:
  """Compares actual page images against expected images and regenerates them.

  Comparison and diff generation are delegated to the `pdfium_diff`
  executable. Expected images are looked up through a suffix order chosen
  from the rendering configuration.
  """

  def __init__(self, finder, reverse_byte_order, rendering_option,
               default_renderer):
    """Picks the diff executable and the expectation suffix order.

    Args:
      finder: Object providing `ExecutablePath(name)` and an `os_name`
        attribute.
      reverse_byte_order: If true, `--reverse-byte-order` is passed to the
        image compare command.
      rendering_option: 'agg', 'gdi' or 'skia'.
      default_renderer: 'agg' or 'skia'. Only consulted when
        `rendering_option` is 'gdi'.

    Raises:
      ValueError: If the arguments do not select a suffix order.
    """
    self.pdfium_diff_path = finder.ExecutablePath('pdfium_diff')
    self.os_name = finder.os_name
    self.reverse_byte_order = reverse_byte_order

    self.suffix_order = None
    if rendering_option == 'gdi':
      if default_renderer == 'agg':
        self.suffix_order = _GDI_AGG_SUFFIX_ORDER
      elif default_renderer == 'skia':
        self.suffix_order = _GDI_SKIA_SUFFIX_ORDER
    elif rendering_option == 'agg':
      self.suffix_order = _AGG_SUFFIX_ORDER
    elif rendering_option == 'skia':
      self.suffix_order = _SKIA_SUFFIX_ORDER

    if not self.suffix_order:
      # Report both inputs: with 'gdi', the unsupported value may be
      # `default_renderer` rather than `rendering_option`.
      raise ValueError(f'rendering_option={rendering_option}, '
                       f'default_renderer={default_renderer}')

  def CheckMissingTools(self, regenerate_expected):
    """Returns an error message if a required tool is missing, else `None`.

    The PNG optimizer is only required when `regenerate_expected` is true.
    """
    if regenerate_expected and not shutil.which(_PNG_OPTIMIZER):
      return f'Please install "{_PNG_OPTIMIZER}" to regenerate expected images.'
    return None

  def GetActualFiles(self, input_filename, source_dir, working_dir):
    """Returns the actual image paths for pages that have an expectation.

    Pages are visited in order starting at 0, and the scan stops at the first
    page for which no expected image exists on disk.
    """
    actual_paths = []
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)

    for page in itertools.count():
      actual_path = path_templates.GetActualPath(page)
      if path_templates.GetExpectedPath(page, default_to_base=False):
        actual_paths.append(actual_path)
      else:
        break
    return actual_paths

  def _RunCommand(self, cmd):
    """Runs `cmd`, capturing its output.

    Returns:
      `None` if the command exited with status 0, otherwise the
      `subprocess.CalledProcessError` describing the failure. Other
      exceptions (e.g. a missing executable) are not caught here.
    """
    try:
      subprocess.run(cmd, capture_output=True, check=True)
      return None
    except subprocess.CalledProcessError as e:
      return e

  def _RunImageCompareCommand(self, image_diff, image_matching_algorithm):
    """Compares `image_diff.actual_path` against `image_diff.expected_path`.

    Returns:
      The result of `_RunCommand()`: `None` when the compare command exits
      with status 0, otherwise the `CalledProcessError`.
    """
    cmd = [self.pdfium_diff_path]
    if self.reverse_byte_order:
      cmd.append('--reverse-byte-order')
    if image_matching_algorithm == FUZZY_MATCHING:
      cmd.append('--fuzzy')
    cmd.extend([image_diff.actual_path, image_diff.expected_path])
    return self._RunCommand(cmd)

  def _RunImageDiffCommand(self, image_diff):
    """Runs the `--subtract` mode, writing to `image_diff.diff_path`.

    Returns:
      The result of `_RunCommand()` for the subtract command.
    """
    # TODO(crbug.com/pdfium/1925): Diff mode ignores --reverse-byte-order.
    return self._RunCommand([
        self.pdfium_diff_path, '--subtract', image_diff.actual_path,
        image_diff.expected_path, image_diff.diff_path
    ])

  def ComputeDifferences(self, input_filename, source_dir, working_dir,
                         image_matching_algorithm):
    """Computes differences between actual and expected image files.

    Pages are visited in order starting at 0 until an actual image is
    missing. A page is reported when it does not match its most specific
    expectation, when it also matches a less specific expectation, or when
    it has no expectation at all.

    Returns:
      A list of `ImageDiff` instances, one per differing page.
    """
    image_diffs = []

    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if not os.path.exists(page_diff.actual_path):
        # No more actual pages.
        break

      # Most specific existing expectation, or the base path if none exists.
      expected_path = path_templates.GetExpectedPath(page)
      if os.path.exists(expected_path):
        page_diff.expected_path = expected_path

        compare_error = self._RunImageCompareCommand(page_diff,
                                                     image_matching_algorithm)
        if compare_error:
          page_diff.reason = str(compare_error)

          # TODO(crbug.com/pdfium/1925): Compare and diff simultaneously.
          page_diff.diff_path = path_templates.GetDiffPath(page)
          # A `None` result means the subtract command exited with status 0,
          # which is treated here as no diff image having been produced.
          if not self._RunImageDiffCommand(page_diff):
            print(f'WARNING: No diff for {page_diff.actual_path}')
            page_diff.diff_path = None
        else:
          # Validate that no other paths match.
          for unexpected_path in path_templates.GetExpectedPaths(page)[1:]:
            page_diff.expected_path = unexpected_path
            if not self._RunImageCompareCommand(page_diff,
                                                image_matching_algorithm):
              page_diff.reason = f'Also matches {unexpected_path}'
              break
          # Restore the path that was temporarily replaced during validation.
          page_diff.expected_path = expected_path
      else:
        if page == 0:
          print(f'WARNING: no expected results files for {input_filename}')
        page_diff.reason = f'{expected_path} does not exist'

      if page_diff.reason:
        image_diffs.append(page_diff)

    return image_diffs

  def Regenerate(self, input_filename, source_dir, working_dir,
                 image_matching_algorithm):
    """Updates expected images so that they agree with the actual images.

    For each page, starting at 0 and stopping once neither an actual nor an
    expected image exists:
      * If the actual image matches the most specific existing expectation,
        nothing is done.
      * If it matches only a less specific expectation, the more specific
        expectations ahead of the last match are renamed to `*.bak`.
      * If there is no actual image but expectations exist, all of them are
        renamed to `*.bak`.
      * If nothing matches, the actual image is copied over the most
        specific existing expectation (or the base path) and optimized.
    """
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      expected_paths = path_templates.GetExpectedPaths(page)

      first_match = None
      last_match = None
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if os.path.exists(page_diff.actual_path):
        # Match against all expected page images.
        for index, expected_path in enumerate(expected_paths):
          page_diff.expected_path = expected_path
          if not self._RunImageCompareCommand(page_diff,
                                              image_matching_algorithm):
            if first_match is None:
              first_match = index
            last_match = index

        if last_match == 0:
          # Regeneration not needed. This case may be reached if only some, but
          # not all, pages need to be regenerated.
          continue
      elif expected_paths:
        # Remove all expected page images.
        print(f'WARNING: {input_filename} has extra expected page {page}')
        first_match = 0
        last_match = len(expected_paths)
      else:
        # No more expected or actual pages.
        break

      # Try to reuse expectations by removing intervening non-matches.
      #
      # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of
      # global knowledge about other test configurations, which is why it just
      # creates backup files rather than immediately removing files.
      if last_match is not None:
        if first_match > 1:
          print(f'WARNING: {input_filename}.{page} has non-adjacent match')
        if first_match != last_match:
          print(f'WARNING: {input_filename}.{page} has redundant matches')

        for expected_path in expected_paths[:last_match]:
          os.rename(expected_path, expected_path + '.bak')
        continue

      # Regenerate the most specific expected path that exists. If there are no
      # existing expectations, regenerate the base case.
      expected_path = path_templates.GetExpectedPath(page)
      shutil.copyfile(page_diff.actual_path, expected_path)
      optimize_error = self._RunCommand([_PNG_OPTIMIZER, expected_path])
      if optimize_error:
        # The copied image is still in place; only the optimization failed.
        print(f'WARNING: {_PNG_OPTIMIZER} failed for {expected_path}: '
              f'{optimize_error}')
210
211
# File name tails appended to an input's root name. `%d` is replaced with the
# page number via printf-style formatting.
_ACTUAL_TEMPLATE = '.pdf.%d.png'
_DIFF_TEMPLATE = '.pdf.%d.diff.png'


def _EscapePercent(text):
  """Doubles every `%` so `text` survives a later printf-style substitution."""
  return text.replace('%', '%%')


class _PathTemplates:
  """Builds per-page paths for actual, diff, and expected images.

  Each template contains a single `%d` placeholder for the page number. Any
  literal `%` in the supplied directories or file name is escaped, so such
  paths are reproduced verbatim instead of breaking the substitution.
  """

  def __init__(self, input_filename, source_dir, working_dir, os_name,
               suffix_order):
    """Pre-computes the path templates.

    Args:
      input_filename: Input file name; its extension is dropped.
      source_dir: Directory containing the expected images.
      working_dir: Directory containing the actual and diff images.
      os_name: Value substituted for `{os}` in each suffix.
      suffix_order: Non-empty sequence of expectation suffixes, ordered from
        most to least specific.
    """
    input_root, _ = os.path.splitext(input_filename)
    escaped_root = _EscapePercent(input_root)
    escaped_working_dir = _EscapePercent(working_dir)
    escaped_source_dir = _EscapePercent(source_dir)

    self.actual_path_template = os.path.join(escaped_working_dir,
                                             escaped_root + _ACTUAL_TEMPLATE)
    self.diff_path_template = os.path.join(escaped_working_dir,
                                           escaped_root + _DIFF_TEMPLATE)

    # Pre-create the available templates from most to least specific. We
    # generally expect the most specific case to match first.
    self.expected_templates = []
    for suffix in suffix_order:
      # Escape after formatting so a `%` coming from `os_name` is covered too.
      formatted_suffix = _EscapePercent(suffix.format(os=os_name))
      self.expected_templates.append(
          os.path.join(
              escaped_source_dir,
              f'{escaped_root}_expected{formatted_suffix}{_ACTUAL_TEMPLATE}'))
    assert self.expected_templates

  def GetActualPath(self, page):
    """Returns the path of the actual image for `page`."""
    return self.actual_path_template % page

  def GetDiffPath(self, page):
    """Returns the path of the diff image for `page`."""
    return self.diff_path_template % page

  def _GetPossibleExpectedPaths(self, page):
    """Returns every candidate expected path for `page`, existing or not."""
    return [template % page for template in self.expected_templates]

  def GetExpectedPaths(self, page):
    """Returns the expected paths for `page` that exist, most specific first."""
    return [
        expected_path
        for expected_path in self._GetPossibleExpectedPaths(page)
        if os.path.exists(expected_path)
    ]

  def GetExpectedPath(self, page, default_to_base=True):
    """Returns the most specific expected path that exists.

    If none exists, returns the least specific (base) candidate when
    `default_to_base` is true, otherwise `None`.
    """
    last_not_found_expected_path = None
    for expected_path in self._GetPossibleExpectedPaths(page):
      if os.path.exists(expected_path):
        return expected_path
      last_not_found_expected_path = expected_path
    return last_not_found_expected_path if default_to_base else None
257