#!/usr/bin/env python3
# Copyright 2015 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Compares rendered PNG pages against expected images and regenerates them."""

from dataclasses import dataclass
import itertools
import os
import shutil
import subprocess
import sys
from typing import Optional

EXACT_MATCHING = 'exact'
FUZZY_MATCHING = 'fuzzy'

_PNG_OPTIMIZER = 'optipng'

# Each suffix order acts like a path along a tree, with the leaves being the
# most specific, and the root being the least specific.
_COMMON_SUFFIX_ORDER = ('_{os}', '')
_AGG_SUFFIX_ORDER = ('_agg_{os}', '_agg') + _COMMON_SUFFIX_ORDER
_GDI_AGG_SUFFIX_ORDER = ('_gdi_agg_{os}', '_gdi_agg', '_gdi_{os}',
                         '_gdi') + _COMMON_SUFFIX_ORDER
_GDI_SKIA_SUFFIX_ORDER = ('_gdi_skia_{os}', '_gdi_skia', '_gdi_{os}',
                          '_gdi') + _COMMON_SUFFIX_ORDER
_SKIA_SUFFIX_ORDER = ('_skia_{os}', '_skia') + _COMMON_SUFFIX_ORDER


@dataclass
class ImageDiff:
  """Details about an image diff.

  Attributes:
    actual_path: Path to the actual image file.
    expected_path: Path to the expected image file, or `None` if no matches.
    diff_path: Path to the diff image file, or `None` if no diff.
    reason: Optional reason for the diff.
  """
  actual_path: str
  expected_path: Optional[str] = None
  diff_path: Optional[str] = None
  reason: Optional[str] = None


class PNGDiffer:
  """Compares actual page images to expectations using `pdfium_diff`."""

  def __init__(self, finder, reverse_byte_order, rendering_option,
               default_renderer):
    """Selects the expectation suffix order for a rendering configuration.

    Args:
      finder: Object providing `ExecutablePath(name)` and an `os_name`
        attribute.
      reverse_byte_order: If true, `--reverse-byte-order` is passed to the
        image comparison command.
      rendering_option: One of 'gdi', 'agg', or 'skia'.
      default_renderer: 'agg' or 'skia'; only consulted when
        `rendering_option` is 'gdi'.

    Raises:
      ValueError: If the combination of options selects no suffix order.
    """
    self.pdfium_diff_path = finder.ExecutablePath('pdfium_diff')
    self.os_name = finder.os_name
    self.reverse_byte_order = reverse_byte_order

    self.suffix_order = None
    if rendering_option == 'gdi':
      if default_renderer == 'agg':
        self.suffix_order = _GDI_AGG_SUFFIX_ORDER
      elif default_renderer == 'skia':
        self.suffix_order = _GDI_SKIA_SUFFIX_ORDER
    elif rendering_option == 'agg':
      self.suffix_order = _AGG_SUFFIX_ORDER
    elif rendering_option == 'skia':
      self.suffix_order = _SKIA_SUFFIX_ORDER

    if not self.suffix_order:
      message = f'rendering_option={rendering_option}'
      if rendering_option == 'gdi':
        # 'gdi' itself is valid, so the culprit is the default renderer.
        message += f', default_renderer={default_renderer}'
      raise ValueError(message)

  def CheckMissingTools(self, regenerate_expected):
    """Returns an error message if a required tool is missing, else `None`.

    The PNG optimizer is only required when `regenerate_expected` is true.
    """
    if regenerate_expected and not shutil.which(_PNG_OPTIMIZER):
      return f'Please install "{_PNG_OPTIMIZER}" to regenerate expected images.'
    return None

  def GetActualFiles(self, input_filename, source_dir, working_dir):
    """Returns actual image paths for each page that has an expectation.

    Pages are enumerated from 0 and enumeration stops at the first page with
    no existing expected image. The returned actual paths are not checked for
    existence.
    """
    actual_paths = []
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)

    for page in itertools.count():
      actual_path = path_templates.GetActualPath(page)
      if path_templates.GetExpectedPath(page, default_to_base=False):
        actual_paths.append(actual_path)
      else:
        break
    return actual_paths

  def _RunCommand(self, cmd):
    """Runs `cmd`, capturing its output.

    Returns:
      `None` if the command exits with status 0, otherwise the
      `subprocess.CalledProcessError` describing the failure.
    """
    try:
      subprocess.run(cmd, capture_output=True, check=True)
      return None
    except subprocess.CalledProcessError as e:
      return e

  def _RunImageCompareCommand(self, image_diff, image_matching_algorithm):
    """Compares `image_diff.actual_path` with `image_diff.expected_path`.

    Returns:
      The result of `_RunCommand()`: `None` when the comparison command exits
      with status 0, otherwise the error.
    """
    cmd = [self.pdfium_diff_path]
    if self.reverse_byte_order:
      cmd.append('--reverse-byte-order')
    if image_matching_algorithm == FUZZY_MATCHING:
      cmd.append('--fuzzy')
    cmd.extend([image_diff.actual_path, image_diff.expected_path])
    return self._RunCommand(cmd)

  def _RunImageDiffCommand(self, image_diff):
    """Runs the `--subtract` mode to write `image_diff.diff_path`.

    Returns:
      The result of `_RunCommand()`.
    """
    # TODO(crbug.com/pdfium/1925): Diff mode ignores --reverse-byte-order.
    return self._RunCommand([
        self.pdfium_diff_path, '--subtract', image_diff.actual_path,
        image_diff.expected_path, image_diff.diff_path
    ])

  def ComputeDifferences(self, input_filename, source_dir, working_dir,
                         image_matching_algorithm):
    """Computes differences between actual and expected image files.

    Returns:
      A list of `ImageDiff` instances, one per differing page.
    """
    image_diffs = []

    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if not os.path.exists(page_diff.actual_path):
        # No more actual pages.
        break

      expected_path = path_templates.GetExpectedPath(page)
      if os.path.exists(expected_path):
        page_diff.expected_path = expected_path

        compare_error = self._RunImageCompareCommand(page_diff,
                                                     image_matching_algorithm)
        if compare_error:
          page_diff.reason = str(compare_error)

          # TODO(crbug.com/pdfium/1925): Compare and diff simultaneously.
          page_diff.diff_path = path_templates.GetDiffPath(page)
          # A zero exit status is treated as "no diff image produced".
          # NOTE(review): presumably pdfium_diff exits non-zero when it writes
          # a diff for differing images -- confirm against the tool.
          if not self._RunImageDiffCommand(page_diff):
            print(f'WARNING: No diff for {page_diff.actual_path}')
            page_diff.diff_path = None
        else:
          # Validate that no other paths match.
          for unexpected_path in path_templates.GetExpectedPaths(page)[1:]:
            page_diff.expected_path = unexpected_path
            if not self._RunImageCompareCommand(page_diff,
                                                image_matching_algorithm):
              page_diff.reason = f'Also matches {unexpected_path}'
              break
          # Restore the most specific expectation for reporting.
          page_diff.expected_path = expected_path
      else:
        if page == 0:
          print(f'WARNING: no expected results files for {input_filename}')
        page_diff.reason = f'{expected_path} does not exist'

      if page_diff.reason:
        image_diffs.append(page_diff)

    return image_diffs

  def Regenerate(self, input_filename, source_dir, working_dir,
                 image_matching_algorithm):
    """Updates expected images so that they match the actual images.

    For each page, either leaves the expectations alone (the most specific one
    already matches), backs up expectations that shadow a matching one (or
    that belong to a page with no actual image) by renaming them to `*.bak`,
    or copies the actual image over the most specific expectation and runs
    the PNG optimizer on it.
    """
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      expected_paths = path_templates.GetExpectedPaths(page)

      first_match = None
      last_match = None
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if os.path.exists(page_diff.actual_path):
        # Match against all expected page images.
        for index, expected_path in enumerate(expected_paths):
          page_diff.expected_path = expected_path
          if not self._RunImageCompareCommand(page_diff,
                                              image_matching_algorithm):
            if first_match is None:
              first_match = index
            last_match = index

        if last_match == 0:
          # Regeneration not needed. This case may be reached if only some, but
          # not all, pages need to be regenerated.
          continue
      elif expected_paths:
        # Remove all expected page images.
        print(f'WARNING: {input_filename} has extra expected page {page}')
        first_match = 0
        last_match = len(expected_paths)
      else:
        # No more expected or actual pages.
        break

      # Try to reuse expectations by removing intervening non-matches.
      #
      # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of
      # global knowledge about other test configurations, which is why it just
      # creates backup files rather than immediately removing files.
      if last_match is not None:
        if first_match > 1:
          print(f'WARNING: {input_filename}.{page} has non-adjacent match')
        if first_match != last_match:
          print(f'WARNING: {input_filename}.{page} has redundant matches')

        for expected_path in expected_paths[:last_match]:
          # os.replace() overwrites a stale backup on every platform, whereas
          # os.rename() raises on Windows if the destination already exists.
          os.replace(expected_path, expected_path + '.bak')
        continue

      # Regenerate the most specific expected path that exists. If there are no
      # existing expectations, regenerate the base case.
      expected_path = path_templates.GetExpectedPath(page)
      shutil.copyfile(page_diff.actual_path, expected_path)
      optimize_error = self._RunCommand([_PNG_OPTIMIZER, expected_path])
      if optimize_error:
        # The unoptimized copy is still a valid expectation, so just report.
        print(f'WARNING: {_PNG_OPTIMIZER} failed for {expected_path}: '
              f'{optimize_error}')


_ACTUAL_TEMPLATE = '.pdf.%d.png'
_DIFF_TEMPLATE = '.pdf.%d.diff.png'


class _PathTemplates:
  """Builds per-page actual, diff, and expected image paths for one input."""

  def __init__(self, input_filename, source_dir, working_dir, os_name,
               suffix_order):
    """Pre-computes the `%d`-style page templates.

    Args:
      input_filename: Input file name; its extension is stripped.
      source_dir: Directory holding the expected images.
      working_dir: Directory holding the actual and diff images.
      os_name: Value substituted for `{os}` in each suffix.
      suffix_order: Expectation suffixes, from most to least specific.
    """
    input_root, _ = os.path.splitext(input_filename)
    self.actual_path_template = os.path.join(working_dir,
                                             input_root + _ACTUAL_TEMPLATE)
    self.diff_path_template = os.path.join(working_dir,
                                           input_root + _DIFF_TEMPLATE)

    # Pre-create the available templates from most to least specific. We
    # generally expect the most specific case to match first.
    self.expected_templates = []
    for suffix in suffix_order:
      formatted_suffix = suffix.format(os=os_name)
      self.expected_templates.append(
          os.path.join(
              source_dir,
              f'{input_root}_expected{formatted_suffix}{_ACTUAL_TEMPLATE}'))
    assert self.expected_templates

  def GetActualPath(self, page):
    """Returns the actual image path for `page`."""
    return self.actual_path_template % page

  def GetDiffPath(self, page):
    """Returns the diff image path for `page`."""
    return self.diff_path_template % page

  def _GetPossibleExpectedPaths(self, page):
    """Returns every candidate expected path, most specific first."""
    return [template % page for template in self.expected_templates]

  def GetExpectedPaths(self, page):
    """Returns the candidate expected paths that exist, most specific first."""
    return [
        path for path in self._GetPossibleExpectedPaths(page)
        if os.path.exists(path)
    ]

  def GetExpectedPath(self, page, default_to_base=True):
    """Returns the most specific expected path that exists.

    If no candidate exists, returns the last (least specific) candidate when
    `default_to_base` is true, otherwise `None`.
    """
    last_not_found_expected_path = None
    for expected_path in self._GetPossibleExpectedPaths(page):
      if os.path.exists(expected_path):
        return expected_path
      last_not_found_expected_path = expected_path
    return last_not_found_expected_path if default_to_base else None