#!/usr/bin/env python3
# Copyright 2015 The PDFium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Helpers for comparing actual PNG page images against expected ones.

Also supports regenerating the expected images from the actual images.
"""

from dataclasses import dataclass
import itertools
import os
import shutil
import subprocess
import sys
from typing import Optional

EXACT_MATCHING = 'exact'
FUZZY_MATCHING = 'fuzzy'

_PNG_OPTIMIZER = 'optipng'

# Expected-image suffixes, ordered from most to least specific. `{os}` is
# replaced with the OS name when the path templates are built.
_COMMON_SUFFIX_ORDER = ('_{os}', '')
_AGG_SUFFIX_ORDER = ('_agg_{os}', '_agg') + _COMMON_SUFFIX_ORDER
_SKIA_SUFFIX_ORDER = ('_skia_{os}', '_skia') + _COMMON_SUFFIX_ORDER


@dataclass
class ImageDiff:
  """Details about an image diff.

  Attributes:
    actual_path: Path to the actual image file.
    expected_path: Path to the expected image file, or `None` if no matches.
    diff_path: Path to the diff image file, or `None` if no diff.
    reason: Optional reason for the diff.
  """
  actual_path: str
  expected_path: Optional[str] = None
  diff_path: Optional[str] = None
  reason: Optional[str] = None


class PNGDiffer:
  """Compares actual page images with expected ones using `pdfium_diff`."""

  def __init__(self, finder, features, reverse_byte_order):
    """Initializes the differ.

    Args:
      finder: Object providing `ExecutablePath(name)` and `os_name`.
      features: Container of feature names; if it contains 'SKIA', the Skia
        suffix order is used, otherwise the AGG suffix order.
      reverse_byte_order: If true, `--reverse-byte-order` is passed to the
        image comparison command.
    """
    self.pdfium_diff_path = finder.ExecutablePath('pdfium_diff')
    self.os_name = finder.os_name
    self.reverse_byte_order = reverse_byte_order
    if 'SKIA' in features:
      self.suffix_order = _SKIA_SUFFIX_ORDER
    else:
      self.suffix_order = _AGG_SUFFIX_ORDER

  def CheckMissingTools(self, regenerate_expected):
    """Returns an error message if a required tool is missing, else `None`.

    The PNG optimizer is only required when `regenerate_expected` is true.
    """
    if regenerate_expected and not shutil.which(_PNG_OPTIMIZER):
      return f'Please install "{_PNG_OPTIMIZER}" to regenerate expected images.'
    return None

  def GetActualFiles(self, input_filename, source_dir, working_dir):
    """Returns the actual image paths for pages that have expectations.

    Pages are visited in order starting at 0, stopping at the first page with
    no existing expected image. The actual files themselves are not checked
    for existence.
    """
    actual_paths = []
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)

    for page in itertools.count():
      actual_path = path_templates.GetActualPath(page)
      if path_templates.GetExpectedPath(page, default_to_base=False):
        actual_paths.append(actual_path)
      else:
        break
    return actual_paths

  def _RunCommand(self, cmd):
    """Runs `cmd`, capturing its output.

    Returns:
      `None` if the command exits successfully, otherwise the
      `subprocess.CalledProcessError` describing the failure.
    """
    try:
      subprocess.run(cmd, capture_output=True, check=True)
      return None
    except subprocess.CalledProcessError as e:
      return e

  def _RunImageCompareCommand(self, image_diff, image_matching_algorithm):
    """Compares the actual and expected images of `image_diff`.

    Returns:
      `None` if the comparison command succeeds, otherwise the error from
      `_RunCommand()`.
    """
    cmd = [self.pdfium_diff_path]
    if self.reverse_byte_order:
      cmd.append('--reverse-byte-order')
    if image_matching_algorithm == FUZZY_MATCHING:
      cmd.append('--fuzzy')
    cmd.extend([image_diff.actual_path, image_diff.expected_path])
    return self._RunCommand(cmd)

  def _RunImageDiffCommand(self, image_diff):
    """Runs the diff tool in `--subtract` mode, writing to the diff path.

    Returns:
      `None` if the command succeeds, otherwise the error from
      `_RunCommand()`.
    """
    # TODO(crbug.com/pdfium/1925): Diff mode ignores --reverse-byte-order.
    return self._RunCommand([
        self.pdfium_diff_path, '--subtract', image_diff.actual_path,
        image_diff.expected_path, image_diff.diff_path
    ])

  def ComputeDifferences(self, input_filename, source_dir, working_dir,
                         image_matching_algorithm):
    """Computes differences between actual and expected image files.

    Pages are visited in order starting at 0 until an actual image is missing.

    Returns:
      A list of `ImageDiff` instances, one per differing page.
    """
    image_diffs = []

    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if not os.path.exists(page_diff.actual_path):
        # No more actual pages.
        break

      expected_path = path_templates.GetExpectedPath(page)
      if os.path.exists(expected_path):
        page_diff.expected_path = expected_path

        compare_error = self._RunImageCompareCommand(page_diff,
                                                     image_matching_algorithm)
        if compare_error:
          page_diff.reason = str(compare_error)

          # TODO(crbug.com/pdfium/1925): Compare and diff simultaneously.
          page_diff.diff_path = path_templates.GetDiffPath(page)
          # A diff command that reports no error is treated as "no diff".
          if not self._RunImageDiffCommand(page_diff):
            print(f'WARNING: No diff for {page_diff.actual_path}')
            page_diff.diff_path = None
        else:
          # Validate that no other paths match.
          for unexpected_path in path_templates.GetExpectedPaths(page)[1:]:
            page_diff.expected_path = unexpected_path
            if not self._RunImageCompareCommand(page_diff,
                                                image_matching_algorithm):
              page_diff.reason = f'Also matches {unexpected_path}'
              break
          # Restore the primary expectation after probing the alternatives.
          page_diff.expected_path = expected_path
      else:
        if page == 0:
          print(f'WARNING: no expected results files for {input_filename}')
        page_diff.reason = f'{expected_path} does not exist'

      if page_diff.reason:
        image_diffs.append(page_diff)

    return image_diffs

  def Regenerate(self, input_filename, source_dir, working_dir,
                 image_matching_algorithm):
    """Regenerates expected images from the actual images, page by page.

    For each page, starting at 0:
      * If the actual image matches the most specific existing expectation,
        nothing is done.
      * If it matches a less specific expectation, the expectations listed
        before the last match are moved to `<path>.bak`.
      * If there is no actual image but expectations exist, all of them are
        moved to `<path>.bak`.
      * If nothing matches, the actual image is copied over the path returned
        by `GetExpectedPath()` and the PNG optimizer is run on the copy.
    Processing stops at the first page with neither an actual image nor any
    expected image.
    """
    path_templates = _PathTemplates(input_filename, source_dir, working_dir,
                                    self.os_name, self.suffix_order)
    for page in itertools.count():
      expected_paths = path_templates.GetExpectedPaths(page)

      first_match = None
      last_match = None
      page_diff = ImageDiff(actual_path=path_templates.GetActualPath(page))
      if os.path.exists(page_diff.actual_path):
        # Match against all expected page images.
        for index, expected_path in enumerate(expected_paths):
          page_diff.expected_path = expected_path
          if not self._RunImageCompareCommand(page_diff,
                                              image_matching_algorithm):
            if first_match is None:
              first_match = index
            last_match = index

        if last_match == 0:
          # Regeneration not needed. This case may be reached if only some, but
          # not all, pages need to be regenerated.
          continue
      elif expected_paths:
        # Remove all expected page images.
        print(f'WARNING: {input_filename} has extra expected page {page}')
        first_match = 0
        last_match = len(expected_paths)
      else:
        # No more expected or actual pages.
        break

      # Try to reuse expectations by removing intervening non-matches.
      #
      # TODO(crbug.com/pdfium/1988): This can make mistakes due to a lack of
      # global knowledge about other test configurations, which is why it just
      # creates backup files rather than immediately removing files.
      if last_match is not None:
        if first_match > 1:
          print(f'WARNING: {input_filename}.{page} has non-adjacent match')
        if first_match != last_match:
          print(f'WARNING: {input_filename}.{page} has redundant matches')

        for expected_path in expected_paths[:last_match]:
          # os.replace() overwrites a stale backup left by an earlier run;
          # os.rename() fails on Windows when the destination exists.
          os.replace(expected_path, expected_path + '.bak')
        continue

      # Regenerate the most specific expected path that exists. If there are no
      # existing expectations, regenerate the base case.
      expected_path = path_templates.GetExpectedPath(page)
      shutil.copyfile(page_diff.actual_path, expected_path)
      # Optimization is best-effort: the copied image is kept on failure, but
      # the failure is reported rather than silently dropped.
      optimize_error = self._RunCommand([_PNG_OPTIMIZER, expected_path])
      if optimize_error:
        print(f'WARNING: {_PNG_OPTIMIZER} failed for {expected_path}: '
              f'{optimize_error}')


_ACTUAL_TEMPLATE = '.pdf.%d.png'
_DIFF_TEMPLATE = '.pdf.%d.diff.png'


class _PathTemplates:
  """Builds per-page actual, diff, and expected image paths for one input."""

  def __init__(self, input_filename, source_dir, working_dir, os_name,
               suffix_order):
    """Initializes the templates.

    Args:
      input_filename: Input file name; its extension is stripped.
      source_dir: Directory containing the expected images.
      working_dir: Directory containing the actual and diff images.
      os_name: Value substituted for `{os}` in each suffix.
      suffix_order: Non-empty sequence of expected-image suffixes, ordered
        from most to least specific.
    """
    input_root, _ = os.path.splitext(input_filename)
    self.actual_path_template = os.path.join(working_dir,
                                             input_root + _ACTUAL_TEMPLATE)
    self.diff_path_template = os.path.join(working_dir,
                                           input_root + _DIFF_TEMPLATE)

    # Pre-create the available templates from most to least specific. We
    # generally expect the most specific case to match first.
    self.expected_templates = []
    for suffix in suffix_order:
      formatted_suffix = suffix.format(os=os_name)
      self.expected_templates.append(
          os.path.join(
              source_dir,
              f'{input_root}_expected{formatted_suffix}{_ACTUAL_TEMPLATE}'))
    assert self.expected_templates

  def GetActualPath(self, page):
    """Returns the actual image path for `page`."""
    return self.actual_path_template % page

  def GetDiffPath(self, page):
    """Returns the diff image path for `page`."""
    return self.diff_path_template % page

  def _GetPossibleExpectedPaths(self, page):
    """Returns every candidate expected path for `page`, in template order."""
    return [template % page for template in self.expected_templates]

  def GetExpectedPaths(self, page):
    """Returns the candidate expected paths for `page` that exist on disk."""
    return list(filter(os.path.exists, self._GetPossibleExpectedPaths(page)))

  def GetExpectedPath(self, page, default_to_base=True):
    """Returns the most specific expected path that exists.

    If no candidate exists, returns the last (least specific) candidate path
    when `default_to_base` is true, otherwise `None`.
    """
    last_not_found_expected_path = None
    for expected_path in self._GetPossibleExpectedPaths(page):
      if os.path.exists(expected_path):
        return expected_path
      last_not_found_expected_path = expected_path
    return last_not_found_expected_path if default_to_base else None