# Copyright 2024 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Formatting library core."""

import abc
from dataclasses import dataclass
import difflib
import logging
from pathlib import Path
from typing import Callable, Iterable, Iterator

from pw_cli.file_filter import FileFilter
from pw_cli.tool_runner import ToolRunner, BasicSubprocessRunner


_LOG: logging.Logger = logging.getLogger(__name__)


def _ensure_newline(orig: str) -> str:
    """Adds a warning and newline to any text without a trailing newline.

    Empty text is returned unchanged: there is no final line to annotate, and
    appending the warning would make a diff show lines that do not exist in
    the file.

    Args:
        orig: The text to check.

    Returns:
        ``orig`` unchanged if it is empty or already ends with a newline,
        otherwise ``orig`` followed by a newline and a
        ``No newline at end of file`` marker line.
    """

    if not orig or orig.endswith('\n'):
        return orig
    return orig + '\nNo newline at end of file\n'


def simple_diff(path: Path, original: str, formatted: str) -> str:
    """Produces a unified diff of the contents of two files.

    Args:
        path: File path of the file being diffed; only used to label the
            two sides of the diff.
        original: The contents of the original file, as a string.
        formatted: The contents of the formatted file, as a string.

    Returns:
        A unified diff as a single string. The string is empty if the two
        inputs are identical.
    """

    # Normalize the line endings of both sides so a missing trailing newline
    # shows up as an explicit marker line in the diff.
    original = _ensure_newline(original)
    formatted = _ensure_newline(formatted)
    return ''.join(
        difflib.unified_diff(
            original.splitlines(keepends=True),
            formatted.splitlines(keepends=True),
            f'{path} (original)',
            f'{path} (reformatted)',
        )
    )


DiffCallback = Callable[[Path, str, str], str]
"""The callback type for producing diffs.

Arguments:
    path: File path of the file being diffed.
    original: The contents of the original file, as a string.
    formatted: The contents of the formatted file, as a string.

Returns:
    A human readable diff as a string.
"""


@dataclass(frozen=True)
class FormattedFileContents:
    """The result of running a code formatter on the contents of a file.

    This type is returned by in-memory formatting check operations.

    Attributes:
        ok: A boolean indicating whether or not formatting was successful.
        formatted_file_contents: The contents of the resulting formatted file
            as bytes.
        error_message: A string containing any errors or warnings produced by
            the formatting process, or ``None`` if there were none.
    """

    ok: bool
    formatted_file_contents: bytes
    error_message: str | None


@dataclass(frozen=True)
class FormattedDiff:
    """The resulting diff of applying a code formatter to a file.

    Attributes:
        ok: A boolean indicating whether or not formatting was successful.
        diff: The resulting diff of applying code formatting, as a
            human-readable string.
        error_message: A string containing any errors or warnings produced by
            the formatting process, or ``None`` if there were none.
        file_path: The path of the corresponding file that produced this diff.
    """

    ok: bool
    diff: str
    error_message: str | None
    file_path: Path


@dataclass(frozen=True)
class FormatFixStatus:
    """The status of running a code formatter in-place on a file.

    This type is returned by in-place formatting fix operations.

    Attributes:
        ok: A boolean indicating whether or not formatting was successful.
        error_message: A string containing any errors or warnings produced by
            the formatting process, or ``None`` if there were none.
    """

    ok: bool
    error_message: str | None


class FileChecker(abc.ABC):
    """Abstract class for a code format check tool.

    This class does not have the ability to apply formatting to files, and
    instead only allows in-memory checks to produce expected resulting diffs.

    Attributes:
        file_patterns: A :py:class:`pw_cli.file_filter.FileFilter` that
            describes what kind of files this check applies to.
        mnemonic: A human-readable description of the kind of checker this is
            (e.g. "C and C++", "Bazel", "Python (black)").
        run_tool: The :py:class:`pw_cli.tool_runner.ToolRunner` to use
            when calling out to subprocesses. This is set from the
            ``tool_runner`` constructor argument.
        diff_tool: The :py:attr:`pw_presubmit.format.core.DiffCallback` to use
            when producing formatting diffs.
    """

    def __init__(
        self,
        *,
        file_patterns: FileFilter,
        mnemonic: str,
        tool_runner: ToolRunner | None = None,
        diff_tool: DiffCallback = simple_diff,
    ):
        """Initializes the checker.

        Args:
            file_patterns: Describes which files this checker applies to.
            mnemonic: Human-readable name for this kind of checker.
            tool_runner: The runner used for subprocess calls. If ``None``, a
                new ``BasicSubprocessRunner`` is created for this instance.
            diff_tool: The callback used to produce diffs.
        """
        self.file_patterns = file_patterns
        self.mnemonic = mnemonic
        # Always call `self.run_tool` rather than `subprocess.run`, as it allows
        # injection of tools and other environment-specific handlers.
        #
        # The default runner is constructed here rather than in the signature
        # so that it is not created at import time and shared by every
        # checker instance.
        self.run_tool = (
            tool_runner if tool_runner is not None else BasicSubprocessRunner()
        )
        self.diff_tool = diff_tool

    @abc.abstractmethod
    def format_file_in_memory(
        self, file_path: Path, file_contents: bytes
    ) -> FormattedFileContents:
        """Returns the formatted version of a file as in-memory bytes.

        ``file_path`` and ``file_contents`` represent the same file. Both are
        provided for convenience. Use ``file_path`` if you can do so without
        modifying the file, or use ``file_contents`` if the formatting tool
        provides a mechanism for formatting the file by piping it to stdin.

        Any subprocess calls should be initiated with ``self.run_tool()`` to
        enable testing and injection of tools and tool wrappers.

        **WARNING**: A :py:class:`pw_presubmit.format.core.FileChecker` must
        **never** modify the file at ``file_path``.

        Returns:
            A populated
            :py:class:`pw_presubmit.format.core.FormattedFileContents` that
            contains either the result of formatting the file, or an error
            message.
        """

    def get_formatting_diff(
        self, file_path: Path, dry_run: bool = False
    ) -> FormattedDiff | None:
        """Returns a diff comparing a file to its formatted version.

        The formatter is always run, even when ``dry_run`` is ``True``. In a
        dry run no diff is produced, but formatter errors are still reported.

        Returns:
            ``None`` if formatting succeeded and either there is no difference
            after formatting **OR** ``dry_run`` is enabled. Otherwise, a
            :py:class:`pw_presubmit.format.core.FormattedDiff` is returned
            containing either a diff or an error.
        """
        original = file_path.read_bytes()

        formatted = self.format_file_in_memory(file_path, original)

        if not formatted.ok:
            return FormattedDiff(
                diff='',  # Don't try to diff.
                ok=False,
                file_path=file_path,
                error_message=formatted.error_message,
            )

        if dry_run:
            return None

        # No difference found.
        if formatted.formatted_file_contents == original:
            return None

        # Undecodable bytes are replaced rather than raising, so a diff can
        # still be shown for files that are not valid text.
        return FormattedDiff(
            diff=self.diff_tool(
                file_path,
                original.decode(errors='replace'),
                formatted.formatted_file_contents.decode(errors='replace'),
            ),
            file_path=file_path,
            error_message=formatted.error_message,
            ok=True,
        )

    def get_formatting_diffs(
        self, paths: Iterable[Path], dry_run: bool = False
    ) -> Iterator[FormattedDiff]:
        """Checks the formatting of many files without modifying them.

        This method may be overridden to optimize for formatters that allow
        checking multiple files in a single invocation, though you may need
        to do additional parsing to produce diffs or error messages associated
        with each file path.

        Returns:
            An iterator of :py:class:`pw_presubmit.format.core.FormattedDiff`
            objects for each file with identified formatting issues.
        """

        for path in paths:
            diff = self.get_formatting_diff(path, dry_run)
            if diff is not None:
                yield diff


class FileFormatter(FileChecker):
    """Abstract class for a code format fix tool."""

    def __init__(self, **kwargs):
        """Forwards all keyword arguments to ``FileChecker.__init__``."""
        super().__init__(**kwargs)

    @abc.abstractmethod
    def format_file(self, file_path: Path) -> FormatFixStatus:
        """Formats the provided file in-place.

        Any subprocess calls should be initiated with ``self.run_tool()`` to
        enable testing and injection of tools and tool wrappers.

        Returns:
            A FormatFixStatus that contains relevant errors/warnings.
        """

    def format_files(
        self, paths: Iterable[Path], keep_warnings: bool = True
    ) -> Iterator[tuple[Path, FormatFixStatus]]:
        """Formats the provided files and fixes them in-place.

        All files must be updated to contain the formatted version. If errors
        are encountered along the way, they should be collected and yielded as
        pairs of the path of the file and a
        :py:class:`pw_presubmit.format.core.FormatFixStatus` that describes
        the errors encountered while processing that file.

        Any subprocess calls should be initiated with ``self.run_tool()`` to
        enable testing and injection of tools and tool wrappers.

        This method may be overridden to optimize for formatters that allow
        formatting multiple files in a single invocation, though you may need
        to do additional parsing to associate error messages with the paths of
        the files that produced them.

        Returns:
            An iterator of ``Path`` and
            :py:class:`pw_presubmit.format.core.FormatFixStatus` pairs for each
            file that was not successfully formatted. If ``keep_warnings`` is
            ``True``, any successful format operations with warnings will also
            be returned.
        """

        for file_path in paths:
            status = self.format_file(file_path)
            if not status.ok or (status.error_message and keep_warnings):
                yield (file_path, status)