• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2024 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Formatting library core."""
15
16import abc
17from dataclasses import dataclass
18import difflib
19import logging
20from pathlib import Path
21from typing import Callable, Iterable, Iterator
22
23from pw_cli.file_filter import FileFilter
24from pw_cli.tool_runner import ToolRunner, BasicSubprocessRunner
25
26
# Module-level logger, named after this module via ``__name__``.
_LOG: logging.Logger = logging.getLogger(__name__)
28
29
30def _ensure_newline(orig: str) -> str:
31    """Adds a warning and newline to any file without a trailing newline."""
32
33    if orig.endswith('\n'):
34        return orig
35    return orig + '\nNo newline at end of file\n'
36
37
def simple_diff(path: Path, original: str, formatted: str) -> str:
    """Builds a unified diff between the original and formatted text.

    Both inputs are first passed through ``_ensure_newline()`` so that a
    missing trailing newline shows up in the diff.

    Args:
        path: Path used to label both sides of the diff.
        original: The contents of the original file, as a string.
        formatted: The contents of the formatted file, as a string.

    Returns:
        The unified diff joined into a single string.
    """
    before_lines = _ensure_newline(original).splitlines(keepends=True)
    after_lines = _ensure_newline(formatted).splitlines(keepends=True)

    diff_lines = difflib.unified_diff(
        before_lines,
        after_lines,
        fromfile=f'{path}  (original)',
        tofile=f'{path}  (reformatted)',
    )
    return ''.join(diff_lines)
51
52
# Type alias: a callable taking (Path, str, str) and returning a str.
DiffCallback = Callable[[Path, str, str], str]
54"""The callback type for producing diffs.
55
Arguments:
57    path: File path of the file being diffed.
58    original: The contents of the original file, as a string.
59    formatted: The contents of the formatted file, as a string.
60
61Returns:
62    A human readable diff as a string.
63"""
64
65
66@dataclass(frozen=True)
67class FormattedFileContents:
68    """The result of running a code formatter on the contents of a file.
69
70    This type is returned by in-memory formatting check operations.
71
72    Attributes:
73        ok: A boolean indicating whether or not formatting was successful.
74        formatted_file_contents: The contents of the resulting formatted file
75            as bytes.
76        error_message: A string containing any errors or warnings produced by
77            the formatting process.
78    """
79
80    ok: bool
81    formatted_file_contents: bytes
82    error_message: str | None
83
84
85@dataclass(frozen=True)
86class FormattedDiff:
87    """The resulting diff of applying a code formatter to a file.
88
89    Attributes:
90        ok: A boolean indicating whether or not formatting was successful.
91        diff: The resulting diff of applying code formatting, as a
92            human-readable string.
93        error_message: A string containing any errors or warnings produced by
94            the formatting process.
95        file_path: The path of the corresponding file that produced this diff.
96    """
97
98    ok: bool
99    diff: str
100    error_message: str | None
101    file_path: Path
102
103
104@dataclass(frozen=True)
105class FormatFixStatus:
106    """The status of running a code formatter in-place on a file.
107
108    This type is returned by in-place formatting fix operations.
109
110    Attributes:
111        ok: A boolean indicating whether or not formatting was successful.
112        error_message: A string containing any errors or warnings produced by
113            the formatting process.
114    """
115
116    ok: bool
117    error_message: str | None
118
119
class FileChecker(abc.ABC):
    """Base class for tools that check formatting without applying it.

    A checker formats file contents in memory and reports how the result
    differs from what is on disk. It offers no way to write the formatted
    result back to the file.

    Attributes:
        file_patterns: The :py:class:`pw_cli.file_filter.FileFilter` that
            describes what kind of files this check applies to.
        mnemonic: A human-readable description of the kind of checker this is
            (e.g. "C and C++", "Bazel", "Python (black)").
        run_tool: The ``tool_runner`` given at construction time (a
            :py:class:`pw_cli.tool_runner.ToolRunner`); call this instead of
            ``subprocess.run``.
        diff_tool: The :py:attr:`pw_presubmit.format.core.DiffCallback` used
            to render formatting diffs.
    """

    def __init__(
        self,
        *,
        file_patterns: FileFilter,
        mnemonic: str,
        tool_runner: ToolRunner = BasicSubprocessRunner(),
        diff_tool: DiffCallback = simple_diff,
    ):
        self.mnemonic = mnemonic
        self.file_patterns = file_patterns
        self.diff_tool = diff_tool
        # Subprocesses must go through `self.run_tool` instead of
        # `subprocess.run` so that tools and environment-specific handlers
        # can be injected.
        self.run_tool = tool_runner

    @abc.abstractmethod
    def format_file_in_memory(
        self, file_path: Path, file_contents: bytes
    ) -> FormattedFileContents:
        """Produces the formatted form of a file without touching the file.

        ``file_path`` and ``file_contents`` describe the same file; both are
        supplied for convenience. Prefer ``file_path`` when the tool can read
        it without modifying it, or ``file_contents`` when the tool accepts
        its input on stdin.

        Launch any subprocess through ``self.run_tool()`` so that tools and
        tool wrappers can be injected and tested.

        **WARNING**: A :py:class:`pw_presubmit.format.core.FileChecker` must
        **never** modify the file at ``file_path``.

        Returns:
            A populated
            :py:class:`pw_presubmit.format.core.FormattedFileContents` holding
            either the formatted contents or an error message.
        """

    def get_formatting_diff(
        self, file_path: Path, dry_run: bool = False
    ) -> FormattedDiff | None:
        """Compares a file on disk against its formatted version.

        The file is read and formatted in memory even when ``dry_run`` is
        ``True``; ``dry_run`` only suppresses the diff of a successful format.

        Returns:
            A :py:class:`pw_presubmit.format.core.FormattedDiff` with
            ``ok=False`` and an empty diff if formatting failed (regardless of
            ``dry_run``). Otherwise ``None`` if ``dry_run`` is enabled **OR**
            formatting changed nothing, else a
            :py:class:`pw_presubmit.format.core.FormattedDiff` holding the
            diff.
        """
        on_disk = file_path.read_bytes()
        result = self.format_file_in_memory(file_path, on_disk)

        if not result.ok:
            # The formatter failed, so there is nothing meaningful to diff.
            return FormattedDiff(
                ok=False,
                diff='',
                error_message=result.error_message,
                file_path=file_path,
            )

        # Dry runs and already-formatted files both have nothing to report.
        if dry_run or result.formatted_file_contents == on_disk:
            return None

        # Undecodable bytes are replaced rather than raising, since the text
        # is only used for a human-readable diff.
        rendered_diff = self.diff_tool(
            file_path,
            on_disk.decode(errors='replace'),
            result.formatted_file_contents.decode(errors='replace'),
        )
        return FormattedDiff(
            ok=True,
            diff=rendered_diff,
            error_message=result.error_message,
            file_path=file_path,
        )

    def get_formatting_diffs(
        self, paths: Iterable[Path], dry_run: bool = False
    ) -> Iterator[FormattedDiff]:
        """Checks the formatting of many files without modifying them.

        This default implementation calls ``get_formatting_diff()`` once per
        path. Override it for formatters that can check several files in one
        invocation; doing so may require extra parsing to attribute diffs or
        error messages to individual file paths.

        Returns:
            An iterator of :py:class:`pw_presubmit.format.core.FormattedDiff`
            objects, one for each path whose check did not return ``None``.
        """
        per_file_results = (
            self.get_formatting_diff(path, dry_run) for path in paths
        )
        yield from (
            result for result in per_file_results if result is not None
        )
238
239
class FileFormatter(FileChecker):
    """Base class for tools that can also apply formatting fixes to files."""

    def __init__(self, **kwargs):
        # Every construction option is forwarded unchanged to FileChecker.
        super().__init__(**kwargs)

    @abc.abstractmethod
    def format_file(self, file_path: Path) -> FormatFixStatus:
        """Rewrites the given file in-place with its formatted contents.

        Launch any subprocess through ``self.run_tool()`` so that tools and
        tool wrappers can be injected and tested.

        Returns:
            A FormatFixStatus that contains relevant errors/warnings.
        """

    def format_files(
        self, paths: Iterable[Path], keep_warnings: bool = True
    ) -> Iterator[tuple[Path, FormatFixStatus]]:
        """Formats the provided files and fixes them in-place.

        All files must be updated to contain the formatted version. Problems
        hit along the way are not raised here; they are reported as
        ``(path, status)`` pairs so that each error is tied to its file.

        Launch any subprocess through ``self.run_tool()`` so that tools and
        tool wrappers can be injected and tested.

        This default implementation calls ``format_file()`` once per path.
        Override it for formatters that can fix several files in one
        invocation; doing so may require extra parsing to associate error
        messages with the paths of the files that produced them.

        Returns:
            An iterator of ``Path`` and
            :py:class:`pw_presubmit.format.core.FormatFixStatus` pairs for each
            file that was not successfully formatted. If ``keep_warnings`` is
            ``True``, successful format operations that carry a non-empty
            ``error_message`` are also returned.
        """
        for file_path in paths:
            status = self.format_file(file_path)
            # Skip clean successes: formatted OK with no warning to report.
            if status.ok and not (status.error_message and keep_warnings):
                continue
            yield (file_path, status)
287