# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Helpful commands for working with a Git repository."""

import logging
from pathlib import Path
import subprocess
from typing import Collection, Iterable, Iterator, List, NamedTuple, Optional
from typing import Pattern, Set, Tuple, Union

from pw_presubmit.tools import log_run, plural

_LOG = logging.getLogger(__name__)
PathOrStr = Union[Path, str]


def git_stdout(*args: PathOrStr,
               show_stderr: bool = False,
               repo: PathOrStr = '.') -> str:
    """Runs `git -C <repo> <args...>` and returns its stdout.

    Args:
      *args: arguments to pass to git after `-C <repo>`
      show_stderr: if False, git's stderr is sent to DEVNULL; if True, stderr
          is left unredirected (stderr=None)
      repo: directory passed to git's -C option

    Returns:
      The captured stdout, decoded and with leading/trailing whitespace
      stripped.
    """
    # NOTE(review): log_run is presumably a logging wrapper around
    # subprocess.run, so check=True would raise CalledProcessError on a
    # non-zero exit -- confirm against pw_presubmit.tools.
    return log_run(['git', '-C', repo, *args],
                   stdout=subprocess.PIPE,
                   stderr=None if show_stderr else subprocess.DEVNULL,
                   check=True).stdout.decode().strip()


def _ls_files(args: Collection[PathOrStr], repo: Path) -> Iterable[Path]:
    """Returns results of git ls-files as absolute paths.

    Each output line is joined onto the resolved `repo` directory, which is
    the directory git is run from.
    """
    git_root = repo.resolve()
    for file in git_stdout('ls-files', '--', *args, repo=repo).splitlines():
        yield git_root / file


def _diff_names(commit: str, pathspecs: Collection[PathOrStr],
                repo: Path) -> Iterable[Path]:
    """Returns absolute paths of files changed since the specified commit.

    Deleted files are excluded (--diff-filter=d). Each output line is joined
    onto the repository root returned by root(repo).
    """
    git_root = root(repo)
    for file in git_stdout('diff',
                           '--name-only',
                           '--diff-filter=d',
                           commit,
                           '--',
                           *pathspecs,
                           repo=repo).splitlines():
        yield git_root / file


def list_files(commit: Optional[str] = None,
               pathspecs: Collection[PathOrStr] = (),
               repo_path: Optional[Path] = None) -> List[Path]:
    """Lists files with git ls-files or git diff --name-only.

    Args:
      commit: commit to use as a base for git diff
      pathspecs: Git pathspecs to use in git ls-files or diff
      repo_path: repo path from which to run commands; defaults to Path.cwd()

    Returns:
      A sorted list of absolute paths
    """
    if repo_path is None:
        repo_path = Path.cwd()

    if commit:
        return sorted(_diff_names(commit, pathspecs, repo_path))

    return sorted(_ls_files(pathspecs, repo_path))


def has_uncommitted_changes(repo: Optional[Path] = None) -> bool:
    """Returns True if the Git repo has uncommitted changes in it.

    This does not check for untracked files.

    Args:
      repo: directory to run git in; defaults to Path.cwd()
    """
    if repo is None:
        repo = Path.cwd()

    # Refresh the Git index so that the diff-index command will be accurate.
    log_run(['git', '-C', repo, 'update-index', '-q', '--refresh'], check=True)

    # diff-index exits with 1 if there are uncommitted changes. Any other exit
    # code (including errors) is reported as "no uncommitted changes".
    return log_run(['git', '-C', repo, 'diff-index', '--quiet', 'HEAD',
                    '--']).returncode == 1


def _describe_constraints(git_root: Path, repo_path: Path,
                          commit: Optional[str],
                          pathspecs: Collection[PathOrStr],
                          exclude: Collection[Pattern[str]]) -> Iterable[str]:
    """Yields one phrase for each constraint applied to the set of files.

    A phrase is produced for each of: repo_path differing from git_root, a
    base commit, pathspecs, and exclude patterns. Nothing is yielded for a
    constraint that is absent or empty.
    """
    if not git_root.samefile(repo_path):
        yield (
            f'under the {repo_path.resolve().relative_to(git_root.resolve())} '
            'subdirectory')

    if commit:
        yield f'that have changed since {commit}'

    if pathspecs:
        paths_str = ', '.join(str(p) for p in pathspecs)
        yield f'that match {plural(pathspecs, "pathspec")} ({paths_str})'

    if exclude:
        yield (f'that do not match {plural(exclude, "pattern")} (' +
               ', '.join(p.pattern for p in exclude) + ')')


def describe_files(git_root: Path, repo_path: Path, commit: Optional[str],
                   pathspecs: Collection[PathOrStr],
                   exclude: Collection[Pattern[str]]) -> str:
    """Completes 'Doing something to ...' for a set of files in a Git repo.

    Returns:
      A single-line description when there are zero or one constraints;
      otherwise a header line followed by one indented bullet per constraint.
    """
    constraints = list(
        _describe_constraints(git_root, repo_path, commit, pathspecs, exclude))
    if not constraints:
        return f'all files in the {git_root.name} repo'

    msg = f'files in the {git_root.name} repo'
    if len(constraints) == 1:
        return f'{msg} {constraints[0]}'

    return msg + ''.join(f'\n    - {line}' for line in constraints)


def root(repo_path: PathOrStr = '.', *, show_stderr: bool = True) -> Path:
    """Returns the repository root as an absolute path.

    If repo_path is not a directory, git is run from its parent directory.

    Raises:
      FileNotFoundError: the path does not exist
      subprocess.CalledProcessError: the path is not in a Git repo
    """
    repo_path = Path(repo_path)
    if not repo_path.exists():
        raise FileNotFoundError(f'{repo_path} does not exist')

    return Path(
        git_stdout('rev-parse',
                   '--show-toplevel',
                   repo=repo_path if repo_path.is_dir() else repo_path.parent,
                   show_stderr=show_stderr))


def within_repo(repo_path: PathOrStr = '.') -> Optional[Path]:
    """Similar to root(repo_path), returns None if the path is not in a repo.

    Only subprocess.CalledProcessError is handled here; the FileNotFoundError
    that root() raises for a nonexistent path still propagates.
    """
    try:
        return root(repo_path, show_stderr=False)
    except subprocess.CalledProcessError:
        return None


def is_repo(repo_path: PathOrStr = '.') -> bool:
    """True if the path is tracked by a Git repo."""
    return within_repo(repo_path) is not None


def path(repo_path: PathOrStr,
         *additional_repo_paths: PathOrStr,
         repo: PathOrStr = '.') -> Path:
    """Returns a path relative to a Git repository's root.

    The given path components are joined onto root(repo).
    """
    return root(repo).joinpath(repo_path, *additional_repo_paths)


class PythonPackage(NamedTuple):
    """Files that make up one Python package, located by its setup.py."""
    root: Path  # Path to the directory containing the setup.py
    package: Path  # Path to the main package directory
    packaged_files: Tuple[Path, ...]  # All sources in the main package dir
    other_files: Tuple[Path, ...]  # Other Python files under root

    def all_files(self) -> Tuple[Path, ...]:
        """Returns packaged_files followed by other_files."""
        return self.packaged_files + self.other_files


def all_python_packages(repo: PathOrStr = '.') -> Iterator[PythonPackage]:
    """Finds all Python packages in the repo based on setup.py locations.

    Yields one PythonPackage per directory that contains a setup.py and at
    least one Python file in a subdirectory.
    """
    root_py_dirs = [
        file.parent
        for file in _ls_files(['setup.py', '*/setup.py'], Path(repo))
    ]

    for py_dir in root_py_dirs:
        all_packaged_files = _ls_files([py_dir / '*' / '*.py'], repo=py_dir)
        common_dir: Optional[str] = None

        # Make sure there is only one package directory with Python files in
        # it.
        for file in all_packaged_files:
            package_dir = file.relative_to(py_dir).parts[0]

            if common_dir is None:
                common_dir = package_dir
            elif common_dir != package_dir:
                _LOG.warning(
                    'There are multiple Python package directories in %s: %s '
                    'and %s. This is not supported by pw presubmit. Each '
                    'setup.py should correspond with a single Python package',
                    py_dir, common_dir, package_dir)
                break

        if common_dir is not None:
            packaged_files = tuple(_ls_files(['*/*.py'], repo=py_dir))
            # Build the lookup set once so the membership test below is O(1)
            # per file instead of a linear scan of the tuple.
            packaged_set = frozenset(packaged_files)
            other_files = tuple(
                f for f in _ls_files(['*.py'], repo=py_dir)
                if f.name != 'setup.py' and f not in packaged_set)

            yield PythonPackage(py_dir, py_dir / common_dir, packaged_files,
                                other_files)


def python_packages_containing(
        python_paths: Iterable[Path],
        repo: PathOrStr = '.') -> Tuple[List[PythonPackage], List[Path]]:
    """Finds all Python packages containing the provided Python paths.

    A path belongs to the first package whose root is one of its parent
    directories. Packages are returned without duplicates, in the order in
    which they were first matched, so the result is stable between runs.

    Returns:
      ([packages], [files_not_in_packages])
    """
    all_packages = list(all_python_packages(repo))

    # A set alone would make the returned order depend on hash randomization;
    # the list records first-seen order and the set provides fast dedup.
    packages: List[PythonPackage] = []
    seen_packages: Set[PythonPackage] = set()
    files_not_in_packages: List[Path] = []

    for python_path in python_paths:
        for package in all_packages:
            if package.root in python_path.parents:
                if package not in seen_packages:
                    seen_packages.add(package)
                    packages.append(package)
                break
        else:
            files_not_in_packages.append(python_path)

    return packages, files_not_in_packages


def commit_message(commit: str = 'HEAD', repo: PathOrStr = '.') -> str:
    """Returns the raw body (git log --format=%B) of a single commit."""
    return git_stdout('log', '--format=%B', '-n1', commit, repo=repo)