# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Check various rules for .gitmodules files."""

import dataclasses
import logging
from pathlib import Path
from typing import Callable, Dict, Optional, Sequence
import urllib.parse

from pw_presubmit import (
    git_repo,
    PresubmitContext,
    PresubmitFailure,
    filter_paths,
)

_LOG: logging.Logger = logging.getLogger(__name__)


@dataclasses.dataclass
class Config:
    """Options controlling which submodule URLs and settings are accepted."""

    # Allow direct references to non-Google hosts.
    allow_non_googlesource_hosts: bool = False

    # Allow a specific subset of googlesource.com hosts. If an empty list then
    # all googlesource hosts are permitted. Entries are bare host names: an
    # entry containing "." or "-review" is rejected by process_gitmodules().
    allowed_googlesource_hosts: Sequence[str] = ()

    # Require relative URLs, like those that start with "/" or "../".
    require_relative_urls: bool = False

    # Allow "sso://" URLs. When False, "rpc://" URLs are rejected as well.
    allow_sso: bool = True

    # Allow use of "git.corp.google.com" URLs.
    allow_git_corp_google_com: bool = True

    # Require a branch for each submodule.
    require_branch: bool = False

    # Arbitrary validator. Gets invoked with the presubmit context, the path of
    # the .gitmodules file, the submodule name, and a dict of the submodule
    # properties. Should throw exceptions or call ctx.fail to register errors.
    validator: Optional[
        Callable[[PresubmitContext, Path, str, Dict[str, str]], None]
    ] = None


def _parse_gitmodules(path: Path) -> Dict[str, Dict[str, str]]:
    """Parse a .gitmodules file into per-submodule property dicts.

    The file is read through `git config --file <path> --list`, which prints
    one `submodule.<name>.<param>=<value>` entry per line.

    Args:
        path: The .gitmodules file to read.

    Returns:
        A dict mapping each submodule name to a dict of its parameters. A
        parameter that git prints without a value maps to an empty string.

    Raises:
        PresubmitFailure: If a line is not of the form
            `submodule.<name>.<param>[=<value>]`.
    """
    raw_submodules: str = git_repo.git_stdout(
        'config', '--file', path, '--list'
    )
    submodules: Dict[str, Dict[str, str]] = {}
    for line in raw_submodules.splitlines():
        full_key: str
        value: str
        # partition() rather than split(): git prints a valueless variable as
        # just its key, with no "=", and that must not raise ValueError.
        full_key, _, value = line.partition('=')
        if not full_key.startswith('submodule.'):
            raise PresubmitFailure(f'unexpected key {full_key!r}', path)
        key = full_key.split('.', 1)[1]

        submodule: str
        param: str
        # The submodule name may itself contain dots, so only the last
        # component is the parameter name.
        submodule, separator, param = key.rpartition('.')
        if not separator:
            # "submodule.<param>" with no submodule name in between.
            raise PresubmitFailure(f'unexpected key {full_key!r}', path)

        submodules.setdefault(submodule, {})[param] = value

    return submodules


_GERRIT_HOST_SUFFIXES = ('.googlesource.com', '.git.corp.google.com')


def _strip_gerrit_suffix(host: str) -> str:
    """Return host with a trailing Gerrit host suffix removed, if it has one."""
    for suffix in _GERRIT_HOST_SUFFIXES:
        if host.endswith(suffix):
            return host[: -len(suffix)]
    return host


def process_gitmodules(ctx: PresubmitContext, config: Config, path: Path):
    """Check if a specific .gitmodules file passes the options in the config.

    Problems with individual submodules are reported through ctx.fail(), and
    checking then moves on to the next submodule.

    Args:
        ctx: Presubmit context used to register failures.
        config: The rules to enforce.
        path: The .gitmodules file to check.

    Raises:
        PresubmitFailure: If the file cannot be parsed, or if an entry of
            config.allowed_googlesource_hosts contains "." or "-review".
        AssertionError: If config.allowed_googlesource_hosts is not a list or
            a tuple.
    """
    _LOG.debug('Evaluating path %s', path)
    submodules: Dict[str, Dict[str, str]] = _parse_gitmodules(path)

    # A bare string would be iterated character by character below and then
    # matched by substring, so insist on a real sequence type.
    assert isinstance(config.allowed_googlesource_hosts, (list, tuple))
    for allowed in config.allowed_googlesource_hosts:
        if '.' in allowed or '-review' in allowed:
            raise PresubmitFailure(
                f'invalid googlesource requirement: {allowed}'
            )

    for name, submodule in submodules.items():
        _LOG.debug('======================')
        _LOG.debug('evaluating submodule %s', name)
        _LOG.debug('%r', submodule)

        if config.require_branch:
            _LOG.debug('branch is required')
            if 'branch' not in submodule:
                ctx.fail(
                    f'submodule {name} does not have a branch set but '
                    'branches are required'
                )

        # Report a missing url as a presubmit failure instead of crashing the
        # whole step with a KeyError.
        url = submodule.get('url')
        if url is None:
            ctx.fail(f'submodule {name} does not have a url set')
            continue

        if config.validator:
            config.validator(ctx, path, name, submodule)

        if url.startswith(('/', '../')):
            _LOG.debug('URL is relative, remaining checks are irrelevant')
            continue

        if config.require_relative_urls:
            _LOG.debug('relative URLs required')
            ctx.fail(
                f'submodule {name} has non-relative url {url!r} but '
                'relative urls are required'
            )
            continue

        parsed = urllib.parse.urlparse(url)

        if not config.allow_sso:
            _LOG.debug('sso not allowed')
            if parsed.scheme in ('sso', 'rpc'):
                ctx.fail(
                    f'submodule {name} has sso/rpc url {url!r} but '
                    'sso/rpc urls are not allowed'
                )
                continue

        if not config.allow_git_corp_google_com:
            _LOG.debug('git.corp.google.com not allowed')
            if '.git.corp.google.com' in parsed.netloc:
                ctx.fail(
                    f'submodule {name} has git.corp.google.com url '
                    f'{url!r} but git.corp.google.com urls are not '
                    'allowed'
                )
                continue

        if not config.allow_non_googlesource_hosts:
            _LOG.debug('non-google hosted repos not allowed')
            if parsed.scheme not in (
                'sso',
                'rpc',
            ) and not parsed.netloc.endswith(_GERRIT_HOST_SUFFIXES):
                ctx.fail(
                    f'submodule {name} has prohibited non-Google url ' f'{url}'
                )
                continue

        if config.allowed_googlesource_hosts:
            _LOG.debug(
                'allowed googlesource hosts: %r',
                config.allowed_googlesource_hosts,
            )
            _LOG.debug('raw url: %s', url)
            host = parsed.netloc
            if host.endswith(_GERRIT_HOST_SUFFIXES) or parsed.scheme in (
                'sso',
                'rpc',
            ):
                # Strip the suffix only from the end of the host. Removing it
                # wherever it occurs would turn a host such as
                # "pig.googlesource.comweed.googlesource.com" into "pigweed"
                # and wrongly match the allow-list.
                host = _strip_gerrit_suffix(host)
                _LOG.debug('host: %s', host)
                if host not in config.allowed_googlesource_hosts:
                    ctx.fail(
                        f'submodule {name} is from prohibited Google '
                        f'Gerrit host {parsed.netloc}'
                    )
                    continue


def create(config: Optional[Config] = None):
    """Create a gitmodules presubmit step with a given config.

    Args:
        config: The rules to enforce. When omitted, a fresh default Config()
            is built for this step.

    Returns:
        A presubmit step function that runs process_gitmodules() on each path
        in ctx.paths, filtered to paths ending in ".gitmodules".
    """
    # Build the default here rather than in the signature, so separate steps
    # never share one mutable Config instance.
    step_config: Config = Config() if config is None else config

    @filter_paths(endswith='.gitmodules')
    def gitmodules(ctx: PresubmitContext):
        """Check various rules for .gitmodules files."""
        for path in ctx.paths:
            process_gitmodules(ctx, step_config, path)

    return gitmodules