# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"Implementation of PEP440 version string normalization"

# Overview
#
# The normalizer is a small backtracking recursive-descent parser. Its state
# is a stack of "contexts" (see `_ctx`), each holding the normalized text
# produced so far (`norm`) and the index of the next unread character of the
# input (`start`). A parsing step opens a context, tries to consume input,
# and then either merges its result into the parent context (`accept`) or
# throws it away (`discard`), which rewinds the cursor for free because the
# parent's `start` was never touched.
#
# Starlark has no `while` loop, so "repeat until it fails" is written as a
# `for` over a range that is at least as long as the remaining input.

def mkmethod(self, method):
    """Bind a struct as the first arg to a function.

    This is loosely equivalent to creating a bound method of a class.

    Args:
        self: The value to pass as the first positional argument.
        method: The function to bind.

    Returns:
        A function forwarding all its arguments to `method`, after `self`.
    """
    return lambda *args, **kwargs: method(self, *args, **kwargs)

def _isdigit(token):
    """Predicate testing if a token is a digit."""
    return token.isdigit()

def _isalnum(token):
    """Predicate testing if a token is alphanumeric."""
    return token.isalnum()

def _lower(token):
    """Value function returning the lower case form of a token."""

    # PEP 440: Case sensitivity
    return token.lower()

def _is(reference):
    """Predicate testing a token for equality with `reference`."""
    return lambda token: token == reference

def _is_not(reference):
    """Predicate testing a token for inequality with `reference`."""
    return lambda token: token != reference

def _in(reference):
    """Predicate testing if a token is in the list `reference`."""
    return lambda token: token in reference

def _ctx(start):
    """Create a parsing context.

    Args:
        start: index into the input of the next character to read.

    Returns:
        A dict with an empty `norm` (normalized output so far) and `start`.
    """
    return {"norm": "", "start": start}

def _open_context(self):
    """Open a new parsing ctx.

    If the current parsing step succeeds, call self.accept().
    If the current parsing step fails, call self.discard() to
    go back to how it was before we opened a new ctx.

    Args:
        self: The normalizer.

    Returns:
        The newly opened ctx. It starts reading where the enclosing
        ctx currently is and has an empty `norm`.
    """
    self.contexts.append(_ctx(_context(self)["start"]))
    return self.contexts[-1]

def _accept(self):
    """Close the current ctx successfully and merge the results.

    The normalized text of the closed ctx is appended to the enclosing
    ctx, and the enclosing cursor is moved to where the closed ctx ended.

    Args:
        self: The normalizer.

    Returns:
        Always True, so callers can `return parser.accept()`.
    """
    finished = self.contexts.pop()
    self.contexts[-1]["norm"] += finished["norm"]
    self.contexts[-1]["start"] = finished["start"]
    return True

def _context(self):
    """Return the innermost (current) parsing ctx."""
    return self.contexts[-1]

def _discard(self):
    """Close the current ctx, dropping everything it accepted.

    The enclosing ctx is left untouched, which rewinds the cursor to
    where it was when the discarded ctx was opened.

    Args:
        self: The normalizer.

    Returns:
        Always False, so callers can `return parser.discard()`.
    """
    self.contexts.pop()
    return False

def _new(input):
    """Create a new normalizer.

    Args:
        input: The string to parse.

    Returns:
        A struct exposing `input` and the methods `accept`, `context`,
        `discard` and `open_context`, all bound to a fresh ctx stack
        holding a single root ctx that starts at index 0.
    """
    self = struct(
        input = input,
        contexts = [_ctx(0)],
    )

    public = struct(
        # methods: keep sorted
        accept = mkmethod(self, _accept),
        context = mkmethod(self, _context),
        discard = mkmethod(self, _discard),
        open_context = mkmethod(self, _open_context),

        # attributes: keep sorted
        input = self.input,
    )
    return public

def accept(parser, predicate, value):
    """If `predicate` matches the next token, accept the token.

    Accepting the token means adding it (according to `value`) to
    the running results maintained in ctx["norm"] and
    advancing the cursor in ctx["start"] to the next token in
    the parser input.

    Args:
        parser: The normalizer.
        predicate: function taking a token and returning a boolean
            saying if we want to accept the token.
        value: the string to add if there's a match, or, if `value`
            is a function, the function to apply to the current token
            to get the string to add.

    Returns:
        whether a token was accepted.
    """

    ctx = parser.context()

    # Nothing left to read.
    if ctx["start"] >= len(parser.input):
        return False

    token = parser.input[ctx["start"]]

    if predicate(token):
        # `str` is a builtin, `_lower` and lambdas are plain functions.
        if type(value) in ["function", "builtin_function_or_method"]:
            value = value(token)

        ctx["norm"] += value
        ctx["start"] += 1
        return True

    return False

def accept_placeholder(parser):
    """Accept a Bazel placeholder.

    Placeholders aren't actually part of PEP 440, but are used for
    stamping purposes. A placeholder might be
    ``{BUILD_TIMESTAMP}``, for instance. We'll accept these as
    they are, assuming they will expand to something that makes
    sense where they appear. Before the stamping has happened, a
    resulting wheel file name containing a placeholder will not
    actually be valid.

    Args:
        parser: The normalizer.

    Returns:
        whether a placeholder was accepted.
    """
    ctx = parser.open_context()

    if not accept(parser, _is("{"), str):
        return parser.discard()

    # Consume everything up to (not including) the closing brace.
    start = ctx["start"]
    for _ in range(start, len(parser.input) + 1):
        if not accept(parser, _is_not("}"), str):
            break

    # An unterminated placeholder is not a placeholder.
    if not accept(parser, _is("}"), str):
        return parser.discard()

    return parser.accept()

def accept_digits(parser):
    """Accept multiple digits (or placeholders).

    If the accepted text consists only of digits, it is normalized as an
    integer (leading zeros are dropped). Text containing a placeholder
    is kept as it is.

    Args:
        parser: The normalizer.

    Returns:
        whether some digits (or placeholders) were accepted.
    """

    ctx = parser.open_context()
    start = ctx["start"]

    for i in range(start, len(parser.input) + 1):
        if not accept(parser, _isdigit, str) and not accept_placeholder(parser):
            # `i - start` counts the successful iterations before this one.
            if i - start >= 1:
                if ctx["norm"].isdigit():
                    # PEP 440: Integer Normalization
                    ctx["norm"] = str(int(ctx["norm"]))
                return parser.accept()
            break

    return parser.discard()

def accept_string(parser, string, replacement):
    """Accept a `string` in the input. Output `replacement`.

    Each character of `string` matches itself or its upper case form, so
    a lower case `string` is matched case-insensitively.

    Args:
        parser: The normalizer.
        string: The string to search for in the parser input.
        replacement: The normalized string to use if the string was found.

    Returns:
        whether the string was accepted.
    """
    ctx = parser.open_context()

    for character in string.elems():
        if not accept(parser, _in([character, character.upper()]), ""):
            return parser.discard()

    ctx["norm"] = replacement

    return parser.accept()

def accept_alnum(parser):
    """Accept an alphanumeric sequence.

    Alphanumeric characters are lower-cased; placeholders are also
    accepted and kept as they are.

    Args:
        parser: The normalizer.

    Returns:
        whether an alphanumeric sequence was accepted.
    """

    ctx = parser.open_context()
    start = ctx["start"]

    for i in range(start, len(parser.input) + 1):
        if not accept(parser, _isalnum, _lower) and not accept_placeholder(parser):
            # `i - start` counts the successful iterations before this one.
            if i - start >= 1:
                return parser.accept()
            break

    return parser.discard()

def accept_dot_number(parser):
    """Accept a dot followed by digits.

    Args:
        parser: The normalizer.

    Returns:
        whether a dot+digits pair was accepted.
    """
    parser.open_context()

    if accept(parser, _is("."), ".") and accept_digits(parser):
        return parser.accept()
    else:
        return parser.discard()

def accept_dot_number_sequence(parser):
    """Accept a sequence of dot+digits.

    Args:
        parser: The normalizer.

    Returns:
        whether a sequence of dot+digits pairs was accepted.
    """
    ctx = parser.context()
    start = ctx["start"]
    i = start

    for i in range(start, len(parser.input) + 1):
        if not accept_dot_number(parser):
            break

    # `i - start` is the number of pairs that were accepted.
    return i - start >= 1

def _accept_local_segment(parser):
    """Accept one segment of a local version identifier.

    A segment is an alphanumeric sequence. It is parsed as a whole, so
    that a segment starting with digits and continuing with letters
    (e.g. `5b`) is not cut off after the digits.

    Args:
        parser: The normalizer.

    Returns:
        whether a local version segment was accepted.
    """
    ctx = parser.open_context()

    if not accept_alnum(parser):
        return parser.discard()

    if ctx["norm"].isdigit():
        # PEP 440: Integer Normalization. This only applies to purely
        # numeric segments; `foo0100` is already in its normalized form.
        ctx["norm"] = str(int(ctx["norm"]))

    return parser.accept()

def accept_separator_alnum(parser):
    """Accept a separator followed by an alphanumeric string.

    The separator (`.`, `-` or `_`) is normalized to `.`. A purely
    numeric string is normalized as an integer.

    Args:
        parser: The normalizer.

    Returns:
        whether a separator and an alphanumeric string were accepted.
    """
    parser.open_context()

    # PEP 440: Local version segments
    if (
        accept(parser, _in([".", "-", "_"]), ".") and
        _accept_local_segment(parser)
    ):
        return parser.accept()

    return parser.discard()

def accept_separator_alnum_sequence(parser):
    """Accept a sequence of separator+alphanumeric.

    Args:
        parser: The normalizer.

    Returns:
        whether a sequence of separator+alphanumerics was accepted.
    """
    ctx = parser.context()
    start = ctx["start"]
    i = start

    for i in range(start, len(parser.input) + 1):
        if not accept_separator_alnum(parser):
            break

    # `i - start` is the number of pairs that were accepted.
    return i - start >= 1

def accept_epoch(parser):
    """PEP 440: Version epochs.

    An explicit epoch of zero is dropped from the normalized output.

    Args:
        parser: The normalizer.

    Returns:
        whether a PEP 440 epoch identifier was accepted.
    """
    ctx = parser.open_context()
    if accept_digits(parser) and accept(parser, _is("!"), "!"):
        if ctx["norm"] == "0!":
            ctx["norm"] = ""
        return parser.accept()
    else:
        return parser.discard()

def accept_release(parser):
    """Accept the release segment, numbers separated by dots.

    Args:
        parser: The normalizer.

    Returns:
        whether a release segment was accepted.
    """
    parser.open_context()

    if not accept_digits(parser):
        return parser.discard()

    # Further `.N` components are optional.
    accept_dot_number_sequence(parser)
    return parser.accept()

def accept_pre_l(parser):
    """PEP 440: Pre-release spelling.

    Args:
        parser: The normalizer.

    Returns:
        whether a prerelease keyword was accepted.
    """
    parser.open_context()

    # Longer spellings are tried before their own prefixes
    # ("alpha" before "a", "preview" before "pre").
    if (
        accept_string(parser, "alpha", "a") or
        accept_string(parser, "a", "a") or
        accept_string(parser, "beta", "b") or
        accept_string(parser, "b", "b") or
        accept_string(parser, "c", "rc") or
        accept_string(parser, "preview", "rc") or
        accept_string(parser, "pre", "rc") or
        accept_string(parser, "rc", "rc")
    ):
        return parser.accept()
    else:
        return parser.discard()

def accept_prerelease(parser):
    """PEP 440: Pre-releases.

    Args:
        parser: The normalizer.

    Returns:
        whether a prerelease identifier was accepted.
    """
    ctx = parser.open_context()

    # PEP 440: Pre-release separators
    accept(parser, _in(["-", "_", "."]), "")

    if not accept_pre_l(parser):
        return parser.discard()

    accept(parser, _in(["-", "_", "."]), "")

    if not accept_digits(parser):
        # PEP 440: Implicit pre-release number
        ctx["norm"] += "0"

    return parser.accept()

def accept_implicit_postrelease(parser):
    """PEP 440: Implicit post releases.

    Args:
        parser: The normalizer.

    Returns:
        whether an implicit postrelease identifier was accepted.
    """
    ctx = parser.open_context()

    if accept(parser, _is("-"), "") and accept_digits(parser):
        ctx["norm"] = ".post" + ctx["norm"]
        return parser.accept()

    return parser.discard()

def accept_explicit_postrelease(parser):
    """PEP 440: Post-releases.

    Args:
        parser: The normalizer.

    Returns:
        whether an explicit postrelease identifier was accepted.
    """
    ctx = parser.open_context()

    # PEP 440: Post release separators
    if not accept(parser, _in(["-", "_", "."]), "."):
        ctx["norm"] += "."

    # PEP 440: Post release spelling
    if (
        accept_string(parser, "post", "post") or
        accept_string(parser, "rev", "post") or
        accept_string(parser, "r", "post")
    ):
        accept(parser, _in(["-", "_", "."]), "")

        if not accept_digits(parser):
            # PEP 440: Implicit post release number
            ctx["norm"] += "0"

        return parser.accept()

    return parser.discard()

def accept_postrelease(parser):
    """PEP 440: Post-releases.

    Args:
        parser: The normalizer.

    Returns:
        whether a postrelease identifier was accepted.
    """
    parser.open_context()

    if accept_implicit_postrelease(parser) or accept_explicit_postrelease(parser):
        return parser.accept()

    return parser.discard()

def accept_devrelease(parser):
    """PEP 440: Developmental releases.

    Args:
        parser: The normalizer.

    Returns:
        whether a developmental release identifier was accepted.
    """
    ctx = parser.open_context()

    # PEP 440: Development release separators
    if not accept(parser, _in(["-", "_", "."]), "."):
        ctx["norm"] += "."

    if accept_string(parser, "dev", "dev"):
        accept(parser, _in(["-", "_", "."]), "")

        if not accept_digits(parser):
            # PEP 440: Implicit development release number
            ctx["norm"] += "0"

        return parser.accept()

    return parser.discard()

def accept_local(parser):
    """PEP 440: Local version identifiers.

    Args:
        parser: The normalizer.

    Returns:
        whether a local version identifier was accepted.
    """
    parser.open_context()

    if accept(parser, _is("+"), "+") and _accept_local_segment(parser):
        # Further separator+segment pairs are optional.
        accept_separator_alnum_sequence(parser)
        return parser.accept()

    return parser.discard()

def normalize_pep440(version):
    """Escape the version component of a filename.

    See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
    and https://peps.python.org/pep-0440/

    Fails the build if some of the input is left over after all the
    version segments have been parsed.

    Args:
      version: version string to be normalized according to PEP 440.

    Returns:
      string containing the normalized version.
    """
    parser = _new(version.strip())  # PEP 440: Leading and Trailing Whitespace

    # PEP 440: Preceding v character (matched case-insensitively)
    accept(parser, _in(["v", "V"]), "")

    # NOTE(review): the results of the segment parsers below are ignored,
    # including accept_release, so an input without a release segment is
    # not rejected here as long as everything else parses.
    accept_epoch(parser)
    accept_release(parser)
    accept_prerelease(parser)
    accept_postrelease(parser)
    accept_devrelease(parser)
    accept_local(parser)
    if parser.input[parser.context()["start"]:]:
        fail(
            "Failed to parse PEP 440 version identifier '%s'." % parser.input,
            "Parse error at '%s'" % parser.input[parser.context()["start"]:],
        )
    return parser.context()["norm"]