• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2023 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"Implementation of PEP440 version string normalization"
16
def mkmethod(self, method):
    """Create a callable that always passes `self` first to `method`.

    This is loosely equivalent to creating a bound method of a class.

    Args:
      self: The value to bind as the first positional argument.
      method: The function to wrap.

    Returns:
      A function that forwards its positional and keyword arguments to
      `method`, preceded by `self`.
    """

    def _bound(*args, **kwargs):
        return method(self, *args, **kwargs)

    return _bound
23
def _isdigit(token):
    """Predicate: does `token` consist only of digits?"""
    only_digits = token.isdigit()
    return only_digits
26
def _isalnum(token):
    """Predicate: does `token` consist only of letters and digits?"""
    only_alnum = token.isalnum()
    return only_alnum
29
def _lower(token):
    """Return `token` in lower case (PEP 440: Case sensitivity)."""
    lowered = token.lower()
    return lowered
33
def _is(reference):
    """Build a predicate that is true for tokens equal to `reference`."""

    def _equals(token):
        return token == reference

    return _equals
37
def _is_not(reference):
    """Build a predicate that is true for tokens different from `reference`."""

    def _differs(token):
        return token != reference

    return _differs
41
def _in(reference):
    """Build a predicate that is true for tokens contained in the list `reference`."""

    def _contained(token):
        return token in reference

    return _contained
45
def _ctx(start):
    """Create a parsing context: empty normalized output, cursor at `start`."""
    return dict(norm = "", start = start)
48
def _open_context(self):
    """Open a new parsing ctx on top of the current one.

    The new ctx starts with empty output and with its cursor at the
    position of the ctx that was current before the call.

    If the current parsing step succeeds, call self.accept().
    If the current parsing step fails, call self.discard() to
    go back to how it was before we opened a new ctx.

    Args:
      self: The normalizer.

    Returns:
      The newly opened ctx.
    """
    parent = _context(self)
    child = _ctx(parent["start"])
    self.contexts.append(child)
    return child
61
def _accept(self):
    """Close the current ctx successfully and merge it into its parent.

    The finished ctx's output is appended to the parent's output and the
    parent's cursor is moved to where the finished ctx stopped.

    Args:
      self: The normalizer.

    Returns:
      Always True.
    """
    child = self.contexts.pop()
    parent = self.contexts[-1]
    parent["norm"] = parent["norm"] + child["norm"]
    parent["start"] = child["start"]
    return True
68
def _context(self):
    """Return the current (most recently opened) ctx of the normalizer."""
    stack = self.contexts
    return stack[-1]
71
def _discard(self):
    """Close the current ctx unsuccessfully, dropping whatever it collected.

    Args:
      self: The normalizer.

    Returns:
      Always False.
    """

    # The parent ctx is left untouched, so its cursor and output are
    # exactly as they were before the discarded ctx was opened.
    self.contexts.pop()
    return False
75
def _new(input):
    """Create a new normalizer for `input`.

    Args:
      input: The string to be parsed.

    Returns:
      A struct exposing the input and the ctx-handling methods
      (accept, context, discard, open_context) bound to the internal
      parsing state.
    """

    # Internal state: the input and a stack of contexts whose bottom
    # entry starts at the beginning of the input.
    state = struct(
        contexts = [_ctx(0)],
        input = input,
    )

    return struct(
        # methods: keep sorted
        accept = mkmethod(state, _accept),
        context = mkmethod(state, _context),
        discard = mkmethod(state, _discard),
        open_context = mkmethod(state, _open_context),

        # attributes: keep sorted
        input = state.input,
    )
94
def accept(parser, predicate, value):
    """Accept the next token if `predicate` matches it.

    Accepting the token means appending its normalized form (given by
    `value`) to the running result in ctx["norm"] and advancing the
    cursor in ctx["start"] by one position in the parser input.

    Args:
      parser: The normalizer.
      predicate: function taking a token and returning a boolean
        saying if we want to accept the token.
      value: the string to add if there's a match, or, if `value`
        is a function, the function to apply to the current token
        to get the string to add.

    Returns:
      whether a token was accepted.
    """
    ctx = parser.context()
    cursor = ctx["start"]

    # Nothing left to read.
    if cursor >= len(parser.input):
        return False

    token = parser.input[cursor]
    if not predicate(token):
        return False

    # `value` is either a literal replacement or a callable mapping the
    # token to its replacement.
    if type(value) in ["function", "builtin_function_or_method"]:
        addition = value(token)
    else:
        addition = value

    ctx["norm"] += addition
    ctx["start"] += 1
    return True
131
def accept_placeholder(parser):
    """Accept a Bazel placeholder.

    Placeholders aren't actually part of PEP 440, but are used for
    stamping purposes. A placeholder might be
    ``{BUILD_TIMESTAMP}``, for instance. They are copied to the output
    unchanged, on the assumption that they will expand to something
    that makes sense where they appear. Before the stamping has
    happened, a resulting wheel file name containing a placeholder
    will not actually be valid.

    Args:
      parser: The normalizer.

    Returns:
      whether a placeholder was accepted.
    """
    parser.open_context()

    if not accept(parser, _is("{"), str):
        return parser.discard()

    # Copy everything up to (not including) the closing brace. The
    # iteration count is bounded by the amount of input left.
    not_closing = _is_not("}")
    remaining = len(parser.input) + 1 - parser.context()["start"]
    for _ in range(remaining):
        if not accept(parser, not_closing, str):
            break

    if accept(parser, _is("}"), str):
        return parser.accept()

    # Input ended before the placeholder was closed.
    return parser.discard()
163
def accept_digits(parser):
    """Accept a run of one or more digits and/or placeholders.

    If the accepted run consists of digits only, it is rewritten per
    PEP 440 integer normalization (e.g. leading zeros are dropped).

    Args:
      parser: The normalizer.

    Returns:
      whether some digits (or placeholders) were accepted.
    """
    ctx = parser.open_context()
    accepted = 0

    # Each successful iteration consumes at least one character, so the
    # range is always long enough to reach the first non-matching token.
    for _ in range(ctx["start"], len(parser.input) + 1):
        if accept(parser, _isdigit, str) or accept_placeholder(parser):
            accepted += 1
        else:
            break

    if accepted == 0:
        return parser.discard()

    collected = ctx["norm"]
    if collected.isdigit():
        # PEP 440: Integer Normalization
        ctx["norm"] = str(int(collected))
    return parser.accept()
187
def accept_string(parser, string, replacement):
    """Accept a `string` in the input. Output `replacement`.

    Each character of `string` is matched against the input as given
    or in its upper-case form.

    Args:
      parser: The normalizer.
      string: The string to search for in the parser input.
      replacement: The normalized string to use if the string was found.

    Returns:
      whether the string was accepted.
    """
    ctx = parser.open_context()

    for wanted in string.elems():
        candidates = [wanted, wanted.upper()]
        matched = accept(parser, _in(candidates), "")
        if not matched:
            return parser.discard()

    # The matched characters contributed nothing to the output; emit the
    # normalized spelling instead.
    ctx["norm"] = replacement

    return parser.accept()
208
def accept_alnum(parser):
    """Accept a run of one or more alphanumeric characters and/or placeholders.

    Alphanumeric characters are lower-cased in the output; placeholders
    are copied as they are.

    Args:
      parser: The normalizer.

    Returns:
      whether an alphanumeric sequence was accepted.
    """
    ctx = parser.open_context()
    accepted = 0

    # Each successful iteration consumes at least one character, so the
    # range is always long enough to reach the first non-matching token.
    for _ in range(ctx["start"], len(parser.input) + 1):
        if accept(parser, _isalnum, _lower) or accept_placeholder(parser):
            accepted += 1
        else:
            break

    if accepted >= 1:
        return parser.accept()
    return parser.discard()
229
def accept_dot_number(parser):
    """Accept a dot followed by digits.

    Args:
      parser: The normalizer.

    Returns:
      whether a dot+digits pair was accepted.
    """
    parser.open_context()

    if not accept(parser, _is("."), "."):
        return parser.discard()

    # A dot without digits after it is rejected as a whole, so the dot
    # is not consumed.
    if not accept_digits(parser):
        return parser.discard()

    return parser.accept()
245
def accept_dot_number_sequence(parser):
    """Accept as many consecutive dot+digits pairs as the input provides.

    Args:
      parser: The normalizer.

    Returns:
      whether a sequence of dot+digits pairs was accepted.
    """
    pairs = 0

    # The range only bounds the number of attempts; every accepted pair
    # consumes input, so the loop ends at the first failed attempt.
    for _ in range(parser.context()["start"], len(parser.input) + 1):
        if not accept_dot_number(parser):
            break
        pairs += 1

    return pairs >= 1
263
def accept_separator_alnum(parser):
    """Accept a separator followed by an alphanumeric string.

    The separator (".", "-" or "_") is normalized to ".". The segment
    after it is parsed as a whole alphanumeric run, so a segment such as
    "01a" is kept intact instead of being split after its leading
    digits. A segment made of digits only is normalized as an integer.

    Args:
      parser: The normalizer.

    Returns:
      whether a separator and an alphanumeric string were accepted.
    """
    ctx = parser.open_context()

    # PEP 440: Local version segments
    if not accept(parser, _in([".", "-", "_"]), "."):
        return parser.discard()

    if not accept_alnum(parser):
        return parser.discard()

    # ctx["norm"] is now "." followed by the segment.
    segment = ctx["norm"][1:]
    if segment.isdigit():
        # PEP 440: Integer Normalization
        ctx["norm"] = "." + str(int(segment))

    return parser.accept()
283
def accept_separator_alnum_sequence(parser):
    """Accept as many consecutive separator+alphanumeric pairs as the input provides.

    Args:
      parser: The normalizer.

    Returns:
      whether a sequence of separator+alphanumerics was accepted.
    """
    pairs = 0

    # The range only bounds the number of attempts; every accepted pair
    # consumes input, so the loop ends at the first failed attempt.
    for _ in range(parser.context()["start"], len(parser.input) + 1):
        if not accept_separator_alnum(parser):
            break
        pairs += 1

    return pairs >= 1
302
def accept_epoch(parser):
    """PEP 440: Version epochs.

    Accepts digits followed by "!". An epoch of zero is dropped from
    the normalized output.

    Args:
      parser: The normalizer.

    Returns:
      whether a PEP 440 epoch identifier was accepted.
    """
    ctx = parser.open_context()

    if not (accept_digits(parser) and accept(parser, _is("!"), "!")):
        return parser.discard()

    if ctx["norm"] == "0!":
        ctx["norm"] = ""
    return parser.accept()
319
def accept_release(parser):
    """Accept the release segment, numbers separated by dots.

    Args:
      parser: The normalizer.

    Returns:
      whether a release segment was accepted.
    """
    parser.open_context()

    if accept_digits(parser):
        # Further ".N" components are optional; their absence is not a
        # failure.
        accept_dot_number_sequence(parser)
        return parser.accept()

    return parser.discard()
336
def accept_pre_l(parser):
    """PEP 440: Pre-release spelling.

    Args:
      parser: The normalizer.

    Returns:
      whether a prerelease keyword was accepted.
    """

    # (spelling, normalized form), tried in this order. Longer spellings
    # come before the shorter spellings they start with.
    spellings = [
        ("alpha", "a"),
        ("a", "a"),
        ("beta", "b"),
        ("b", "b"),
        ("c", "rc"),
        ("preview", "rc"),
        ("pre", "rc"),
        ("rc", "rc"),
    ]

    parser.open_context()

    for spelling, normalized in spellings:
        if accept_string(parser, spelling, normalized):
            return parser.accept()

    return parser.discard()
361
def accept_prerelease(parser):
    """PEP 440: Pre-releases.

    Args:
      parser: The normalizer.

    Returns:
      whether a prerelease identifier was accepted.
    """
    separators = ["-", "_", "."]
    ctx = parser.open_context()

    # PEP 440: Pre-release separators
    # An optional separator before the keyword is dropped.
    accept(parser, _in(separators), "")

    if accept_pre_l(parser):
        # An optional separator between keyword and number is dropped too.
        accept(parser, _in(separators), "")

        if not accept_digits(parser):
            # PEP 440: Implicit pre-release number
            ctx["norm"] += "0"

        return parser.accept()

    return parser.discard()
386
def accept_implicit_postrelease(parser):
    """PEP 440: Implicit post releases.

    Accepts "-" followed by digits and emits ".post" plus the digits.

    Args:
      parser: The normalizer.

    Returns:
      whether an implicit postrelease identifier was accepted.
    """
    ctx = parser.open_context()

    if not accept(parser, _is("-"), ""):
        return parser.discard()

    if not accept_digits(parser):
        return parser.discard()

    # The dash produced no output, so ctx["norm"] holds only the number.
    ctx["norm"] = ".post" + ctx["norm"]
    return parser.accept()
403
def accept_explicit_postrelease(parser):
    """PEP 440: Post-releases.

    Args:
      parser: The normalizer.

    Returns:
      whether an explicit postrelease identifier was accepted.
    """
    separators = ["-", "_", "."]
    ctx = parser.open_context()

    # PEP 440: Post release separators
    # The output always has a "." here, whether or not the input had a
    # separator.
    if not accept(parser, _in(separators), "."):
        ctx["norm"] += "."

    # PEP 440: Post release spelling
    found = False
    for spelling in ["post", "rev", "r"]:
        if accept_string(parser, spelling, "post"):
            found = True
            break

    if not found:
        return parser.discard()

    accept(parser, _in(separators), "")

    if not accept_digits(parser):
        # PEP 440: Implicit post release number
        ctx["norm"] += "0"

    return parser.accept()
434
def accept_postrelease(parser):
    """PEP 440: Post-releases, in implicit or explicit form.

    Args:
      parser: The normalizer.

    Returns:
      whether a postrelease identifier was accepted.
    """
    parser.open_context()

    # The implicit form ("-N") is tried first, then the explicit one.
    if accept_implicit_postrelease(parser):
        return parser.accept()
    if accept_explicit_postrelease(parser):
        return parser.accept()

    return parser.discard()
450
def accept_devrelease(parser):
    """PEP 440: Developmental releases.

    Args:
      parser: The normalizer.

    Returns:
      whether a developmental release identifier was accepted.
    """
    separators = ["-", "_", "."]
    ctx = parser.open_context()

    # PEP 440: Development release separators
    # The output always has a "." here, whether or not the input had a
    # separator.
    if not accept(parser, _in(separators), "."):
        ctx["norm"] += "."

    if not accept_string(parser, "dev", "dev"):
        return parser.discard()

    accept(parser, _in(separators), "")

    if not accept_digits(parser):
        # PEP 440: Implicit development release number
        ctx["norm"] += "0"

    return parser.accept()
476
def accept_local(parser):
    """PEP 440: Local version identifiers.

    Accepts "+" followed by an alphanumeric segment and any number of
    further separator+alphanumeric segments. A first segment made of
    digits only is normalized as an integer (e.g. "+001" becomes "+1").

    Args:
      parser: The normalizer.

    Returns:
      whether a local version identifier was accepted.
    """
    ctx = parser.open_context()

    if not (accept(parser, _is("+"), "+") and accept_alnum(parser)):
        return parser.discard()

    # ctx["norm"] is now "+" followed by the first segment.
    first_segment = ctx["norm"][1:]
    if first_segment.isdigit():
        # PEP 440: Integer Normalization
        ctx["norm"] = "+" + str(int(first_segment))

    # Further segments are optional.
    accept_separator_alnum_sequence(parser)
    return parser.accept()
493
def normalize_pep440(version):
    """Escape the version component of a filename.

    See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
    and https://peps.python.org/pep-0440/

    Surrounding whitespace and a leading "v" or "V" are dropped, then
    the epoch, release, pre-release, post-release, development release
    and local version parts are parsed in that order. The build fails
    if any input is left unparsed afterwards.

    Args:
      version: version string to be normalized according to PEP 440.

    Returns:
      string containing the normalized version.
    """
    parser = _new(version.strip())  # PEP 440: Leading and Trailing Whitespace

    # PEP 440: Preceding v character. Version parsing is case
    # insensitive, so an upper-case "V" is dropped as well.
    accept(parser, _in(["v", "V"]), "")
    accept_epoch(parser)
    accept_release(parser)
    accept_prerelease(parser)
    accept_postrelease(parser)
    accept_devrelease(parser)
    accept_local(parser)

    unparsed = parser.input[parser.context()["start"]:]
    if unparsed:
        fail(
            "Failed to parse PEP 440 version identifier '%s'." % parser.input,
            "Parse error at '%s'" % unparsed,
        )
    return parser.context()["norm"]
520