• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import email.feedparser
2import email.header
3import email.message
4import email.parser
5import email.policy
6import sys
7import typing
8from typing import (
9    Any,
10    Callable,
11    Dict,
12    Generic,
13    List,
14    Optional,
15    Tuple,
16    Type,
17    Union,
18    cast,
19)
20
21from . import requirements, specifiers, utils, version as version_module
22
T = typing.TypeVar("T")

# ``Literal`` and ``TypedDict`` live in ``typing`` from Python 3.8 onwards. On
# older interpreters they come from ``typing_extensions`` when it is
# installed, and otherwise from the minimal runtime stand-ins defined below.
if sys.version_info[:2] >= (3, 8):  # pragma: no cover
    from typing import Literal, TypedDict
else:  # pragma: no cover
    if typing.TYPE_CHECKING:
        from typing_extensions import Literal, TypedDict
    else:
        try:
            from typing_extensions import Literal, TypedDict
        except ImportError:

            class Literal:
                """Runtime placeholder used when no real ``Literal`` exists."""

                def __init_subclass__(*_args, **_kwargs):
                    pass

                def __class_getitem__(cls, _params):
                    # This module subscripts ``Literal[...]`` at import time;
                    # without this hook the placeholder would raise
                    # ``TypeError: 'type' object is not subscriptable``.
                    return cls

            class TypedDict:
                """Runtime placeholder used when no real ``TypedDict`` exists."""

                def __init_subclass__(*_args, **_kwargs):
                    # Swallow class keywords such as ``total=False``.
                    pass
41
42
try:
    ExceptionGroup
except NameError:  # pragma: no cover

    class ExceptionGroup(Exception):  # noqa: N818
        """Small stand-in for :external:exc:`ExceptionGroup` (Python 3.11+).

        It is only defined when the interpreter does not already provide
        :external:exc:`ExceptionGroup`; otherwise the built-in is used.
        """

        message: str
        exceptions: List[Exception]

        def __init__(self, message: str, exceptions: List[Exception]) -> None:
            self.exceptions = exceptions
            self.message = message

        def __repr__(self) -> str:
            cls_name = self.__class__.__name__
            return f"{cls_name}({self.message!r}, {self.exceptions!r})"

else:  # pragma: no cover
    # Re-bind the built-in so the name is a module-level attribute either way.
    ExceptionGroup = ExceptionGroup
66
67
class InvalidMetadata(ValueError):
    """Raised when a metadata field holds invalid data."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        # Only ``message`` becomes the exception argument; the field name is
        # carried separately as an attribute.
        super().__init__(message)
        self.field = field
77
78
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way, then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field has a key with a plural name. See :class:`Metadata` whose attributes
    match the keys of this dictionary.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.

    Because the class is declared with ``total=False``, every key is optional.

    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: List[str]
    summary: str
    description: str
    keywords: List[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: List[str]
    download_url: str
    classifiers: List[str]
    requires: List[str]
    provides: List[str]
    obsoletes: List[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: List[str]
    provides_dist: List[str]
    obsoletes_dist: List[str]
    requires_python: str
    requires_external: List[str]
    project_urls: Dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: List[str]

    # Metadata 2.2 - PEP 643
    dynamic: List[str]

    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.
149
150
# RawMetadata keys whose value is a single string. parse_email() only accepts
# these when the corresponding header appears exactly once.
_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

# RawMetadata keys whose value is a list of strings. "keywords" is not listed
# here because parse_email() handles it as a special case.
_LIST_FIELDS = {
    "classifiers",
    "dynamic",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}

# RawMetadata keys whose value is a mapping of str to str.
_DICT_FIELDS = {
    "project_urls",
}
186
187
188def _parse_keywords(data: str) -> List[str]:
189    """Split a string of comma-separate keyboards into a list of keywords."""
190    return [k.strip() for k in data.split(",")]
191
192
193def _parse_project_urls(data: List[str]) -> Dict[str, str]:
194    """Parse a list of label/URL string pairings separated by a comma."""
195    urls = {}
196    for pair in data:
197        # Our logic is slightly tricky here as we want to try and do
198        # *something* reasonable with malformed data.
199        #
200        # The main thing that we have to worry about, is data that does
201        # not have a ',' at all to split the label from the Value. There
202        # isn't a singular right answer here, and we will fail validation
203        # later on (if the caller is validating) so it doesn't *really*
204        # matter, but since the missing value has to be an empty str
205        # and our return value is dict[str, str], if we let the key
206        # be the missing value, then they'd have multiple '' values that
207        # overwrite each other in a accumulating dict.
208        #
209        # The other potentional issue is that it's possible to have the
210        # same label multiple times in the metadata, with no solid "right"
211        # answer with what to do in that case. As such, we'll do the only
212        # thing we can, which is treat the field as unparseable and add it
213        # to our list of unparsed fields.
214        parts = [p.strip() for p in pair.split(",", 1)]
215        parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items
216
217        # TODO: The spec doesn't say anything about if the keys should be
218        #       considered case sensitive or not... logically they should
219        #       be case-preserving and case-insensitive, but doing that
220        #       would open up more cases where we might have duplicate
221        #       entries.
222        label, url = parts
223        if label in urls:
224            # The label already exists in our set of urls, so this field
225            # is unparseable, and we can just add the whole thing to our
226            # unparseable data and stop processing it.
227            raise KeyError("duplicate labels in project urls")
228        urls[label] = url
229
230    return urls
231
232
233def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
234    """Get the body of the message."""
235    # If our source is a str, then our caller has managed encodings for us,
236    # and we don't need to deal with it.
237    if isinstance(source, str):
238        payload: str = msg.get_payload()
239        return payload
240    # If our source is a bytes, then we're managing the encoding and we need
241    # to deal with it.
242    else:
243        bpayload: bytes = msg.get_payload(decode=True)
244        try:
245            return bpayload.decode("utf8", "strict")
246        except UnicodeDecodeError:
247            raise ValueError("payload in an invalid encoding")
248
249
250# The various parse_FORMAT functions here are intended to be as lenient as
251# possible in their parsing, while still returning a correctly typed
252# RawMetadata.
253#
254# To aid in this, we also generally want to do as little touching of the
255# data as possible, except where there are possibly some historic holdovers
256# that make valid data awkward to work with.
257#
258# While this is a lower level, intermediate format than our ``Metadata``
259# class, some light touch ups can make a massive difference in usability.
260
# Map METADATA fields to RawMetadata.
# Keys are lower-cased header names; values are the matching RawMetadata keys
# (plural for headers that map to a list or dict).
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}
# Inverse of the mapping above: RawMetadata key -> lower-cased header name.
# The loop variable ``email`` is local to the comprehension and does not
# rebind the ``email`` package imported by this module.
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}
293
294
def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately. Finally,
    any fields that are expected to appear only once but are repeated are
    included in this dict.

    :param data: The metadata document, either as text or as raw bytes. Header
        values and the body of bytes input are checked for valid UTF-8.
    :returns: A ``(raw, unparsed)`` tuple. ``raw`` is keyed by
        :class:`RawMetadata` key names, ``unparsed`` by lower-cased header
        names.

    """
    raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
    unparsed: Dict[str, List[str]] = {}

    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    for name in frozenset(parsed.keys()):
        # Header names in RFC are case insensitive, so we'll normalize to all
        # lower case to make comparisons easier.
        name = name.lower()

        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all() ),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyways.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: List[Tuple[bytes, Optional[str]]] = []
                for bin, encoding in email.header.decode_header(h):
                    try:
                        bin.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((bin, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation, we've encountered a key that
            # we don't know what it means, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                # Duplicate labels: keep the original strings as unparsed.
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so we'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        # The body could not be decoded; keep whatever the email module
        # hands back so the caller can still see it under 'description'.
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a metadata, because a TypedDict only supports
    # literal key names, but we're computing our key names on purpose, but the
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
    return cast(RawMetadata, raw), unparsed
469
470
# Unique sentinel object.
# NOTE(review): not referenced anywhere in the visible part of this module.
_NOT_FOUND = object()


# Keep the two values in sync.
# The list is ordered oldest to newest; Metadata.from_raw() compares positions
# in it to decide whether a field is too new for the declared version.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]

# Attributes whose _process_*() converter is always run, even when the raw
# value is missing, so that their absence is reported.
_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])
479
480
class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).

    Instances are descriptors: on first attribute access they convert the raw
    value, cache the result on the owning instance and drop the raw entry.
    """

    name: str
    raw_name: str
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        """Record the metadata version in which the field was introduced."""
        self.added = added

    def __set_name__(self, _owner: "Metadata", name: str) -> None:
        """Remember the attribute name and the matching email header name."""
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T:
        """Return the enriched value for *instance*, validating it on the way.

        Raises:
            InvalidMetadata: if the matching ``_process_*`` converter rejects
                the raw value.
        """
        # With Python 3.8, the caching can be replaced with functools.cached_property().
        # No need to check the cache as attribute lookup will resolve into the
        # instance's __dict__ before __get__ is called.
        cache = instance.__dict__
        value = instance._raw.get(self.name)

        # To make the _process_* methods easier, we'll check if the value is None
        # and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This will mean that the
        # converters never have to deal with the None union.
        if self.name in _REQUIRED_ATTRS or value is not None:
            try:
                converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
            except AttributeError:
                # No converter for this field: the raw value is used as-is.
                pass
            else:
                value = converter(value)

        cache[self.name] = value
        try:
            del instance._raw[self.name]  # type: ignore[misc]
        except KeyError:
            pass

        return cast(T, value)

    def _invalid_metadata(
        self, msg: str, cause: Optional[Exception] = None
    ) -> InvalidMetadata:
        """Build an :exc:`InvalidMetadata` for this field.

        Every ``{field}`` placeholder in *msg* is replaced by the repr of the
        email header name. *cause* is attached as ``__cause__``.
        """
        # Plain substitution is used instead of ``str.format_map`` because
        # *msg* usually embeds the repr of user-supplied data; stray ``{`` or
        # ``}`` characters in that data would make ``format_map`` raise
        # KeyError/ValueError instead of yielding an InvalidMetadata.
        exc = InvalidMetadata(
            self.raw_name, msg.replace("{field}", repr(self.raw_name))
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        """Check that *value* is one of the known metadata versions."""
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        """Check that *value* is present and a valid project name."""
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        """Check that *value* is present and parse it into a ``Version``."""
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        """Check the content type, charset and Markdown variant of *value*."""
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        message = email.message.EmailMessage()
        message["content-type"] = value

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            # Sorted so the message does not depend on set iteration order.
            raise self._invalid_metadata(
                f"{{field}} must be one of {sorted(content_types)}, not {value!r}"
            )

        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            # Report the charset itself; list(charset) would print a list of
            # its individual characters.
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are "
                f"{sorted(markdown_variants)}, not {variant!r}",
            )
        return value

    def _process_dynamic(self, value: List[str]) -> List[str]:
        """Check every entry names a permitted field; return them lowercased."""
        for dynamic_field in map(str.lower, value):
            # Name the offending entry rather than the whole list.
            if dynamic_field in {"name", "version", "metadata-version"}:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(
                    f"{dynamic_field!r} is not a valid dynamic field"
                )
        return list(map(str.lower, value))

    def _process_provides_extra(
        self,
        value: List[str],
    ) -> List[utils.NormalizedName]:
        """Validate every extra name and return the normalized names."""
        normalized_names = []
        try:
            for name in value:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
        except utils.InvalidName as exc:
            # ``name`` is still bound to the entry that failed.
            raise self._invalid_metadata(
                f"{name!r} is invalid for {{field}}", cause=exc
            )
        else:
            return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        """Parse *value* into a ``SpecifierSet``."""
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )

    def _process_requires_dist(
        self,
        value: List[str],
    ) -> List[requirements.Requirement]:
        """Parse every entry of *value* into a ``Requirement``."""
        reqs = []
        try:
            for req in value:
                reqs.append(requirements.Requirement(req))
        except requirements.InvalidRequirement as exc:
            # ``req`` is still bound to the entry that failed.
            raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc)
        else:
            return reqs
654
655
class Metadata:
    """Representation of distribution metadata.

    Compared to :class:`RawMetadata`, this class provides objects representing
    metadata fields instead of only using built-in types. Any invalid metadata
    will cause :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata":
        """Create an instance from :class:`RawMetadata`.

        If *validate* is true, all metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        ins = cls()
        ins._raw = data.copy()  # Mutations occur due to caching enriched values.

        if validate:
            exceptions: List[Exception] = []
            try:
                metadata_version = ins.metadata_version
                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
            except InvalidMetadata as metadata_version_exc:
                exceptions.append(metadata_version_exc)
                # Without a valid version the "field too new" check below
                # is skipped.
                metadata_version = None

            # Make sure to check for the fields that are present, the required
            # fields (so their absence can be reported).
            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
            # Remove fields that have already been checked.
            fields_to_check -= {"metadata_version"}

            for key in fields_to_check:
                try:
                    if metadata_version:
                        # Can't use getattr() as that triggers descriptor protocol which
                        # will fail due to no value for the instance argument.
                        try:
                            field_metadata_version = cls.__dict__[key].added
                        except KeyError:
                            exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
                            exceptions.append(exc)
                            continue
                        field_age = _VALID_METADATA_VERSIONS.index(
                            field_metadata_version
                        )
                        if field_age > metadata_age:
                            field = _RAW_TO_EMAIL_MAPPING[key]
                            # These must be f-strings; as plain strings the
                            # placeholders were emitted literally.
                            exc = InvalidMetadata(
                                field,
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
                            exceptions.append(exc)
                            continue
                    # Attribute access runs the field's validator.
                    getattr(ins, key)
                except InvalidMetadata as exc:
                    exceptions.append(exc)

            if exceptions:
                raise ExceptionGroup("invalid metadata", exceptions)

        return ins

    @classmethod
    def from_email(
        cls, data: Union[bytes, str], *, validate: bool = True
    ) -> "Metadata":
        """Parse metadata from email headers.

        If *validate* is true, the metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        raw, unparsed = parse_email(data)

        if validate:
            exceptions: List[Exception] = []
            for unparsed_key in unparsed:
                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
                    message = f"{unparsed_key!r} has invalid data"
                else:
                    message = f"unrecognized field: {unparsed_key!r}"
                exceptions.append(InvalidMetadata(unparsed_key, message))

            if exceptions:
                raise ExceptionGroup("unparsed", exceptions)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as exc_group:
            # Re-raise under a message that covers both failure sources.
            raise ExceptionGroup(
                "invalid or unparsed metadata", exc_group.exceptions
            ) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[Optional[List[str]]] = _Validator(
        added="2.2",
    )
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[Optional[List[str]]] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[Optional[str]] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[Optional[str]] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[Optional[List[str]]] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[Optional[str]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[Optional[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[Optional[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-license`"""
    classifiers: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[Optional[List[requirements.Requirement]]] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[Optional[specifiers.SpecifierSet]] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-python`"""
    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    # don't do any processing on the values.
    requires_external: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[Optional[Dict[str, str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[Optional[List[utils.NormalizedName]]] = _Validator(
        added="2.1",
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""
826