import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import sys
import typing
from typing import (
    Any,
    Callable,
    Dict,
    Generic,
    List,
    Optional,
    Tuple,
    Type,
    Union,
    cast,
)

from . import requirements, specifiers, utils, version as version_module

T = typing.TypeVar("T")
# Compatibility shim: Literal and TypedDict landed in typing in Python 3.8.
# On older interpreters we fall back to typing_extensions when available,
# and otherwise to no-op stand-ins (subscripting them is never needed at
# runtime in that case; they only back annotations).
if sys.version_info[:2] >= (3, 8):  # pragma: no cover
    from typing import Literal, TypedDict
else:  # pragma: no cover
    if typing.TYPE_CHECKING:
        from typing_extensions import Literal, TypedDict
    else:
        try:
            from typing_extensions import Literal, TypedDict
        except ImportError:

            class Literal:
                def __init_subclass__(*_args, **_kwargs):
                    pass

            class TypedDict:
                def __init_subclass__(*_args, **_kwargs):
                    pass


# Use the built-in ExceptionGroup (Python 3.11+) when it exists; otherwise
# define a minimal stand-in with the same constructor signature.
try:
    ExceptionGroup
except NameError:  # pragma: no cover

    class ExceptionGroup(Exception):  # noqa: N818
        """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11.

        If :external:exc:`ExceptionGroup` is already defined by Python itself,
        that version is used instead.
        """

        message: str
        exceptions: List[Exception]

        def __init__(self, message: str, exceptions: List[Exception]) -> None:
            self.message = message
            self.exceptions = exceptions

        def __repr__(self) -> str:
            return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})"

else:  # pragma: no cover
    # Re-bind so the name exists as a module attribute on both branches.
    ExceptionGroup = ExceptionGroup


class InvalidMetadata(ValueError):
    """A metadata field contains invalid data."""

    field: str
    """The name of the field that contains invalid data."""

    def __init__(self, field: str, message: str) -> None:
        self.field = field
        super().__init__(message)


# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible.
# The idea is that as long as a serialization format offers some very basic
# primitives in *some* way, then we can support serializing to and from that
# format.
class RawMetadata(TypedDict, total=False):
    """A dictionary of raw core metadata.

    Each field in core metadata maps to a key of this dictionary (when data is
    provided). The key is lower-case and underscores are used instead of dashes
    compared to the equivalent core metadata field. Any core metadata field that
    can be specified multiple times or can hold multiple values in a single
    field has a key with a plural name. See :class:`Metadata` whose attributes
    match the keys of this dictionary.

    Core metadata fields that can be specified multiple times are stored as a
    list or dict depending on which is appropriate for the field. Any fields
    which hold multiple values in a single field are stored as a list.

    """

    # Metadata 1.0 - PEP 241
    metadata_version: str
    name: str
    version: str
    platforms: List[str]
    summary: str
    description: str
    keywords: List[str]
    home_page: str
    author: str
    author_email: str
    license: str

    # Metadata 1.1 - PEP 314
    supported_platforms: List[str]
    download_url: str
    classifiers: List[str]
    requires: List[str]
    provides: List[str]
    obsoletes: List[str]

    # Metadata 1.2 - PEP 345
    maintainer: str
    maintainer_email: str
    requires_dist: List[str]
    provides_dist: List[str]
    obsoletes_dist: List[str]
    requires_python: str
    requires_external: List[str]
    project_urls: Dict[str, str]

    # Metadata 2.0
    # PEP 426 attempted to completely revamp the metadata format
    # but got stuck without ever being able to build consensus on
    # it and ultimately ended up withdrawn.
    #
    # However, a number of tools had started emitting METADATA with
    # `2.0` Metadata-Version, so for historical reasons, this version
    # was skipped.

    # Metadata 2.1 - PEP 566
    description_content_type: str
    provides_extra: List[str]

    # Metadata 2.2 - PEP 643
    dynamic: List[str]

    # Metadata 2.3 - PEP 685
    # No new fields were added in PEP 685, just some edge cases were
    # tightened up to provide better interoperability.


# Fields that hold exactly one string value.
_STRING_FIELDS = {
    "author",
    "author_email",
    "description",
    "description_content_type",
    "download_url",
    "home_page",
    "license",
    "maintainer",
    "maintainer_email",
    "metadata_version",
    "name",
    "requires_python",
    "summary",
    "version",
}

# Fields that may appear multiple times / hold multiple string values.
_LIST_FIELDS = {
    "classifiers",
    "dynamic",
    "obsoletes",
    "obsoletes_dist",
    "platforms",
    "provides",
    "provides_dist",
    "provides_extra",
    "requires",
    "requires_dist",
    "requires_external",
    "supported_platforms",
}

# Fields that are logically a mapping (label -> value).
_DICT_FIELDS = {
    "project_urls",
}


def _parse_keywords(data: str) -> List[str]:
    """Split a string of comma-separated keywords into a list of keywords."""
    return [k.strip() for k in data.split(",")]


def _parse_project_urls(data: List[str]) -> Dict[str, str]:
    """Parse a list of label/URL string pairings separated by a comma.

    Raises :exc:`KeyError` when the same label appears more than once, since
    there is no single "right" value to keep and the field must therefore be
    treated as unparseable by the caller.
    """
    urls = {}
    for pair in data:
        # Our logic is slightly tricky here as we want to try and do
        # *something* reasonable with malformed data.
        #
        # The main thing that we have to worry about, is data that does
        # not have a ',' at all to split the label from the Value. There
        # isn't a singular right answer here, and we will fail validation
        # later on (if the caller is validating) so it doesn't *really*
        # matter, but since the missing value has to be an empty str
        # and our return value is dict[str, str], if we let the key
        # be the missing value, then they'd have multiple '' values that
        # overwrite each other in an accumulating dict.
        #
        # The other potential issue is that it's possible to have the
        # same label multiple times in the metadata, with no solid "right"
        # answer with what to do in that case. As such, we'll do the only
        # thing we can, which is treat the field as unparseable and add it
        # to our list of unparsed fields.
        parts = [p.strip() for p in pair.split(",", 1)]
        parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items

        # TODO: The spec doesn't say anything about if the keys should be
        #       considered case sensitive or not... logically they should
        #       be case-preserving and case-insensitive, but doing that
        #       would open up more cases where we might have duplicate
        #       entries.
        label, url = parts
        if label in urls:
            # The label already exists in our set of urls, so this field
            # is unparseable, and we can just add the whole thing to our
            # unparseable data and stop processing it.
            raise KeyError("duplicate labels in project urls")
        urls[label] = url

    return urls


def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
    """Get the body of the message.

    Raises :exc:`ValueError` when *source* is bytes and the payload is not
    valid UTF-8.
    """
    # If our source is a str, then our caller has managed encodings for us,
    # and we don't need to deal with it.
    if isinstance(source, str):
        payload: str = msg.get_payload()
        return payload
    # If our source is a bytes, then we're managing the encoding and we need
    # to deal with it.
    else:
        bpayload: bytes = msg.get_payload(decode=True)
        try:
            return bpayload.decode("utf8", "strict")
        except UnicodeDecodeError:
            raise ValueError("payload in an invalid encoding")


# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
# NOTE: The parse_* machinery below is intentionally lenient. To aid in this,
# we also generally want to do as little touching of the data as possible,
# except where there are possibly some historic holdovers that make valid
# data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.

# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
    "author": "author",
    "author-email": "author_email",
    "classifier": "classifiers",
    "description": "description",
    "description-content-type": "description_content_type",
    "download-url": "download_url",
    "dynamic": "dynamic",
    "home-page": "home_page",
    "keywords": "keywords",
    "license": "license",
    "maintainer": "maintainer",
    "maintainer-email": "maintainer_email",
    "metadata-version": "metadata_version",
    "name": "name",
    "obsoletes": "obsoletes",
    "obsoletes-dist": "obsoletes_dist",
    "platform": "platforms",
    "project-url": "project_urls",
    "provides": "provides",
    "provides-dist": "provides_dist",
    "provides-extra": "provides_extra",
    "requires": "requires",
    "requires-dist": "requires_dist",
    "requires-external": "requires_external",
    "requires-python": "requires_python",
    "summary": "summary",
    "supported-platform": "supported_platforms",
    "version": "version",
}
_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()}


def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
    """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``).

    This function returns a two-item tuple of dicts. The first dict is of
    recognized fields from the core metadata specification. Fields that can be
    parsed and translated into Python's built-in types are converted
    appropriately. All other fields are left as-is. Fields that are allowed to
    appear multiple times are stored as lists.

    The second dict contains all other fields from the metadata. This includes
    any unrecognized fields. It also includes any fields which are expected to
    be parsed into a built-in type but were not formatted appropriately.
    Finally, any fields that are expected to appear only once but are repeated
    are included in this dict.

    """
    raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
    unparsed: Dict[str, List[str]] = {}

    if isinstance(data, str):
        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
    else:
        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

    # We have to wrap parsed.keys() in a set, because in the case of multiple
    # values for a key (a list), the key will appear multiple times in the
    # list of keys, but we're avoiding that by using get_all().
    for name in frozenset(parsed.keys()):
        # Header names in RFC are case insensitive, so we'll normalize to all
        # lower case to make comparisons easier.
        name = name.lower()

        # We use get_all() here, even for fields that aren't multiple use,
        # because otherwise someone could have e.g. two Name fields, and we
        # would just silently ignore it rather than doing something about it.
        headers = parsed.get_all(name) or []

        # The way the email module works when parsing bytes is that it
        # unconditionally decodes the bytes as ascii using the surrogateescape
        # handler. When you pull that data back out (such as with get_all() ),
        # it looks to see if the str has any surrogate escapes, and if it does
        # it wraps it in a Header object instead of returning the string.
        #
        # As such, we'll look for those Header objects, and fix up the encoding.
        value = []
        # Flag if we have run into any issues processing the headers, thus
        # signalling that the data belongs in 'unparsed'.
        valid_encoding = True
        for h in headers:
            # It's unclear if this can return more types than just a Header or
            # a str, so we'll just assert here to make sure.
            assert isinstance(h, (email.header.Header, str))

            # If it's a header object, we need to do our little dance to get
            # the real data out of it. In cases where there is invalid data
            # we're going to end up with mojibake, but there's no obvious, good
            # way around that without reimplementing parts of the Header object
            # ourselves.
            #
            # That should be fine since, if mojibake happens, this key is
            # going into the unparsed dict anyways.
            if isinstance(h, email.header.Header):
                # The Header object stores its data as chunks, and each chunk
                # can be independently encoded, so we'll need to check each
                # of them.
                chunks: List[Tuple[bytes, Optional[str]]] = []
                for bin, encoding in email.header.decode_header(h):
                    try:
                        bin.decode("utf8", "strict")
                    except UnicodeDecodeError:
                        # Enable mojibake.
                        encoding = "latin1"
                        valid_encoding = False
                    else:
                        encoding = "utf8"
                    chunks.append((bin, encoding))

                # Turn our chunks back into a Header object, then let that
                # Header object do the right thing to turn them into a
                # string for us.
                value.append(str(email.header.make_header(chunks)))
            # This is already a string, so just add it.
            else:
                value.append(h)

        # We've processed all of our values to get them into a list of str,
        # but we may have mojibake data, in which case this is an unparsed
        # field.
        if not valid_encoding:
            unparsed[name] = value
            continue

        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
        if raw_name is None:
            # This is a bit of a weird situation, we've encountered a key that
            # we don't know what it means, so we don't know whether it's meant
            # to be a list or not.
            #
            # Since we can't really tell one way or another, we'll just leave it
            # as a list, even though it may be a single item list, because that's
            # what makes the most sense for email headers.
            unparsed[name] = value
            continue

        # If this is one of our string fields, then we'll check to see if our
        # value is a list of a single item. If it is then we'll assume that
        # it was emitted as a single string, and unwrap the str from inside
        # the list.
        #
        # If it's any other kind of data, then we haven't the faintest clue
        # what we should parse it as, and we have to just add it to our list
        # of unparsed stuff.
        if raw_name in _STRING_FIELDS and len(value) == 1:
            raw[raw_name] = value[0]
        # If this is one of our list of string fields, then we can just assign
        # the value, since email *only* has strings, and our get_all() call
        # above ensures that this is a list.
        elif raw_name in _LIST_FIELDS:
            raw[raw_name] = value
        # Special Case: Keywords
        # The keywords field is implemented in the metadata spec as a str,
        # but it conceptually is a list of strings, and is serialized using
        # ", ".join(keywords), so we'll do some light data massaging to turn
        # this into what it logically is.
        elif raw_name == "keywords" and len(value) == 1:
            raw[raw_name] = _parse_keywords(value[0])
        # Special Case: Project-URL
        # The project urls is implemented in the metadata spec as a list of
        # specially-formatted strings that represent a key and a value, which
        # is fundamentally a mapping, however the email format doesn't support
        # mappings in a sane way, so it was crammed into a list of strings
        # instead.
        #
        # We will do a little light data massaging to turn this into a map as
        # it logically should be.
        elif raw_name == "project_urls":
            try:
                raw[raw_name] = _parse_project_urls(value)
            except KeyError:
                unparsed[name] = value
        # Nothing that we've done has managed to parse this, so it'll just
        # throw it in our unparseable data and move on.
        else:
            unparsed[name] = value

    # We need to support getting the Description from the message payload in
    # addition to getting it from the headers. This does mean, though, there
    # is the possibility of it being set both ways, in which case we put both
    # in 'unparsed' since we don't know which is right.
    try:
        payload = _get_payload(parsed, data)
    except ValueError:
        unparsed.setdefault("description", []).append(
            parsed.get_payload(decode=isinstance(data, bytes))
        )
    else:
        if payload:
            # Check to see if we've already got a description, if so then both
            # it, and this body move to unparseable.
            if "description" in raw:
                description_header = cast(str, raw.pop("description"))
                unparsed.setdefault("description", []).extend(
                    [description_header, payload]
                )
            elif "description" in unparsed:
                unparsed["description"].append(payload)
            else:
                raw["description"] = payload

    # We need to cast our `raw` to a metadata, because a TypedDict only support
    # literal key names, but we're computing our key names on purpose, but the
    # way this function is implemented, our `TypedDict` can only have valid key
    # names.
    return cast(RawMetadata, raw), unparsed


_NOT_FOUND = object()


# Keep the two values in sync.
_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]
_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]

_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"])


class _Validator(Generic[T]):
    """Validate a metadata field.

    All _process_*() methods correspond to a core metadata field. The method is
    called with the field's raw value. If the raw value is valid it is returned
    in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field).
    If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause
    as appropriate).
    """

    name: str
    raw_name: str
    added: _MetadataVersion

    def __init__(
        self,
        *,
        added: _MetadataVersion = "1.0",
    ) -> None:
        self.added = added

    def __set_name__(self, _owner: "Metadata", name: str) -> None:
        self.name = name
        self.raw_name = _RAW_TO_EMAIL_MAPPING[name]

    def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T:
        # With Python 3.8, the caching can be replaced with functools.cached_property().
        # No need to check the cache as attribute lookup will resolve into the
        # instance's __dict__ before __get__ is called.
        cache = instance.__dict__
        value = instance._raw.get(self.name)

        # To make the _process_* methods easier, we'll check if the value is None
        # and if this field is NOT a required attribute, and if both of those
        # things are true, we'll skip the converter. This will mean that the
        # converters never have to deal with the None union.
        if self.name in _REQUIRED_ATTRS or value is not None:
            try:
                converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}")
            except AttributeError:
                pass
            else:
                value = converter(value)

        cache[self.name] = value
        try:
            del instance._raw[self.name]  # type: ignore[misc]
        except KeyError:
            pass

        return cast(T, value)

    def _invalid_metadata(
        self, msg: str, cause: Optional[Exception] = None
    ) -> InvalidMetadata:
        # "{field}" in *msg* is substituted with the repr of the email-form
        # field name (e.g. "'requires-dist'").
        exc = InvalidMetadata(
            self.raw_name, msg.format_map({"field": repr(self.raw_name)})
        )
        exc.__cause__ = cause
        return exc

    def _process_metadata_version(self, value: str) -> _MetadataVersion:
        # Implicitly makes Metadata-Version required.
        if value not in _VALID_METADATA_VERSIONS:
            raise self._invalid_metadata(f"{value!r} is not a valid metadata version")
        return cast(_MetadataVersion, value)

    def _process_name(self, value: str) -> str:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        # Validate the name as a side-effect.
        try:
            utils.canonicalize_name(value, validate=True)
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )
        else:
            return value

    def _process_version(self, value: str) -> version_module.Version:
        if not value:
            raise self._invalid_metadata("{field} is a required field")
        try:
            return version_module.parse(value)
        except version_module.InvalidVersion as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )

    def _process_summary(self, value: str) -> str:
        """Check the field contains no newlines."""
        if "\n" in value:
            raise self._invalid_metadata("{field} must be a single line")
        return value

    def _process_description_content_type(self, value: str) -> str:
        content_types = {"text/plain", "text/x-rst", "text/markdown"}
        message = email.message.EmailMessage()
        message["content-type"] = value

        content_type, parameters = (
            # Defaults to `text/plain` if parsing failed.
            message.get_content_type().lower(),
            message["content-type"].params,
        )
        # Check if content-type is valid or defaulted to `text/plain` and thus was
        # not parseable.
        if content_type not in content_types or content_type not in value.lower():
            raise self._invalid_metadata(
                f"{{field}} must be one of {list(content_types)}, not {value!r}"
            )

        charset = parameters.get("charset", "UTF-8")
        if charset != "UTF-8":
            # BUG FIX: this previously interpolated ``list(charset)``, which
            # rendered the charset as a list of individual characters
            # (e.g. ``['l', 'a', 't', 'i', 'n', '1']``).
            raise self._invalid_metadata(
                f"{{field}} can only specify the UTF-8 charset, not {charset!r}"
            )

        markdown_variants = {"GFM", "CommonMark"}
        variant = parameters.get("variant", "GFM")  # Use an acceptable default.
        if content_type == "text/markdown" and variant not in markdown_variants:
            raise self._invalid_metadata(
                f"valid Markdown variants for {{field}} are {list(markdown_variants)}, "
                f"not {variant!r}",
            )
        return value

    def _process_dynamic(self, value: List[str]) -> List[str]:
        for dynamic_field in map(str.lower, value):
            if dynamic_field in {"name", "version", "metadata-version"}:
                raise self._invalid_metadata(
                    f"{value!r} is not allowed as a dynamic field"
                )
            elif dynamic_field not in _EMAIL_TO_RAW_MAPPING:
                raise self._invalid_metadata(f"{value!r} is not a valid dynamic field")
        return list(map(str.lower, value))

    def _process_provides_extra(
        self,
        value: List[str],
    ) -> List[utils.NormalizedName]:
        normalized_names = []
        try:
            for name in value:
                normalized_names.append(utils.canonicalize_name(name, validate=True))
        except utils.InvalidName as exc:
            raise self._invalid_metadata(
                f"{name!r} is invalid for {{field}}", cause=exc
            )
        else:
            return normalized_names

    def _process_requires_python(self, value: str) -> specifiers.SpecifierSet:
        try:
            return specifiers.SpecifierSet(value)
        except specifiers.InvalidSpecifier as exc:
            raise self._invalid_metadata(
                f"{value!r} is invalid for {{field}}", cause=exc
            )

    def _process_requires_dist(
        self,
        value: List[str],
    ) -> List[requirements.Requirement]:
        reqs = []
        try:
            for req in value:
                reqs.append(requirements.Requirement(req))
        except requirements.InvalidRequirement as exc:
            raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc)
        else:
            return reqs


class Metadata:
    """Representation of distribution metadata.

    Compared to :class:`RawMetadata`, this class provides objects representing
    metadata fields instead of only using built-in types. Any invalid metadata
    will cause :exc:`InvalidMetadata` to be raised (with a
    :py:attr:`~BaseException.__cause__` attribute as appropriate).
    """

    _raw: RawMetadata

    @classmethod
    def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata":
        """Create an instance from :class:`RawMetadata`.

        If *validate* is true, all metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        ins = cls()
        ins._raw = data.copy()  # Mutations occur due to caching enriched values.

        if validate:
            exceptions: List[Exception] = []
            try:
                metadata_version = ins.metadata_version
                metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version)
            except InvalidMetadata as metadata_version_exc:
                exceptions.append(metadata_version_exc)
                metadata_version = None

            # Make sure to check for the fields that are present, plus the
            # required fields (so their absence can be reported).
            fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS
            # Remove fields that have already been checked.
            fields_to_check -= {"metadata_version"}

            for key in fields_to_check:
                try:
                    if metadata_version:
                        # Can't use getattr() as that triggers descriptor protocol which
                        # will fail due to no value for the instance argument.
                        try:
                            field_metadata_version = cls.__dict__[key].added
                        except KeyError:
                            exc = InvalidMetadata(key, f"unrecognized field: {key!r}")
                            exceptions.append(exc)
                            continue
                        field_age = _VALID_METADATA_VERSIONS.index(
                            field_metadata_version
                        )
                        if field_age > metadata_age:
                            field = _RAW_TO_EMAIL_MAPPING[key]
                            exc = InvalidMetadata(
                                field,
                                # BUG FIX: these were plain string literals, so
                                # the placeholders were never interpolated and
                                # the error message contained literal "{field}".
                                f"{field} introduced in metadata version "
                                f"{field_metadata_version}, not {metadata_version}",
                            )
                            exceptions.append(exc)
                            continue
                    getattr(ins, key)
                except InvalidMetadata as exc:
                    exceptions.append(exc)

            if exceptions:
                raise ExceptionGroup("invalid metadata", exceptions)

        return ins

    @classmethod
    def from_email(
        cls, data: Union[bytes, str], *, validate: bool = True
    ) -> "Metadata":
        """Parse metadata from email headers.

        If *validate* is true, the metadata will be validated. All exceptions
        related to validation will be gathered and raised as an :class:`ExceptionGroup`.
        """
        raw, unparsed = parse_email(data)

        if validate:
            # Use typing.List for consistency with the rest of this module,
            # which still supports pre-3.9 interpreters.
            exceptions: List[Exception] = []
            for unparsed_key in unparsed:
                if unparsed_key in _EMAIL_TO_RAW_MAPPING:
                    message = f"{unparsed_key!r} has invalid data"
                else:
                    message = f"unrecognized field: {unparsed_key!r}"
                exceptions.append(InvalidMetadata(unparsed_key, message))

            if exceptions:
                raise ExceptionGroup("unparsed", exceptions)

        try:
            return cls.from_raw(raw, validate=validate)
        except ExceptionGroup as exc_group:
            raise ExceptionGroup(
                "invalid or unparsed metadata", exc_group.exceptions
            ) from None

    metadata_version: _Validator[_MetadataVersion] = _Validator()
    """:external:ref:`core-metadata-metadata-version`
    (required; validated to be a valid metadata version)"""
    name: _Validator[str] = _Validator()
    """:external:ref:`core-metadata-name`
    (required; validated using :func:`~packaging.utils.canonicalize_name` and its
    *validate* parameter)"""
    version: _Validator[version_module.Version] = _Validator()
    """:external:ref:`core-metadata-version` (required)"""
    dynamic: _Validator[Optional[List[str]]] = _Validator(
        added="2.2",
    )
    """:external:ref:`core-metadata-dynamic`
    (validated against core metadata field names and lowercased)"""
    platforms: _Validator[Optional[List[str]]] = _Validator()
    """:external:ref:`core-metadata-platform`"""
    supported_platforms: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-supported-platform`"""
    summary: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-summary` (validated to contain no newlines)"""
    description: _Validator[Optional[str]] = _Validator()  # TODO 2.1: can be in body
    """:external:ref:`core-metadata-description`"""
    description_content_type: _Validator[Optional[str]] = _Validator(added="2.1")
    """:external:ref:`core-metadata-description-content-type` (validated)"""
    keywords: _Validator[Optional[List[str]]] = _Validator()
    """:external:ref:`core-metadata-keywords`"""
    home_page: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-home-page`"""
    download_url: _Validator[Optional[str]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-download-url`"""
    author: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-author`"""
    author_email: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-author-email`"""
    maintainer: _Validator[Optional[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer`"""
    maintainer_email: _Validator[Optional[str]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-maintainer-email`"""
    license: _Validator[Optional[str]] = _Validator()
    """:external:ref:`core-metadata-license`"""
    classifiers: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """:external:ref:`core-metadata-classifier`"""
    requires_dist: _Validator[Optional[List[requirements.Requirement]]] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-dist`"""
    requires_python: _Validator[Optional[specifiers.SpecifierSet]] = _Validator(
        added="1.2"
    )
    """:external:ref:`core-metadata-requires-python`"""
    # Because `Requires-External` allows for non-PEP 440 version specifiers, we
    # don't do any processing on the values.
    requires_external: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-requires-external`"""
    project_urls: _Validator[Optional[Dict[str, str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-project-url`"""
    # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation
    # regardless of metadata version.
    provides_extra: _Validator[Optional[List[utils.NormalizedName]]] = _Validator(
        added="2.1",
    )
    """:external:ref:`core-metadata-provides-extra`"""
    provides_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-provides-dist`"""
    obsoletes_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2")
    """:external:ref:`core-metadata-obsoletes-dist`"""
    requires: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Requires`` (deprecated)"""
    provides: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Provides`` (deprecated)"""
    obsoletes: _Validator[Optional[List[str]]] = _Validator(added="1.1")
    """``Obsoletes`` (deprecated)"""