import io
import os
import re
import abc
import csv
import sys
import email
import pathlib
import zipfile
import operator
import functools
import itertools
import posixpath
import collections

from configparser import ConfigParser
from contextlib import suppress
from importlib import import_module
from importlib.abc import MetaPathFinder
from itertools import starmap


__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageNotFoundError',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'requires',
    'version',
    ]


class PackageNotFoundError(ModuleNotFoundError):
    """The package was not found."""


class EntryPoint(
        collections.namedtuple('EntryPointBase', 'name value group')):
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.
    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
        )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    def load(self):
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        # A dotted attr ("object.attribute") is resolved one name at a
        # time; a missing attr yields no names, so the module is returned.
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def module(self):
        """The module portion of ``value`` (the text before any ':')."""
        match = self.pattern.match(self.value)
        return match.group('module')

    @property
    def attr(self):
        """The attribute portion of ``value``, or None when absent."""
        match = self.pattern.match(self.value)
        return match.group('attr')

    @property
    def extras(self):
        """The list of extra names (strings) in the bracketed suffix
        of ``value``; empty when no extras are declared.
        """
        match = self.pattern.match(self.value)
        # findall yields the names themselves rather than match objects.
        return re.findall(r'\w+', match.group('extras') or '')

    @classmethod
    def _from_config(cls, config):
        """Build one entry point per (section, option) of ``config``.

        :param config: A ``ConfigParser`` whose sections are groups.
        :return: List of instances, in section then option order.
        """
        return [
            cls(name, value, group)
            for group in config.sections()
            for name, value in config.items(group)
            ]

    @classmethod
    def _from_text(cls, text):
        """Parse the text of an ``entry_points.txt`` file.

        :param text: The file content; None or '' produce no entries.
        :return: List of EntryPoint instances.
        """
        config = ConfigParser(delimiters='=')
        # case sensitive: https://stackoverflow.com/q/1611799/812183
        config.optionxform = str
        config.read_string(text or '')
        return EntryPoint._from_config(config)

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints easily.
        """
        return iter((self.name, self))

    def __reduce__(self):
        """Reconstruct from the three fields explicitly, since
        ``__iter__`` no longer yields them.
        """
        return (
            self.__class__,
            (self.name, self.value, self.group),
            )


class PackagePath(pathlib.PurePosixPath):
    """A reference to a path in a package"""

    def read_text(self, encoding='utf-8'):
        """Return the located file's content decoded with ``encoding``."""
        with self.locate().open(encoding=encoding) as stream:
            return stream.read()

    def read_binary(self):
        """Return the located file's content as bytes."""
        with self.locate().open('rb') as stream:
            return stream.read()

    def locate(self):
        """Return a path-like object for this path"""
        # ``dist`` is attached by Distribution.files when the path is made.
        return self.dist.locate_file(self)


class FileHash:
    """A hash entry split at the first '=' into ``mode`` and ``value``."""

    def __init__(self, spec):
        self.mode, _, self.value = spec.partition('=')

    def __repr__(self):
        return '<FileHash mode: {} value: {}>'.format(self.mode, self.value)


class Distribution:
    """A Python distribution package."""

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            dists = resolver(DistributionFinder.Context(name=name))
            dist = next(iter(dists), None)
            if dist is not None:
                return dist
        else:
            raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        :raises ValueError: When both ``context`` and other keyword
            arguments are supplied.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context)
            for resolver in cls._discover_resolvers()
            )

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers."""
        declared = (
            getattr(finder, 'find_distributions', None)
            for finder in sys.meta_path
            )
        return filter(None, declared)

    @classmethod
    def _local(cls, root='.'):
        """Return a PathDistribution over the zip produced by
        ``pep517.meta.build_as_zip`` for the project at ``root``.

        :param root: The source directory handed to pep517.
        """
        # Imported lazily: pep517 is only needed for this helper.
        from pep517 import build, meta
        system = build.compat_system(root)
        builder = functools.partial(
            meta.build,
            source_dir=root,
            system=system,
            )
        return PathDistribution(zipfile.Path(meta.build_as_zip(builder)))

    @property
    def metadata(self):
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
            )
        return email.message_from_string(text)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        """Return the EntryPoint objects parsed from ``entry_points.txt``."""
        return EntryPoint._from_text(self.read_text('entry_points.txt'))

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
        missing.
        Result may be empty if the metadata exists but is empty.
        """
        file_lines = self._read_files_distinfo() or self._read_files_egginfo()

        def make_file(name, file_hash=None, size_str=None):
            # Each CSV row is (name[, hash[, size]]); blank fields mean
            # the value is not recorded.
            result = PackagePath(name)
            result.hash = FileHash(file_hash) if file_hash else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        return file_lines and list(starmap(make_file, csv.reader(file_lines)))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution"""
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """Return every 'Requires-Dist' metadata value, or None."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from ``requires.txt``, if present."""
        source = self.read_text('requires.txt')
        return source and self._deps_from_requires_text(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the text of a ``requires.txt`` into requirement strings.

        :param source: The content of ``requires.txt``.
        :return: An iterator of requirement strings.
        """
        # Accumulate per section so that a section header appearing more
        # than once contributes all of its lines; first-seen order is kept.
        sections = {}
        for pair in cls._read_sections(source.splitlines()):
            sections.setdefault(pair['section'], []).append(pair['line'])
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _read_sections(lines):
        """Yield a ``{'section': ..., 'line': ...}`` dict for each
        non-empty, non-header line; ``section`` is None before the
        first ``[header]``.
        """
        section = None
        for line in filter(None, lines):
            section_match = re.match(r'\[(.*)\]$', line)
            if section_match:
                section = section_match.group(1)
                continue
            # A fresh dict per item, so results stay valid when collected.
            yield {'section': section, 'line': line}

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """
        def make_condition(name):
            return name and 'extra == "{name}"'.format(name=name)

        def parse_condition(section):
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                # Parenthesize so the markers bind before the 'and'.
                markers = '({markers})'.format(markers=markers)
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return '; ' + ' and '.join(conditions) if conditions else ''

        for section, deps in sections.items():
            for dep in deps:
                yield dep + parse_condition(section)


class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder capable of discovering installed distributions.
    """

    class Context:
        """
        Keyword arguments presented by the caller to
        ``distributions()`` or ``Distribution.discover()``
        to narrow the scope of a search for distributions
        in all DistributionFinders.

        Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical
        parameters defined below when appropriate.
        """

        name = None
        """
        Specific name for which a distribution finder should match.
        A name of ``None`` matches all distributions.
        """

        def __init__(self, **kwargs):
            # Stored directly in the instance dict so that 'path' can be
            # supplied even though ``path`` is a read-only property.
            vars(self).update(kwargs)

        @property
        def path(self):
            """
            The path that a distribution finder should search.

            Typically refers to Python package paths and defaults
            to ``sys.path``.
            """
            return vars(self).get('path', sys.path)

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """


class FastPath:
    """
    Micro-optimized class for searching a path for
    children.
    """

    def __init__(self, root):
        self.root = root
        self.base = os.path.basename(self.root).lower()

    def joinpath(self, child):
        """Return ``child`` joined onto the root as a ``pathlib.Path``."""
        return pathlib.Path(self.root, child)

    def children(self):
        """Return the names directly under the root.

        The root is tried as a directory, then as a zip file; when
        neither can be read the result is an empty list.
        """
        with suppress(Exception):
            # An empty root denotes the current directory.
            return os.listdir(self.root or '.')
        with suppress(Exception):
            return self.zip_children()
        return []

    def zip_children(self):
        """Return the top-level names in the zip file at the root.

        As a side effect, ``joinpath`` is rebound so that later joins
        produce paths inside the zip file.
        """
        zip_path = zipfile.Path(self.root)
        names = zip_path.root.namelist()
        self.joinpath = zip_path.joinpath

        return dict.fromkeys(
            child.split(posixpath.sep, 1)[0]
            for child in names
            )

    def is_egg(self, search):
        """Return whether the root's own basename names an egg for
        the ``search`` (a Prepared instance).
        """
        base = self.base
        return (
            base == search.versionless_egg_name
            or (base.startswith(search.prefix)
                and base.endswith('.egg')))

    def search(self, name):
        """Yield the path of each child matching ``name``.

        :param name: A Prepared instance describing the search.
        """
        for child in self.children():
            n_low = child.lower()
            if (n_low in name.exact_matches
                    or (n_low.startswith(name.prefix)
                        and n_low.endswith(name.suffixes))
                    # legacy case:
                    or (self.is_egg(name) and n_low == 'egg-info')):
                yield self.joinpath(child)


class Prepared:
    """
    A prepared search for metadata on a possibly-named package.
    """
    normalized = ''
    prefix = ''
    suffixes = '.dist-info', '.egg-info'
    exact_matches = [''][:0]
    versionless_egg_name = ''

    def __init__(self, name):
        self.name = name
        if name is None:
            # Keep the class-level defaults, which match any name.
            return
        self.normalized = name.lower().replace('-', '_')
        self.prefix = self.normalized + '-'
        self.exact_matches = [
            self.normalized + suffix for suffix in self.suffixes]
        self.versionless_egg_name = self.normalized + '.egg'


class MetadataPathFinder(DistributionFinder):
    """A DistributionFinder that searches the paths of a context."""

    @classmethod
    def find_distributions(cls, context=DistributionFinder.Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        found = cls._search_paths(context.name, context.path)
        return map(PathDistribution, found)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        return itertools.chain.from_iterable(
            path.search(Prepared(name))
            for path in map(FastPath, paths)
            )


class PathDistribution(Distribution):
    """A Distribution backed by a path to its metadata directory."""

    def __init__(self, path):
        """Construct a distribution from a path to the metadata directory.

        :param path: A pathlib.Path or similar object supporting
                     .joinpath(), __div__, .parent, and .read_text().
        """
        self._path = path

    def read_text(self, filename):
        # A missing or unreadable file falls through and returns None.
        with suppress(FileNotFoundError, IsADirectoryError, KeyError,
                      NotADirectoryError, PermissionError):
            return self._path.joinpath(filename).read_text(encoding='utf-8')
    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        """Return ``path`` resolved against the metadata path's parent."""
        return self._path.parent / path


def distribution(distribution_name):
    """Get the ``Distribution`` instance for the named package.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    return Distribution.from_name(distribution_name)


def distributions(**kwargs):
    """Get all ``Distribution`` instances in the current environment.

    :return: An iterable of ``Distribution`` instances.
    """
    return Distribution.discover(**kwargs)


def metadata(distribution_name):
    """Get the metadata for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: An email.Message containing the parsed metadata.
    """
    return Distribution.from_name(distribution_name).metadata


def version(distribution_name):
    """Get the version string for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    return distribution(distribution_name).version


def entry_points():
    """Return EntryPoint objects for all installed packages.

    :return: A dict mapping each group name to a tuple of the
        EntryPoint objects in that group.
    """
    eps = itertools.chain.from_iterable(
        dist.entry_points for dist in distributions())
    by_group = operator.attrgetter('group')
    # groupby only merges adjacent items, so sort by the same key first.
    ordered = sorted(eps, key=by_group)
    grouped = itertools.groupby(ordered, by_group)
    return {
        group: tuple(members)
        for group, members in grouped
        }


def files(distribution_name):
    """Return a list of files for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: List of files composing the distribution.
    """
    return distribution(distribution_name).files


def requires(distribution_name):
    """
    Return a list of requirements for the named package.

    :return: An iterator of requirements, suitable for
    packaging.requirement.Requirement.
    """
    return distribution(distribution_name).requires