• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import io
2import os
3import re
4import abc
5import csv
6import sys
7import email
8import pathlib
9import zipfile
10import operator
11import functools
12import itertools
13import collections
14
15from configparser import ConfigParser
16from contextlib import suppress
17from importlib import import_module
18from importlib.abc import MetaPathFinder
19from itertools import starmap
20
21
22__all__ = [
23    'Distribution',
24    'DistributionFinder',
25    'PackageNotFoundError',
26    'distribution',
27    'distributions',
28    'entry_points',
29    'files',
30    'metadata',
31    'requires',
32    'version',
33    ]
34
35
class PackageNotFoundError(ModuleNotFoundError):
    """Raised when no distribution metadata can be found for a package.

    Derives from ``ModuleNotFoundError`` so handlers written for that
    exception (or for ``ImportError``) also catch this one.
    """
38
39
class EntryPoint(
        collections.namedtuple('EntryPointBase', 'name value group')):
    """An entry point as defined by Python packaging conventions.

    Instances are ``(name, value, group)`` named tuples where ``value``
    is the object reference (``module[:attr] [extras]``).

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.
    """

    # A regular expression describing the syntax for an entry point,
    # which might look like:
    #
    #     - module
    #     - package.module
    #     - package.module:attribute
    #     - package.module:object.attribute
    #     - package.module:attr [extra1, extra2]
    #
    # Other combinations are possible as well.
    #
    # The expression is lenient about whitespace around the ':',
    # following the attr, and following any extras.
    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
        )

    def load(self):
        """Load the entry point from its definition.

        If only a module is indicated by the value, return that module.
        Otherwise, import the module and walk the dotted attribute path
        to return the named object.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        # A missing attr group yields no names, so reduce() returns
        # the module itself.
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def extras(self):
        """Return the extras named in the value as a list of strings.

        The list is empty when the value carries no ``[...]`` section.
        """
        match = self.pattern.match(self.value)
        # findall() returns the matched names themselves; finditer()
        # would hand callers opaque match objects instead of strings.
        return re.findall(r'\w+', match.group('extras') or '')

    @classmethod
    def _from_config(cls, config):
        """Build one entry point per option of each section in *config*."""
        return [
            cls(name, value, group)
            for group in config.sections()
            for name, value in config.items(group)
            ]

    @classmethod
    def _from_text(cls, text):
        """Parse the text of an ``entry_points.txt`` file into a list."""
        config = ConfigParser(delimiters='=')
        # case sensitive: https://stackoverflow.com/q/1611799/812183
        config.optionxform = str
        try:
            config.read_string(text)
        except AttributeError:  # pragma: nocover
            # Python 2 has no read_string
            config.readfp(io.StringIO(text))
        return EntryPoint._from_config(config)

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints easily.

        Iteration yields ``(name, self)`` rather than the three tuple
        fields.
        """
        return iter((self.name, self))

    def __reduce__(self):
        """Pickle by field values, since ``__iter__`` is overridden."""
        return (
            self.__class__,
            (self.name, self.value, self.group),
            )
116
117
class PackagePath(pathlib.PurePosixPath):
    """A POSIX-style path referring to a file inside a package.

    The reading helpers resolve the path through the ``dist`` attribute,
    which must be set on the instance before they are used.
    """

    def read_text(self, encoding='utf-8'):
        """Return the located file's contents decoded with *encoding*."""
        target = self.locate()
        with target.open(encoding=encoding) as handle:
            content = handle.read()
        return content

    def read_binary(self):
        """Return the located file's contents as bytes."""
        target = self.locate()
        with target.open('rb') as handle:
            content = handle.read()
        return content

    def locate(self):
        """Return a path-like object for this path"""
        owner = self.dist
        return owner.locate_file(self)
132
133
class FileHash:
    """A file hash parsed from a ``mode=value`` specification string."""

    def __init__(self, spec):
        # Only the first '=' separates the two parts; a spec without
        # one leaves ``value`` as the empty string.
        head, _sep, tail = spec.partition('=')
        self.mode = head
        self.value = tail

    def __repr__(self):
        template = '<FileHash mode: {} value: {}>'
        return template.format(self.mode, self.value)
140
141
class Distribution:
    """A Python distribution package.

    Subclasses supply ``read_text`` and ``locate_file``; everything else
    (metadata, version, entry points, files, requirements) is derived
    from the metadata files those two methods expose.
    """

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            dists = resolver(DistributionFinder.Context(name=name))
            # Only the first match of each resolver is considered.
            dist = next(dists, None)
            if dist is not None:
                return dist
        raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        :raises ValueError: When both ``context`` and other keyword
            arguments are supplied.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context)
            for resolver in cls._discover_resolvers()
            )

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers.

        A resolver is the ``find_distributions`` attribute of a finder
        on ``sys.meta_path``; finders without one are skipped.
        """
        declared = (
            getattr(finder, 'find_distributions', None)
            for finder in sys.meta_path
            )
        return filter(None, declared)

    @property
    def metadata(self):
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
            )
        return email.message_from_string(text)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        """Return the entry points parsed from ``entry_points.txt``."""
        return EntryPoint._from_text(self.read_text('entry_points.txt'))

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution, or a falsy
            value when no file listing is available

        The listing is read from RECORD (dist-info) or, failing that,
        SOURCES.txt (egg-info).  When neither yields any content the
        falsy result of reading them is returned instead of a list.
        """
        file_lines = self._read_files_distinfo() or self._read_files_egginfo()

        # Called positionally with the columns of one CSV row:
        # name, then optionally a hash spec and a size.
        def make_file(name, hash_spec=None, size_str=None):
            result = PackagePath(name)
            result.hash = FileHash(hash_spec) if hash_spec else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        return file_lines and list(starmap(make_file, csv.reader(file_lines)))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution

        :return: A list of requirement strings, or a falsy value when
            neither the metadata nor ``requires.txt`` declares any.
        """
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """Return every ``Requires-Dist`` value from the metadata."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from egg-info ``requires.txt``."""
        source = self.read_text('requires.txt')
        return source and self._deps_from_requires_text(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the text of a ``requires.txt`` into requirement strings."""
        section_pairs = cls._read_sections(source.splitlines())
        sections = {
            section: list(map(operator.itemgetter('line'), results))
            for section, results in
            itertools.groupby(section_pairs, operator.itemgetter('section'))
            }
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _read_sections(lines):
        """Yield a ``{'section', 'line'}`` dict for each non-header line.

        ``section`` is the name from the most recent ``[name]`` header,
        or None before the first header.  Empty lines are skipped.
        """
        section = None
        for line in filter(None, lines):
            section_match = re.match(r'\[(.*)\]$', line)
            if section_match:
                section = section_match.group(1)
                continue
            # Yield a fresh, explicit dict.  ``locals()`` may hand back
            # the same mutable frame dict on every iteration and also
            # exposes unrelated local variables.
            yield {'section': section, 'line': line}

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.

        :param sections: Mapping of section name (``extra``,
            ``extra:markers``, ``:markers`` or None) to its list of
            requirement lines.
        :return: Generator of requirement strings.
        """
        def make_condition(name):
            return name and 'extra == "{name}"'.format(name=name)

        def parse_condition(section):
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                # Parenthesize so the markers bind as one operand of
                # the 'and' joining them to the extra condition.
                markers = '({markers})'.format(markers=markers)
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return '; ' + ' and '.join(conditions) if conditions else ''

        def url_req_space(req):
            # PEP 508 requires whitespace between a URL and the ';'
            # that introduces markers, because ';' is valid in a URL.
            return ' ' if '@' in req else ''

        for section, deps in sections.items():
            condition = parse_condition(section)
            for dep in deps:
                space = url_req_space(dep) if condition else ''
                yield dep + space + condition
336
337
class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder that is also able to discover installed
    distributions.
    """

    class Context:
        """
        Search parameters supplied by the caller of
        ``distributions()`` or ``Distribution.discover()``
        to narrow the scope of a search for distributions
        in all DistributionFinders.

        Any keyword argument is accepted and stored as an instance
        attribute.  Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical parameters defined
        below when appropriate.
        """

        # Specific name for which a distribution finder should match.
        # A name of ``None`` matches all distributions.
        name = None

        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

        @property
        def path(self):
            """
            The path that a distribution finder should search.

            Typically refers to Python package paths and defaults
            to ``sys.path``.
            """
            # ``path`` is a read-only property, so a caller-supplied
            # value lives in the instance dict rather than an attribute.
            return self.__dict__.get('path', sys.path)

        @property
        def pattern(self):
            """Regular expression source matching the requested name."""
            if self.name is None:
                return '.*'
            return re.escape(self.name)

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
387
388
class MetadataPathFinder(DistributionFinder):
    """Finder that locates distribution metadata along a search path."""

    @classmethod
    def find_distributions(cls, context=DistributionFinder.Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        metadata_dirs = cls._search_paths(context.pattern, context.path)
        return map(PathDistribution, metadata_dirs)

    @classmethod
    def _search_paths(cls, pattern, paths):
        """Find metadata directories in paths heuristically."""
        roots = map(cls._switch_path, paths)
        per_root = map(lambda root: cls._search_path(root, pattern), roots)
        return itertools.chain.from_iterable(per_root)

    @staticmethod
    def _switch_path(path):
        """Wrap *path* as a ``zipfile.Path`` if possible, else ``pathlib.Path``."""
        PYPY_OPEN_BUG = False
        if not PYPY_OPEN_BUG or os.path.isfile(path):  # pragma: no branch
            try:
                return zipfile.Path(path)
            except Exception:
                # Not usable as a zip archive; fall back to a plain path.
                pass
        return pathlib.Path(path)

    @classmethod
    def _matches_info(cls, normalized, item):
        """Match *item*'s name against a dist-info/egg-info name."""
        manifest = r'{pattern}(-.*)?\.(dist|egg)-info'.format(
            pattern=normalized)
        return re.match(manifest, item.name, flags=re.IGNORECASE)

    @classmethod
    def _matches_legacy(cls, normalized, item):
        """Search *item*'s full path for a legacy ``.egg/EGG-INFO`` layout."""
        manifest = r'{pattern}-.*\.egg[\\/]EGG-INFO'.format(
            pattern=normalized)
        return re.search(manifest, str(item), flags=re.IGNORECASE)

    @classmethod
    def _search_path(cls, root, pattern):
        """Return the entries of *root* that look like metadata for *pattern*."""
        if not root.is_dir():
            return ()
        # Hyphens in the pattern are replaced by underscores before
        # matching entry names.
        normalized = pattern.replace('-', '_')

        def is_metadata(item):
            return (cls._matches_info(normalized, item)
                    or cls._matches_legacy(normalized, item))

        return filter(is_metadata, root.iterdir())
439
440
class PathDistribution(Distribution):
    """A Distribution whose metadata lives under a path-like object."""

    def __init__(self, path):
        """Construct a distribution from a path to the metadata directory.

        :param path: A pathlib.Path or similar object supporting
                     .joinpath(), __div__, .parent, and .read_text().
        """
        self._path = path

    def read_text(self, filename):
        target = self._path.joinpath(filename)
        try:
            return target.read_text(encoding='utf-8')
        except (FileNotFoundError, IsADirectoryError, KeyError,
                NotADirectoryError, PermissionError):
            # An unreadable or absent file is reported as "no text".
            return None
    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        """Resolve *path* against the parent of the metadata directory."""
        base = self._path.parent
        return base / path
458
459
def distribution(distribution_name):
    """Look up the ``Distribution`` for a named package.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
467
468
def distributions(**kwargs):
    """Discover ``Distribution`` instances in the current environment.

    Keyword arguments are forwarded unchanged to
    ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
475
476
def metadata(distribution_name):
    """Return the parsed metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: An email.Message containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
484
485
def version(distribution_name):
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
494
495
def entry_points():
    """Collect the entry points of every installed package, by group.

    :return: A dict mapping each group name to a tuple of the
        EntryPoint objects declared in that group.
    """
    group_of = operator.attrgetter('group')
    collected = []
    for dist in distributions():
        collected.extend(dist.entry_points)
    # groupby() only merges adjacent items, so order by group first.
    collected.sort(key=group_of)
    return {
        group: tuple(members)
        for group, members in itertools.groupby(collected, group_of)
        }
510
511
def files(distribution_name):
    """Return the files of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The value of the distribution's ``files`` property.
    """
    dist = distribution(distribution_name)
    return dist.files
519
520
def requires(distribution_name):
    """
    Return the requirements of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The value of the distribution's ``requires`` property.
    """
    dist = distribution(distribution_name)
    return dist.requires
529