• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7import os
8import re
9import importlib.util
10import string
11import sys
12import sysconfig
13from distutils.errors import DistutilsPlatformError
14from distutils.dep_util import newer
15from distutils.spawn import spawn
16from distutils import log
17from distutils.errors import DistutilsByteCompileError
18from .py35compat import _optim_args_from_interpreter_flags
19
20
def get_host_platform():
    """Return a string identifying the platform this interpreter runs on.

    The result is used mainly to tell apart platform-specific build
    directories and platform-specific built distributions.
    """
    # Platform names were originally exposed as Python 3.9 defines them,
    # even on older interpreters.  The answer now comes from the stdlib
    # sysconfig module, so the special cases below restore those names on
    # older versions in case anyone has come to depend on them.
    py_version = sys.version_info

    if py_version < (3, 8) and os.name == 'nt':
        build_info = sys.version.lower()
        if '(arm)' in build_info:
            return 'win-arm32'
        if '(arm64)' in build_info:
            return 'win-arm64'

    if py_version < (3, 9) and os.name == "posix" and hasattr(os, 'uname'):
        osname, _host, release, version, _machine = os.uname()
        # NOTE(review): this comparison is case-sensitive; confirm that
        # os.uname() reports the system name in lower case on AIX.
        if osname.startswith("aix"):
            from .py38compat import aix_platform
            return aix_platform(osname, version, release)

    return sysconfig.get_platform()
47
def get_platform():
    """Return a string identifying the platform being built for.

    On Windows the 'VSCMD_ARG_TGT_ARCH' environment variable, when it
    names a known architecture, selects the platform; in every other case
    the result of 'get_host_platform()' is returned.
    """
    if os.name != 'nt':
        return get_host_platform()

    target_to_plat = {
        'x86': 'win32',
        'x64': 'win-amd64',
        'arm': 'win-arm32',
        'arm64': 'win-arm64',
    }
    target_arch = os.environ.get('VSCMD_ARG_TGT_ARCH')
    return target_to_plat.get(target_arch) or get_host_platform()
59
60
# Cache for the macOS deployment target pulled from sysconfig.  It is
# defined on every platform (not only darwin) so that the functions which
# declare it 'global' never hit a NameError when they read it elsewhere.
_syscfg_macosx_ver = None
MACOSX_VERSION_VAR = 'MACOSX_DEPLOYMENT_TARGET'
64
def _clear_cached_macosx_ver():
    """Reset the cached macOS version to None.

    For testing only. Do not call.
    """
    globals()['_syscfg_macosx_ver'] = None
69
def get_macosx_target_ver_from_syscfg():
    """Return the macOS version latched in the interpreter configuration.

    The value is returned as a string, or None when the configuration
    does not provide one.  A non-empty value is cached in the module
    global '_syscfg_macosx_ver' and reused by later calls.
    """
    global _syscfg_macosx_ver
    if _syscfg_macosx_ver is not None:
        return _syscfg_macosx_ver

    from distutils import sysconfig
    configured = sysconfig.get_config_var(MACOSX_VERSION_VAR)
    if configured:
        # Only a truthy value is cached; otherwise the lookup is retried
        # on the next call.
        _syscfg_macosx_ver = configured
    return _syscfg_macosx_ver
80
def get_macosx_target_ver():
    """Return the version of macOS for which we are building.

    The version recorded in sysconfig when the interpreter was built is
    the default; a non-empty MACOSX_DEPLOYMENT_TARGET environment variable
    overrides it.  None is returned when neither source has a value.
    Raises DistutilsPlatformError if the override is older than 10.3 while
    the interpreter was configured for 10.3 or later.
    """
    configured = get_macosx_target_ver_from_syscfg()
    override = os.environ.get(MACOSX_VERSION_VAR)
    if not override:
        return configured

    # Validate the override against the sysconfig version when both exist.
    # The deployment target of the build must not drop below 10.3 if the
    # interpreter was built for 10.3 or later, so that extension modules
    # get correct compatibility values -- specifically LDSHARED, which can
    # use '-undefined dynamic_lookup' and that only works on >= 10.3.
    minimum = [10, 3]
    if (configured
            and split_version(configured) >= minimum
            and split_version(override) < minimum):
        detail = (' mismatch: '
                  'now "%s" but "%s" during configure; '
                  'must use 10.3 or later'
                  % (override, configured))
        raise DistutilsPlatformError('$' + MACOSX_VERSION_VAR + detail)
    return override
107
108
def split_version(s):
    """Turn a dot-separated version string into a list of ints so that
    versions can be compared numerically."""
    return list(map(int, s.split('.')))
112
113
def convert_path(pathname):
    """Return 'pathname' in the form used by the native filesystem.

    Filenames in the setup script are always written Unix style, so the
    name is split on '/' and reassembled with the local directory
    separator ('.' components are dropped).  Where the separator already
    is '/', or when 'pathname' is empty, it is returned unchanged.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or ends
    with a slash.
    """
    if os.sep == '/' or not pathname:
        return pathname
    if pathname.startswith('/'):
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname.endswith('/'):
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    components = [part for part in pathname.split('/') if part != '.']
    if not components:
        return os.curdir
    return os.path.join(*components)
138
139# convert_path ()
140
141
def change_root(new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.

    If 'pathname' is relative, this is equivalent to
    "os.path.join(new_root,pathname)".  Otherwise 'pathname' is first made
    relative: on POSIX the leading character is dropped, on Windows the
    drive is dropped together with one leading backslash.  Raises
    DistutilsPlatformError on any other platform.
    """
    if os.name == 'posix':
        if os.path.isabs(pathname):
            pathname = pathname[1:]
        return os.path.join(new_root, pathname)

    if os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice instead of indexing: the remainder is empty for '' or a
        # bare drive such as 'C:', and path[0] would raise IndexError.
        if path[:1] == '\\':
            path = path[1:]
        return os.path.join(new_root, path)

    raise DistutilsPlatformError("nothing known about platform '%s'" % os.name)
162
163
_environ_checked = 0


def check_environ():
    """Make sure 'os.environ' holds every variable that users are promised
    they can use in config files, command-line options, etc.  Currently:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')
    The work is done once; later calls return immediately.
    """
    global _environ_checked
    if not _environ_checked:
        environ = os.environ

        if os.name == 'posix' and 'HOME' not in environ:
            try:
                import pwd
                environ['HOME'] = pwd.getpwuid(os.getuid())[5]
            except (ImportError, KeyError):
                # bpo-10496: the current user id may be missing from the
                # password database; leave HOME unset in that case.
                pass

        if 'PLAT' not in environ:
            environ['PLAT'] = get_platform()

        _environ_checked = 1
190
191
def subst_vars(s, local_vars):
    """
    Perform variable substitution on 's'.
    Variables are written with format-style braces ("{var}").
    Each one is replaced by its value from the 'local_vars' dictionary
    (converted with str()), or from 'os.environ' when 'local_vars' does
    not define it.  'os.environ' is first checked/augmented to guarantee
    that it contains certain values: see 'check_environ()'.  Raise
    ValueError for any variable found in neither mapping.
    """
    check_environ()
    overrides = {name: str(value) for name, value in local_vars.items()}
    # Entries from 'local_vars' take precedence over the environment.
    lookup = {**os.environ, **overrides}
    try:
        return _subst_compat(s).format_map(lookup)
    except KeyError as var:
        raise ValueError(f"invalid variable {var}")
209
210# subst_vars ()
211
212
def _subst_compat(s):
    """
    Replace shell/Perl-style variable substitution ("$var") with
    format-style ("{var}"). For compatibility.

    Returns the converted string.  A DeprecationWarning is issued when at
    least one "$var" reference was rewritten.
    """
    def _subst(match):
        # Tripled braces: the outer pairs are escaped literal braces.
        return f'{{{match.group(1)}}}'

    repl = re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    if repl != s:
        import warnings
        warnings.warn(
            "shell/Perl-style substitutions are deprecated",
            DeprecationWarning,
        )
    return repl
228
229
def grok_environment_error(exc, prefix="error: "):
    """Return 'prefix' followed by the string form of 'exc'.

    Kept for backward compatibility.  It used to try clever things with
    EnvironmentErrors, but nowadays str(exception) produces good messages.
    """
    message = str(exc)
    return prefix + message
235
236
237# Needed by 'split_quoted()'
_wordchars_re = None
_squote_re = None
_dquote_re = None


def _init_regex():
    """Compile the three patterns used by 'split_quoted()' and store them
    in the module-level '_wordchars_re', '_squote_re' and '_dquote_re'."""
    global _wordchars_re, _squote_re, _dquote_re
    # A run of characters that are not a backslash, a quote or whitespace.
    wordchars = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # A complete quoted string, allowing backslash escapes inside it.
    squote = re.compile(r"'(?:[^'\\]|\\.)*'")
    dquote = re.compile(r'"(?:[^"\\]|\\.)*"')
    _wordchars_re, _squote_re, _dquote_re = wordchars, squote, dquote
244
def split_quoted(s):
    """Split a string into words using Unix shell-like rules for quotes
    and backslashes.  Words are separated by whitespace unless that
    whitespace is escaped by a backslash or sits inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is removed from any two-character
    escape sequence, leaving only the escaped character, and the quote
    characters are removed from any quoted string.  Returns a list of
    words; raises ValueError when a quote is never closed.
    """
    # The string is consumed without character-by-character examination:
    # 'remaining' always starts with the word being built, and 'cursor' is
    # the offset inside it from which scanning resumes.
    if _wordchars_re is None:
        _init_regex()

    remaining = s.strip()
    result = []
    cursor = 0

    while remaining:
        stop = _wordchars_re.match(remaining, cursor).end()
        if stop == len(remaining):
            result.append(remaining[:stop])
            break

        char = remaining[stop]
        if char in string.whitespace:
            # unescaped, unquoted whitespace: a definite word delimiter
            result.append(remaining[:stop])
            remaining = remaining[stop:].lstrip()
            cursor = 0

        elif char == '\\':
            # drop the backslash; whatever it escaped stays in the word
            remaining = remaining[:stop] + remaining[stop + 1:]
            cursor = stop + 1

        else:
            if char == "'":
                quoted = _squote_re.match(remaining, stop)
            elif char == '"':
                quoted = _dquote_re.match(remaining, stop)
            else:
                raise RuntimeError("this can't happen (bad char '%c')" % char)

            if quoted is None:
                raise ValueError("bad string (mismatched %s quotes?)" % char)

            # Remove both quote characters and resume right after the
            # text they enclosed (now two characters shorter).
            start, finish = quoted.span()
            remaining = (remaining[:start]
                         + remaining[start + 1:finish - 1]
                         + remaining[finish:])
            cursor = finish - 2

        if cursor >= len(remaining):
            result.append(remaining)
            break

    return result
302
303# split_quoted ()
304
305
def execute(func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg. by
    writing to the filesystem).  Such actions are special because the
    'dry_run' flag disables them.  Supply the function to call and the
    argument tuple for it (together they embody the "external action"),
    plus an optional message to log; when no message is given, one is
    built from the function name and the arguments.  The message is
    logged even when 'dry_run' is true.
    """
    if msg is None:
        msg = f"{func.__name__}{args!r}"
        if msg.endswith(',)'):      # correct for singleton tuple
            msg = msg[:-2] + ')'

    log.info(msg)
    if dry_run:
        return
    func(*args)
323
324
def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    The comparison ignores case.  True values are 'y', 'yes', 't', 'true',
    'on', and '1'; false values are 'n', 'no', 'f', 'false', 'off', and
    '0'.  Raises ValueError if 'val' is anything else.
    """
    normalized = val.lower()
    truthy = ('y', 'yes', 't', 'true', 'on', '1')
    falsy = ('n', 'no', 'f', 'false', 'off', '0')

    if normalized in truthy:
        return 1
    if normalized in falsy:
        return 0
    raise ValueError("invalid truth value %r" % (normalized,))
339
340
def byte_compile(py_files,
                 optimize=0, force=0,
                 prefix=None, base_dir=None,
                 verbose=1, dry_run=0,
                 direct=None):
    """Byte-compile a collection of Python source files to .pyc
    files in a __pycache__ subdirectory.  'py_files' is a list
    of files to compile; any files that don't end in ".py" are silently
    skipped.  'optimize' must be one of the following:
      0 - don't optimize
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.  ValueError is
    raised for a file that does not start with 'prefix'.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    'verbose' is not used here directly; it is only forwarded to the
    script generated in indirect mode.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if sys.dont_write_bytecode is set.
    """

    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        try:
            from tempfile import mkstemp
            (script_fd, script_name) = mkstemp(".py")
        except ImportError:
            from tempfile import mktemp
            (script_fd, script_name) = None, mktemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if dry_run:
            # mkstemp() has already created the file and handed us an open
            # descriptor.  A dry run must leave the filesystem untouched,
            # so release the descriptor and drop the placeholder file.
            if script_fd is not None:
                os.close(script_fd)
                os.remove(script_name)
        else:
            if script_fd is not None:
                script = os.fdopen(script_fd, "w")
            else:
                script = open(script_name, "w")

            with script:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face of
                # chdir'ing before running it).  But this requires abspath'ing
                # 'prefix' as well, and that breaks the hack in build_lib's
                # 'byte_compile()' method that carefully tacks on a trailing
                # slash (os.sep really) to make sure the prefix here is "just
                # right".  This whole prefix business is rather delicate -- the
                # problem is that it's really a directory, but I'm treating it
                # as a dumb string, so trailing slashes and so forth matter.

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))

        cmd = [sys.executable]
        cmd.extend(_optim_args_from_interpreter_flags())
        cmd.append(script_name)
        try:
            spawn(cmd, dry_run=dry_run)
        finally:
            # Clean up the temporary script even when the child fails.
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        import py_compile

        for file in py_files:
            if not file.endswith(".py"):
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'file' by default)
            if optimize >= 0:
                opt = '' if optimize == 0 else optimize
                cfile = importlib.util.cache_from_source(
                    file, optimization=opt)
            else:
                cfile = importlib.util.cache_from_source(file)
            dfile = file
            if prefix:
                if not file.startswith(prefix):
                    raise ValueError("invalid prefix: filename %r doesn't start with %r"
                           % (file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(file, cfile):
                log.info("byte-compiling %s to %s", file, cfile_base)
                if not dry_run:
                    py_compile.compile(file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          file, cfile_base)
487
488# byte_compile ()
489
def rfc822_escape(header):
    """Return 'header' escaped for inclusion in an RFC-822 header: every
    newline is followed by 8 spaces, so the following line reads as a
    continuation line.
    """
    continuation = '\n' + ' ' * 8
    return header.replace('\n', continuation)
497