• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7import os
8import re
9import importlib.util
10import string
11import sys
12import distutils
13from distutils.errors import DistutilsPlatformError
14from distutils.dep_util import newer
15from distutils.spawn import spawn
16from distutils import log
17from distutils.errors import DistutilsByteCompileError
18
def get_host_platform():
    """Return a string that identifies the platform the interpreter runs on.

    The result is mainly used to tell apart platform-specific build
    directories and built distributions.  On POSIX systems it is derived
    from 'os.uname()' and usually has the form "osname-release-machine".

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u

    Windows will return one of:
       win-amd64 ('amd64' appears in sys.version)
       win-arm32 ('(arm)' appears in sys.version)
       win-arm64 ('(arm64)' appears in sys.version)
       win32 (all others - specifically, sys.platform is returned)

    If the '_PYTHON_HOST_PLATFORM' environment variable is set (cross
    builds), its value is returned unchanged on every non-Windows system.

    For other non-POSIX platforms, currently just returns 'sys.platform'.
    """
    if os.name == 'nt':
        version_text = sys.version.lower()
        if 'amd64' in version_text:
            return 'win-amd64'
        if '(arm)' in version_text:
            return 'win-arm32'
        if '(arm64)' in version_text:
            return 'win-arm64'
        return sys.platform

    # Set for cross builds explicitly
    cross_platform = os.environ.get("_PYTHON_HOST_PLATFORM")
    if cross_platform is not None:
        return cross_platform

    if not (os.name == "posix" and hasattr(os, 'uname')):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix
    osname, _host, release, _version, machine = os.uname()

    # Lowercase the OS name and drop '/' from it; in the machine name turn
    # spaces (for "Power Macintosh") into '_' and '/' into '-'.
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname.startswith("linux"):
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return f"{osname}-{machine}"

    if osname.startswith("sunos"):
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = f"{int(release[0]) - 3}.{release[2:]}"
            # We can't use "platform.architecture()[0]" because of a
            # bootstrap problem.  The dict lookup deliberately raises
            # KeyError if sys.maxsize is neither of the expected values.
            bitness = {2147483647: "32bit", 9223372036854775807: "64bit"}
            machine += "." + bitness[sys.maxsize]
        # older releases use the standard osname-release-machine form
    elif osname.startswith("aix"):
        from _aix_support import aix_platform
        return aix_platform()
    elif osname.startswith("cygwin"):
        osname = "cygwin"
        # Keep only the leading run of digits and dots of the release.
        release_match = re.match(r'[\d.]+', release, re.ASCII)
        if release_match:
            release = release_match.group()
    elif osname.startswith("darwin"):
        import _osx_support
        import distutils.sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
            distutils.sysconfig.get_config_vars(),
            osname, release, machine)

    return f"{osname}-{release}-{machine}"
98
def get_platform():
    """Return the platform string for the build target.

    On Windows the 'VSCMD_ARG_TGT_ARCH' environment variable, when it names
    a known architecture, selects the target platform; this allows cross
    builds to differ from the host.  If the variable is unset or holds an
    unknown value, or on any other OS, the result of 'get_host_platform()'
    is returned.
    """
    if os.name == 'nt':
        TARGET_TO_PLAT = {
            'x86' : 'win32',
            'x64' : 'win-amd64',
            'arm' : 'win-arm32',
            # Keep in step with get_host_platform(), which already reports
            # 'win-arm64' for native ARM64 interpreters.
            'arm64' : 'win-arm64',
        }
        return TARGET_TO_PLAT.get(os.environ.get('VSCMD_ARG_TGT_ARCH')) or get_host_platform()
    else:
        return get_host_platform()
109
def convert_path (pathname):
    """Translate the '/'-separated 'pathname' to the native convention.

    Filenames in the setup script are always written Unix style, so they
    must be split on '/' and re-joined with the local directory separator
    before they can be used on the filesystem.  '.' components are dropped;
    if nothing else remains, 'os.curdir' is returned.

    When the native separator already is '/', or 'pathname' is empty, the
    argument is returned untouched.  Otherwise ValueError is raised if
    'pathname' starts or ends with a slash.
    """
    # Nothing to translate on '/'-separated systems or for an empty name.
    if os.sep == '/' or not pathname:
        return pathname
    if pathname.startswith('/'):
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname.endswith('/'):
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    components = [part for part in pathname.split('/') if part != '.']
    if components:
        return os.path.join(*components)
    return os.curdir
134
135# convert_path ()
136
137
def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.

    If 'pathname' is relative, this is equivalent to
    "os.path.join(new_root,pathname)".  Otherwise 'pathname' is first made
    relative by removing its leading separator(s) (and, on Windows, its
    drive) and is then joined onto 'new_root'.

    Raises DistutilsPlatformError when 'os.name' is neither 'posix' nor
    'nt'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        # Strip *every* leading separator: leaving one behind (as with
        # '//usr') would make os.path.join() throw 'new_root' away.
        # Using the first character as the strip set keeps this working
        # for both str and bytes paths.
        return os.path.join(new_root, pathname.lstrip(pathname[:1]))

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slicing (rather than indexing) tolerates an empty path part such
        # as the one produced for "C:"; both separator spellings count as
        # a leading root on Windows.
        while path[:1] in ('\\', '/'):
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError("nothing known about platform '%s'" % os.name)
158
159
# Set to 1 once check_environ() has completed, so later calls are no-ops.
_environ_checked = 0
def check_environ():
    """Make sure 'os.environ' holds the variables distutils promises.

    Users may rely on these variables in config files, command-line
    options, etc.  Variables that are already set are left alone:
      HOME - user's home directory (POSIX only; taken from the password
             database, silently skipped if it cannot be determined)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    The work is done only on the first call.
    """
    global _environ_checked
    if not _environ_checked:
        environ = os.environ

        if os.name == 'posix' and 'HOME' not in environ:
            try:
                import pwd
                environ['HOME'] = pwd.getpwuid(os.getuid()).pw_dir
            except (ImportError, KeyError):
                # bpo-10496: if the current user identifier doesn't exist
                # in the password database, do nothing
                pass

        if 'PLAT' not in environ:
            environ['PLAT'] = get_platform()

        _environ_checked = 1
186
187
def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on the string 's'.

    Every occurrence of '$' followed by a name (a letter or underscore,
    then letters, digits or underscores) is considered a variable, and
    is replaced by the value found in the 'local_vars' dictionary
    (converted with str()), or in 'os.environ' if it's not in
    'local_vars'.  'os.environ' is first checked/augmented to guarantee
    that it contains certain values: see 'check_environ()'.

    Returns the substituted string.  Raises ValueError for any variable
    found in neither 'local_vars' nor 'os.environ'.
    """
    check_environ()
    def _subst (match, local_vars=local_vars):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        else:
            return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as var:
        # Interpolate the missing key itself: str(KeyError) is the repr of
        # the key, which would render the message as '$'NAME'' with
        # doubled quotes.
        raise ValueError("invalid variable '$%s'" % var.args[0]) from None
209
210# subst_vars ()
211
212
def grok_environment_error (exc, prefix="error: "):
    """Return 'prefix' followed by the string form of the exception 'exc'.

    Kept only for backward compatibility: it used to try clever things
    with EnvironmentErrors, but nowadays str(exception) already produces
    good messages.
    """
    message = str(exc)
    return prefix + message
218
219
# Patterns needed by 'split_quoted()'; compiled on first use by
# '_init_regex()'.
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the three module-level patterns used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    # A (possibly empty) run of characters that are neither a backslash,
    # a quote, nor whitespace.
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # A complete single- / double-quoted string, allowing backslash escapes.
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split a string into words using Unix shell-like quoting rules.

    Words are delimited by whitespace, unless that whitespace is escaped
    by a backslash or sits inside a quoted string.  Single and double
    quotes are equivalent, and the quote characters can be
    backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character, and the quote
    characters are stripped from any quoted string.

    Returns the list of words.  Raises ValueError if a quote is opened
    but never closed.
    """
    # The string is consumed from the front: 's' always holds the text
    # not yet emitted as words, and 'pos' is where scanning resumes inside
    # the word currently being assembled at the start of 's'.
    if _wordchars_re is None:
        _init_regex()

    quote_patterns = {"'": _squote_re, '"': _dquote_re}

    s = s.strip()
    words = []
    pos = 0

    while s:
        end = _wordchars_re.match(s, pos).end()
        if end == len(s):
            # plain characters run to the end: that is the last word
            words.append(s[:end])
            break

        stop_char = s[end]
        if stop_char in string.whitespace:
            # unescaped, unquoted whitespace: a definite word delimiter
            words.append(s[:end])
            s = s[end:].lstrip()
            pos = 0

        elif stop_char == '\\':
            # drop the backslash; the character it escaped stays part of
            # the current word and is skipped by the next scan
            s = s[:end] + s[end+1:]
            pos = end + 1

        else:
            quote_re = quote_patterns.get(stop_char)
            if quote_re is None:
                raise RuntimeError("this can't happen (bad char '%c')" % stop_char)

            quoted = quote_re.match(s, end)     # slurp the quoted string
            if quoted is None:
                raise ValueError("bad string (mismatched %s quotes?)" % stop_char)

            (beg, end) = quoted.span()
            # splice the quoted text back in without its two quote chars
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = quoted.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words
285
286# split_quoted ()
287
288
def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Run an action that affects the outside world, honouring 'dry_run'.

    Such actions (eg. writing to the filesystem) are special because the
    'dry_run' flag disables them.  Supply the function to call, the
    argument tuple for it (together embodying the "external action"), and
    optionally a message to log.  Without 'msg', a call-like text is built
    from the function name and 'args'.

    The message is logged at info level in every case; 'func(*args)' is
    called only when 'dry_run' is false.  'verbose' is accepted but not
    used here.
    """
    if msg is None:
        msg = f"{func.__name__}{args!r}"
        if msg.endswith(',)'):      # correct for singleton tuple
            msg = msg[:-2] + ')'

    log.info(msg)
    if dry_run:
        return
    func(*args)
306
307
def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    The comparison ignores case.  True values are 'y', 'yes', 't',
    'true', 'on', and '1'; false values are 'n', 'no', 'f', 'false',
    'off', and '0'.  Raises ValueError if 'val' is anything else.
    """
    truth_table = {
        'y': 1, 'yes': 1, 't': 1, 'true': 1, 'on': 1, '1': 1,
        'n': 0, 'no': 0, 'f': 0, 'false': 0, 'off': 0, '0': 0,
    }
    val = val.lower()
    result = truth_table.get(val)
    if result is None:
        raise ValueError("invalid truth value %r" % (val,))
    return result
322
323
def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to .pyc
    files in a __pycache__ subdirectory.  'py_files' is a list
    of files to compile; any files that don't end in ".py" are silently
    skipped.  'optimize' must be one of the following:
      0 - don't optimize
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem (the temporary script used in indirect mode is
    created empty and removed again).

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out whether to use direct compilation or not
    (see the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if a filename does not start with 'prefix'.
    """

    # Late import to fix a bootstrap issue: _posixsubprocess is built by
    # setup.py, but setup.py uses distutils.
    import subprocess

    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        # mkstemp() creates the file atomically, avoiding the race
        # inherent in the old mktemp() fallback.
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        try:
            if dry_run:
                # mkstemp() has already opened the file; release the
                # descriptor instead of leaking it.
                os.close(script_fd)
            else:
                with os.fdopen(script_fd, "w") as script:
                    script.write("""\
from distutils.util import byte_compile
files = [
""")

                    # XXX would be nice to write absolute filenames, just
                    # for safety's sake (script should be more robust in
                    # the face of chdir'ing before running it).  But this
                    # requires abspath'ing 'prefix' as well, and that
                    # breaks the hack in build_lib's 'byte_compile()'
                    # method that carefully tacks on a trailing slash
                    # (os.sep really) to make sure the prefix here is
                    # "just right".  This whole prefix business is rather
                    # delicate -- the problem is that it's really a
                    # directory, but I'm treating it as a dumb string, so
                    # trailing slashes and so forth matter.

                    script.write(",\n".join(map(repr, py_files)) + "]\n")
                    script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))

            # Run the script in a child interpreter that uses the same
            # optimization flags as this one and does not repeat the
            # distutils deprecation warning.
            msg = distutils._DEPRECATION_MESSAGE
            cmd = [sys.executable]
            cmd.extend(subprocess._optim_args_from_interpreter_flags())
            cmd.append(f'-Wignore:{msg}:DeprecationWarning')
            cmd.append(script_name)
            spawn(cmd, dry_run=dry_run)
        finally:
            # The temporary file exists even in dry-run mode and when the
            # child fails, so it is always removed.
            execute(os.remove, (script_name,), "removing %s" % script_name)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for file in py_files:
            if file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'file' by default)
            if optimize >= 0:
                opt = '' if optimize == 0 else optimize
                cfile = importlib.util.cache_from_source(
                    file, optimization=opt)
            else:
                cfile = importlib.util.cache_from_source(file)
            dfile = file
            if prefix:
                if file[:len(prefix)] != prefix:
                    raise ValueError("invalid prefix: filename %r doesn't start with %r"
                           % (file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(file, cfile):
                log.info("byte-compiling %s to %s", file, cfile_base)
                if not dry_run:
                    compile(file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          file, cfile_base)

# byte_compile ()
472
473# byte_compile ()
474
def rfc822_escape (header):
    """Return 'header' escaped for inclusion in an RFC-822 header.

    Every newline is followed by 8 spaces, so that the following line
    starts with that indentation.
    """
    continuation = '\n' + 8 * ' '
    return header.replace('\n', continuation)
482
483# 2to3 support
484
def run_2to3(files, fixer_names=None, options=None, explicit=None):
    """Invoke 2to3 on a list of Python files.

    The files should all come from the build area, as the
    modification is done in-place. To reduce the build time,
    only files modified since the last invocation of this
    function should be passed in the files argument.

    'fixer_names' defaults to every fixer in the 'lib2to3.fixes'
    package.  'options' and 'explicit' are handed to lib2to3's
    RefactoringTool unchanged.  Nothing is done (and lib2to3 is not
    imported) when 'files' is empty.
    """

    if not files:
        return

    # Make this class local, to delay import of 2to3
    from lib2to3.refactor import RefactoringTool, get_fixers_from_package
    class DistutilsRefactoringTool(RefactoringTool):
        # Route the tool's messages through the distutils log.
        def log_error(self, msg, *args, **kw):
            log.error(msg, *args)

        def log_message(self, msg, *args):
            log.info(msg, *args)

        def log_debug(self, msg, *args):
            log.debug(msg, *args)

    if fixer_names is None:
        fixer_names = get_fixers_from_package('lib2to3.fixes')
    # Forward 'explicit' too; it used to be accepted and then silently
    # dropped, so explicitly requested fixers never ran.
    r = DistutilsRefactoringTool(fixer_names, options=options,
                                 explicit=explicit)
    r.refactor(files, write=True)
511
def copydir_run_2to3(src, dest, template=None, fixer_names=None,
                     options=None, explicit=None):
    """Recursively copy a directory, only copying new and changed files,
    running run_2to3 over all newly copied Python modules afterward.

    If you give a template string, it's parsed like a MANIFEST.in: each
    non-blank line is handed to the file list as a template line.

    Returns the list of destination paths that were actually copied.
    """
    from distutils.dir_util import mkpath
    from distutils.file_util import copy_file
    from distutils.filelist import FileList

    # Collect the candidate files while temporarily working inside 'src';
    # the previous working directory is restored even if the scan fails.
    filelist = FileList()
    previous_cwd = os.getcwd()
    os.chdir(src)
    try:
        filelist.findall()
    finally:
        os.chdir(previous_cwd)
    filelist.files[:] = filelist.allfiles

    if template:
        for template_line in template.splitlines():
            template_line = template_line.strip()
            if template_line:
                filelist.process_template_line(template_line)

    copied = []
    for filename in filelist.files:
        outname = os.path.join(dest, filename)
        mkpath(os.path.dirname(outname))
        # update=1 asks copy_file to skip files that are already current;
        # the second item of its result says whether a copy happened.
        if copy_file(os.path.join(src, filename), outname, update=1)[1]:
            copied.append(outname)

    python_modules = [fn for fn in copied if fn.lower().endswith('.py')]
    run_2to3(python_modules,
             fixer_names=fixer_names, options=options, explicit=explicit)
    return copied
544
class Mixin2to3:
    """Mixin class for commands that run 2to3.

    To configure 2to3, setup scripts may either change
    the class variables, or inherit from individual commands
    to override how 2to3 is invoked.
    """

    # provide list of fixers to run;
    # defaults to all from lib2to3.fixers
    fixer_names = None

    # options dictionary
    options = None

    # list of fixers to invoke even though they are marked as explicit
    explicit = None

    def run_2to3(self, files):
        """Run the module-level run_2to3() on 'files' with this object's
        'fixer_names', 'options' and 'explicit' settings."""
        return run_2to3(files,
                        fixer_names=self.fixer_names,
                        options=self.options,
                        explicit=self.explicit)
563