• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""distutils.util
2
3Miscellaneous utility functions -- anything that doesn't fit into
4one of the other *util.py modules.
5"""
6
7import os
8import re
9import importlib.util
10import string
11import sys
12from distutils.errors import DistutilsPlatformError
13from distutils.dep_util import newer
14from distutils.spawn import spawn
15from distutils import log
16from distutils.errors import DistutilsByteCompileError
17
def get_host_platform():
    """Return a string that identifies the platform we are running on.

    The value is mainly used to tell platform-specific build directories
    and built distributions apart.  On POSIX systems it is assembled from
    the fields returned by 'os.uname()'; which fields are used depends on
    the OS (eg. on Linux the kernel release is left out).

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u

    Windows will return one of:
       win-amd64 ('amd64' appears in 'sys.version')
       win-arm32 ('(arm)' appears in 'sys.version')
       win-arm64 ('(arm64)' appears in 'sys.version')
       win32 (all others - specifically, 'sys.platform' is returned)

    On non-Windows systems the '_PYTHON_HOST_PLATFORM' environment
    variable, when set, is returned unchanged.  Other non-POSIX platforms
    (or POSIX ones lacking 'os.uname()') just get 'sys.platform'.
    """
    if os.name == 'nt':
        version_text = sys.version.lower()
        if 'amd64' in version_text:
            return 'win-amd64'
        if '(arm)' in version_text:
            return 'win-arm32'
        if '(arm64)' in version_text:
            return 'win-arm64'
        return sys.platform

    # Cross builds name their host platform explicitly
    cross_platform = os.environ.get("_PYTHON_HOST_PLATFORM")
    if cross_platform is not None:
        return cross_platform

    if not (os.name == "posix" and hasattr(os, 'uname')):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # From here on: tell the various flavours of Unix apart
    osname, _host, release, version, machine = os.uname()

    # Lowercase the OS name and drop '/' from it; in the machine name
    # turn spaces into '_' (for "Power Macintosh") and '/' into '-'
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    # These two flavours have their own output layout
    if osname.startswith("linux"):
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    if osname.startswith("aix"):
        return "%s-%s.%s" % (osname, version, release)

    # The remaining flavours only adjust the pieces and then share the
    # standard osname-release-machine representation
    if osname.startswith("sunos"):
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
            # "platform.architecture()[0]" can't be used here because of
            # a bootstrap problem.  Looking the value up in a dict makes
            # anything unexpected fail loudly with a KeyError.
            bitness = {2147483647:"32bit", 9223372036854775807:"64bit"}
            machine += ".%s" % bitness[sys.maxsize]
    elif osname.startswith("cygwin"):
        osname = "cygwin"
        # keep only the leading run of digits and dots of the release
        leading_version = re.match(r'[\d.]+', release, re.ASCII)
        if leading_version:
            release = leading_version.group()
    elif osname.startswith("darwin"):
        import _osx_support
        import distutils.sysconfig
        config_vars = distutils.sysconfig.get_config_vars()
        osname, release, machine = _osx_support.get_platform_osx(
            config_vars, osname, release, machine)

    return "%s-%s-%s" % (osname, release, machine)
96
def get_platform():
    """Return a string that identifies the platform being built for.

    On Windows the 'VSCMD_ARG_TGT_ARCH' environment variable (presumably
    exported by the Visual Studio command-prompt scripts -- confirm) is
    consulted first, so that a cross-compiling toolchain selects the
    target platform rather than the host one.  When the variable is
    unset or holds an unknown architecture, and on every other OS, the
    result of 'get_host_platform()' is returned.
    """
    if os.name == 'nt':
        target_to_plat = {
            'x86': 'win32',
            'x64': 'win-amd64',
            'arm': 'win-arm32',
            # 'get_host_platform()' already reports 'win-arm64' hosts, so
            # the matching target architecture has to be known here too
            'arm64': 'win-arm64',
        }
        target_arch = os.environ.get('VSCMD_ARG_TGT_ARCH')
        return target_to_plat.get(target_arch) or get_host_platform()
    return get_host_platform()
107
def convert_path (pathname):
    """Translate the Unix-style 'pathname' to the native convention.

    Filenames in the setup script are always written with '/' as the
    separator, so before they can be used on the filesystem they are
    split on '/' and glued back together with the local directory
    separator ('.' components are dropped along the way).  Where the
    separator already is '/', and for an empty 'pathname', the argument
    is returned untouched.

    Raises ValueError on non-Unix-ish systems if 'pathname' either starts
    or ends with a slash.
    """
    # Nothing to translate: native separator is '/' or the name is empty
    if os.sep == '/' or not pathname:
        return pathname

    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    components = [part for part in pathname.split('/') if part != '.']
    if components:
        return os.path.join(*components)
    # only '.' components were given: that is the current directory
    return os.curdir
132
133# convert_path ()
134
135
def change_root (new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.

    If 'pathname' is relative, this is equivalent to
    "os.path.join(new_root,pathname)".  Otherwise 'pathname' is first made
    relative by dropping its drive (Windows only) and *all* of its leading
    separators, and then joined to 'new_root'.

    Raises DistutilsPlatformError when 'os.name' is neither 'posix' nor
    'nt'.
    """
    if os.name == 'posix':
        # Strip every leading '/': os.path.join() throws away 'new_root'
        # as soon as the second argument is still absolute, which is what
        # used to happen for names such as '//usr/lib'.
        while os.path.isabs(pathname):
            pathname = pathname[1:]
        return os.path.join(new_root, pathname)

    if os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slicing instead of indexing keeps an empty 'path' (eg. a bare
        # drive) from raising IndexError; '/' roots a path on Windows
        # just like '\\' does, so both are removed.
        while path[:1] in ('\\', '/'):
            path = path[1:]
        return os.path.join(new_root, path)

    raise DistutilsPlatformError("nothing known about platform '%s'" % os.name)
156
157
# Set to 1 once 'check_environ()' has done its work
_environ_checked = 0
def check_environ ():
    """Make sure 'os.environ' holds the variables we promise to users.

    These variables may be referenced in config files, command-line
    options, etc.  Currently they are:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Variables that are already set are left alone, and the work is done
    only on the first call.
    """
    global _environ_checked
    if _environ_checked:
        return

    environ = os.environ

    home_missing = os.name == 'posix' and 'HOME' not in environ
    if home_missing:
        try:
            import pwd
            environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except (ImportError, KeyError):
            # bpo-10496: if the current user identifier doesn't exist in the
            # password database, do nothing
            pass

    if 'PLAT' not in environ:
        environ['PLAT'] = get_platform()

    _environ_checked = 1
184
185
def subst_vars (s, local_vars):
    """Perform shell/Perl-style variable substitution on 's'.

    Every occurrence of '$' followed by a name (a letter or underscore,
    then letters, digits or underscores) is considered a variable, and
    the variable is replaced by the value found in the 'local_vars'
    dictionary (converted with 'str()'), or in 'os.environ' if it's not
    in 'local_vars'.  'os.environ' is first checked/augmented to
    guarantee that it contains certain values: see 'check_environ()'.

    Returns the substituted string.  Raises ValueError for any variable
    found in neither 'local_vars' nor 'os.environ'.
    """
    check_environ()

    def _subst (match, local_vars=local_vars):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        try:
            return os.environ[var_name]
        except KeyError:
            # Report the plain variable name.  Formatting the KeyError
            # object itself would insert its repr() and yield a doubly
            # quoted message such as "invalid variable '$'foo''".
            raise ValueError("invalid variable '$%s'" % var_name) from None

    return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
207
208# subst_vars ()
209
210
def grok_environment_error (exc, prefix="error: "):
    """Return 'prefix' followed by the string form of 'exc'.

    Kept only for backward compatibility: this function used to try
    clever things with EnvironmentErrors, but nowadays 'str(exception)'
    already produces good messages.
    """
    message = str(exc)
    return prefix + message
216
217
# Patterns needed by 'split_quoted()'; compiled on first use by
# '_init_regex()'
_wordchars_re = _squote_re = _dquote_re = None
def _init_regex():
    """Compile the three module-level patterns used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    # a (possibly empty) run without backslash, quotes or whitespace
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # a complete singly- resp. doubly-quoted string, escapes allowed inside
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split 's' into words following Unix shell-like quoting rules.

    In short: words are delimited by whitespace, as long as that
    whitespace is neither escaped by a backslash nor inside a quoted
    string.  Single and double quotes are equivalent.  An unquoted
    backslash is dropped and the character after it kept as part of the
    current word; the quote characters around a quoted string are
    dropped as well.

    Returns the list of words.  Raises ValueError when a quote character
    has no matching closing quote.
    """
    # The string is consumed one "interesting" character at a time (the
    # first whitespace, backslash or quote after 'scan_from') rather than
    # character by character.  'remaining' always starts with the word
    # currently being assembled.
    if _wordchars_re is None:
        _init_regex()

    remaining = s.strip()
    result = []
    scan_from = 0

    while remaining:
        stop = _wordchars_re.match(remaining, scan_from).end()
        if stop == len(remaining):
            # ran off the end: everything left is the last word
            result.append(remaining[:stop])
            break

        char = remaining[stop]
        if char in string.whitespace:
            # unescaped, unquoted whitespace: a definite word delimiter
            result.append(remaining[:stop])
            remaining = remaining[stop:].lstrip()
            scan_from = 0

        elif char == '\\':
            # drop the backslash; the character it escaped stays in the
            # current word and is skipped by the next scan
            remaining = remaining[:stop] + remaining[stop+1:]
            scan_from = stop + 1

        else:
            if char == "'":             # slurp singly-quoted string
                quoted = _squote_re.match(remaining, stop)
            elif char == '"':           # slurp doubly-quoted string
                quoted = _dquote_re.match(remaining, stop)
            else:
                raise RuntimeError("this can't happen (bad char '%c')" % char)

            if quoted is None:
                raise ValueError("bad string (mismatched %s quotes?)" % char)

            # cut out the two quote characters, keep what they enclosed,
            # and resume scanning right after the former closing quote
            (start, finish) = quoted.span()
            remaining = (remaining[:start]
                         + remaining[start+1:finish-1]
                         + remaining[finish:])
            scan_from = quoted.end() - 2

        if scan_from >= len(remaining):
            result.append(remaining)
            break

    return result
283
284# split_quoted ()
285
286
def execute (func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world.

    Such actions (eg. writing to the filesystem) are special because
    they are disabled by the 'dry_run' flag.  This function handles that
    bureaucracy: supply the function to call and the argument tuple for
    it (together they embody the "external action"), plus an optional
    message.  The message is logged at info level in any case; 'func' is
    only called when 'dry_run' is false.  Without 'msg', a message is
    built from the function name and 'args'.  'verbose' is accepted but
    not used.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        if msg.endswith(',)'):      # correct for singleton tuple
            msg = msg[:-2] + ')'

    log.info(msg)
    if dry_run:
        return
    func(*args)
304
305
def strtobool (val):
    """Convert a string representation of truth to true (1) or false (0).

    The comparison ignores case.  True values are 'y', 'yes', 't',
    'true', 'on', and '1'; false values are 'n', 'no', 'f', 'false',
    'off', and '0'.  Raises ValueError if 'val' is anything else.
    """
    true_words = ('y', 'yes', 't', 'true', 'on', '1')
    false_words = ('n', 'no', 'f', 'false', 'off', '0')

    val = val.lower()
    if val in true_words:
        return 1
    if val in false_words:
        return 0
    raise ValueError("invalid truth value %r" % (val,))
320
321
def byte_compile (py_files,
                  optimize=0, force=0,
                  prefix=None, base_dir=None,
                  verbose=1, dry_run=0,
                  direct=None):
    """Byte-compile a collection of Python source files to .pyc
    files in a __pycache__ subdirectory.  'py_files' is a list
    of files to compile; any files that don't end in ".py" are silently
    skipped.  'optimize' must be one of the following:
      0 - don't optimize
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, nothing is compiled and nothing is left behind
    on the filesystem (in indirect mode a temporary script file is
    still created to obtain its name, but it is removed again).

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is
    set, and ValueError if 'prefix' is given but some filename doesn't
    start with it.
    """

    # Late import to fix a bootstrap issue: _posixsubprocess is built by
    # setup.py, but setup.py uses distutils.
    import subprocess

    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        try:
            log.info("writing byte-compilation script '%s'", script_name)
            if dry_run:
                # Nothing will be written, but mkstemp() has already
                # opened the file: release the descriptor.
                os.close(script_fd)
            else:
                with os.fdopen(script_fd, "w") as script:
                    script.write("from distutils.util import byte_compile\n"
                                 "files = [\n")

                    # XXX would be nice to write absolute filenames, just
                    # for safety's sake (script should be more robust in
                    # the face of chdir'ing before running it).  But this
                    # requires abspath'ing 'prefix' as well, and that
                    # breaks the hack in build_lib's 'byte_compile()'
                    # method that carefully tacks on a trailing slash
                    # (os.sep really) to make sure the prefix here is
                    # "just right".  This whole prefix business is rather
                    # delicate -- the problem is that it's really a
                    # directory, but I'm treating it as a dumb string, so
                    # trailing slashes and so forth matter.
                    script.write(",\n".join(map(repr, py_files)) + "]\n")
                    script.write(
                        "\n"
                        "byte_compile(files, optimize=%r, force=%r,\n"
                        "             prefix=%r, base_dir=%r,\n"
                        "             verbose=%r, dry_run=0,\n"
                        "             direct=1)\n"
                        % (optimize, force, prefix, base_dir, verbose))

            cmd = [sys.executable]
            cmd.extend(subprocess._optim_args_from_interpreter_flags())
            cmd.append(script_name)
            spawn(cmd, dry_run=dry_run)
        finally:
            # The temporary file exists even in dry-run mode and even when
            # the child process failed, so it is always removed for real.
            execute(os.remove, (script_name,), "removing %s" % script_name)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for source in py_files:
            if source[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'source' by
            #           default)
            if optimize >= 0:
                opt = '' if optimize == 0 else optimize
                cfile = importlib.util.cache_from_source(
                    source, optimization=opt)
            else:
                cfile = importlib.util.cache_from_source(source)
            dfile = source
            if prefix:
                if source[:len(prefix)] != prefix:
                    raise ValueError("invalid prefix: filename %r doesn't start with %r"
                           % (source, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(source, cfile):
                log.info("byte-compiling %s to %s", source, cfile_base)
                if not dry_run:
                    compile(source, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          source, cfile_base)
468
469# byte_compile ()
470
def rfc822_escape (header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    continuation = '\n' + ' ' * 8
    return header.replace('\n', continuation)
478
479# 2to3 support
480
def run_2to3(files, fixer_names=None, options=None, explicit=None):
    """Invoke 2to3 on a list of Python files.

    The files should all come from the build area, as the
    modification is done in-place. To reduce the build time,
    only files modified since the last invocation of this
    function should be passed in the files argument.

    'fixer_names' lists the fixers to run and defaults to every fixer of
    the 'lib2to3.fixes' package.  'options' and 'explicit' are handed to
    the refactoring tool unchanged.  Nothing is done (and lib2to3 is not
    even imported) when 'files' is empty.
    """

    if not files:
        return

    # Make this class local, to delay import of 2to3
    from lib2to3.refactor import RefactoringTool, get_fixers_from_package

    class DistutilsRefactoringTool(RefactoringTool):
        """RefactoringTool that reports through the distutils log."""

        def log_error(self, msg, *args, **kw):
            log.error(msg, *args)

        def log_message(self, msg, *args):
            log.info(msg, *args)

        def log_debug(self, msg, *args):
            log.debug(msg, *args)

    if fixer_names is None:
        fixer_names = get_fixers_from_package('lib2to3.fixes')
    # 'explicit' used to be accepted but silently dropped, so fixers
    # marked as explicit could never be enabled through this function.
    tool = DistutilsRefactoringTool(fixer_names, options=options,
                                    explicit=explicit)
    tool.refactor(files, write=True)
507
def copydir_run_2to3(src, dest, template=None, fixer_names=None,
                     options=None, explicit=None):
    """Recursively copy a directory, only copying new and changed files,
    running run_2to3 over all newly copied Python modules afterward.

    If you give a template string, it's parsed like a MANIFEST.in.

    Returns the list of destination filenames that were actually copied.
    """
    from distutils.dir_util import mkpath
    from distutils.file_util import copy_file
    from distutils.filelist import FileList

    # Collect the files relative to 'src' by scanning from inside it,
    # then return to the directory we started in no matter what.
    filelist = FileList()
    original_dir = os.getcwd()
    os.chdir(src)
    try:
        filelist.findall()
    finally:
        os.chdir(original_dir)
    filelist.files[:] = filelist.allfiles

    if template:
        for raw_line in template.splitlines():
            stripped = raw_line.strip()
            if stripped:
                filelist.process_template_line(stripped)

    copied = []
    for relname in filelist.files:
        outname = os.path.join(dest, relname)
        mkpath(os.path.dirname(outname))
        outcome = copy_file(os.path.join(src, relname), outname, update=1)
        # second item of the result: true when the file was really copied
        if outcome[1]:
            copied.append(outname)

    python_sources = [fn for fn in copied if fn.lower().endswith('.py')]
    run_2to3(python_sources,
             fixer_names=fixer_names, options=options, explicit=explicit)
    return copied
540
class Mixin2to3:
    '''Mixin class for commands that run 2to3.

    Setup scripts can configure 2to3 in two ways: by changing the
    class variables below, or by inheriting from an individual command
    and overriding how 2to3 is invoked.'''

    # list of fixers to run; None (the default) selects all fixers
    # from lib2to3.fixes
    fixer_names = None

    # options dictionary
    options = None

    # list of fixers to invoke even though they are marked as explicit
    explicit = None

    def run_2to3(self, files):
        '''Run the module-level run_2to3() on 'files' with this
        object's configuration.'''
        return run_2to3(files,
                        fixer_names=self.fixer_names,
                        options=self.options,
                        explicit=self.explicit)
559