"""distutils.util

Miscellaneous utility functions -- anything that doesn't fit into
one of the other *util.py modules.
"""

import os
import re
import importlib.util
import string
import sys
import sysconfig
from distutils.errors import DistutilsPlatformError
from distutils.dep_util import newer
from distutils.spawn import spawn
from distutils import log
from distutils.errors import DistutilsByteCompileError
from .py35compat import _optim_args_from_interpreter_flags


def get_host_platform():
    """Return a string that identifies the current platform.  This is used
    mainly to distinguish platform-specific build directories and
    platform-specific built distributions.
    """

    # We initially exposed platforms as defined in Python 3.9
    # even with older Python versions when distutils was split out.
    # Now that we delegate to stdlib sysconfig we need to restore this
    # in case anyone has started to depend on it.

    if sys.version_info < (3, 8):
        if os.name == 'nt':
            if '(arm)' in sys.version.lower():
                return 'win-arm32'
            if '(arm64)' in sys.version.lower():
                return 'win-arm64'

    if sys.version_info < (3, 9):
        if os.name == "posix" and hasattr(os, 'uname'):
            osname, host, release, version, machine = os.uname()
            # NOTE(review): this comparison is case-sensitive and 'osname'
            # is not lower-cased first -- confirm what os.uname() reports
            # on AIX before relying on this branch.
            if osname[:3] == "aix":
                from .py38compat import aix_platform
                return aix_platform(osname, version, release)

    return sysconfig.get_platform()


def get_platform():
    """Return the platform string for the build target.

    On Windows ('nt') the 'VSCMD_ARG_TGT_ARCH' environment variable, when
    set to one of 'x86', 'x64', 'arm' or 'arm64', selects the target
    platform; otherwise (and on every other OS) this is the same as
    'get_host_platform()'.
    """
    if os.name == 'nt':
        TARGET_TO_PLAT = {
            'x86': 'win32',
            'x64': 'win-amd64',
            'arm': 'win-arm32',
            'arm64': 'win-arm64',
        }
        target = os.environ.get('VSCMD_ARG_TGT_ARCH')
        return TARGET_TO_PLAT.get(target) or get_host_platform()
    else:
        return get_host_platform()


# Cache of the version pulled from sysconfig.  It is defined on every
# platform (not only darwin) so that the helpers below never raise
# NameError when they are called on another OS.
_syscfg_macosx_ver = None
MACOSX_VERSION_VAR = 'MACOSX_DEPLOYMENT_TARGET'


def _clear_cached_macosx_ver():
    """For testing only. Do not call."""
    global _syscfg_macosx_ver
    _syscfg_macosx_ver = None


def get_macosx_target_ver_from_syscfg():
    """Get the version of macOS latched in the Python interpreter configuration.
    Returns the version as a string or None if can't obtain one. Cached."""
    global _syscfg_macosx_ver
    if _syscfg_macosx_ver is None:
        # Local import: this is distutils' own sysconfig, which shadows the
        # stdlib 'sysconfig' imported at module level for this function only.
        from distutils import sysconfig
        ver = sysconfig.get_config_var(MACOSX_VERSION_VAR) or ''
        if ver:
            _syscfg_macosx_ver = ver
    return _syscfg_macosx_ver


def get_macosx_target_ver():
    """Return the version of macOS for which we are building.

    The target version defaults to the version in sysconfig latched at time
    the Python interpreter was built, unless overridden by an environment
    variable. If neither source has a value, then None is returned.

    Raises DistutilsPlatformError if the environment asks for a target
    older than 10.3 while the interpreter was configured for 10.3 or later.
    """

    syscfg_ver = get_macosx_target_ver_from_syscfg()
    env_ver = os.environ.get(MACOSX_VERSION_VAR)

    if env_ver:
        # Validate overridden version against sysconfig version, if have both.
        # Ensure that the deployment target of the build process is not less
        # than 10.3 if the interpreter was built for 10.3 or later.  This
        # ensures extension modules are built with correct compatibility
        # values, specifically LDSHARED which can use
        # '-undefined dynamic_lookup' which only works on >= 10.3.
        if syscfg_ver and split_version(syscfg_ver) >= [10, 3] and \
                split_version(env_ver) < [10, 3]:
            my_msg = ('$' + MACOSX_VERSION_VAR + ' mismatch: '
                      'now "%s" but "%s" during configure; '
                      'must use 10.3 or later'
                      % (env_ver, syscfg_ver))
            raise DistutilsPlatformError(my_msg)
        return env_ver
    return syscfg_ver


def split_version(s):
    """Convert a dot-separated string into a list of numbers for comparisons"""
    return [int(n) for n in s.split('.')]


def convert_path(pathname):
    """Return 'pathname' as a name that will work on the native filesystem,
    i.e. split it on '/' and put it back together again using the current
    directory separator.  Needed because filenames in the setup script are
    always supplied in Unix style, and have to be converted to the local
    convention before we can actually use them in the filesystem.  Raises
    ValueError on non-Unix-ish systems if 'pathname' either starts or
    ends with a slash.
    """
    if os.sep == '/':
        return pathname
    if not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Drop every '.' component; a path made only of them is the current dir.
    paths = [part for part in pathname.split('/') if part != '.']
    if not paths:
        return os.curdir
    return os.path.join(*paths)


def change_root(new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
    relative, this is equivalent to "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    Raises DistutilsPlatformError when 'os.name' is neither 'posix' nor 'nt'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            return os.path.join(new_root, pathname[1:])

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # startswith() rather than path[0]: the path part is empty for
        # inputs such as '' or 'C:', and indexing would raise IndexError.
        if path.startswith('\\'):
            path = path[1:]
        return os.path.join(new_root, path)

    else:
        raise DistutilsPlatformError("nothing known about platform '%s'" % os.name)


_environ_checked = 0


def check_environ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    The work is done only once per process; later calls return immediately.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        try:
            import pwd
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except (ImportError, KeyError):
            # bpo-10496: if the current user identifier doesn't exist in the
            # password database, do nothing
            pass

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1


def subst_vars(s, local_vars):
    """
    Perform variable substitution on 's'.
    Variables are indicated by format-style braces ("{var}").
    Variable is substituted by the value found in the 'local_vars'
    dictionary or in 'os.environ' if it's not in 'local_vars'.
    'os.environ' is first checked/augmented to guarantee that it contains
    certain values: see 'check_environ()'.  Raise ValueError for any
    variables not found in either 'local_vars' or 'os.environ'.
    """
    check_environ()
    # 'local_vars' entries are applied last, so they win over os.environ.
    lookup = dict(os.environ)
    lookup.update((name, str(value)) for name, value in local_vars.items())
    try:
        return _subst_compat(s).format_map(lookup)
    except KeyError as var:
        raise ValueError(f"invalid variable {var}") from var


def _subst_compat(s):
    """
    Replace shell/Perl-style variable substitution with
    format-style. For compatibility.

    Emits a DeprecationWarning when at least one "$var" was rewritten.
    """
    def _subst(match):
        return f'{{{match.group(1)}}}'

    repl = re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    if repl != s:
        import warnings
        warnings.warn(
            "shell/Perl-style substitions are deprecated",
            DeprecationWarning,
        )
    return repl


def grok_environment_error(exc, prefix="error: "):
    """Return 'prefix' followed by 'str(exc)'."""
    # Function kept for backward compatibility.
    # Used to try clever things with EnvironmentErrors,
    # but nowadays str(exception) produces good messages.
    return prefix + str(exc)


# Needed by 'split_quoted()'
_wordchars_re = _squote_re = _dquote_re = None


def _init_regex():
    """Compile the regexes used by 'split_quoted()' into module globals."""
    global _wordchars_re, _squote_re, _dquote_re
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')


def split_quoted(s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by spaces, as long as those
    spaces are not escaped by a backslash, or inside a quoted string.
    Single and double quotes are equivalent, and the quote characters can
    be backslash-escaped.  The backslash is stripped from any two-character
    escape sequence, leaving only the escaped character.  The quote
    characters are stripped from any quoted string.  Returns a list of
    words.

    Raises ValueError if a quote is opened but never closed.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    if _wordchars_re is None:
        _init_regex()

    s = s.strip()
    words = []
    pos = 0

    # Invariant: 's' always starts with the word being built, and 'pos' is
    # how far into that word we have already scanned.
    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            words.append(s[:end])
            break

        if s[end] in string.whitespace:
            # unescaped, unquoted whitespace: now
            # we definitely have a word delimiter
            words.append(s[:end])
            s = s[end:].lstrip()
            pos = 0

        elif s[end] == '\\':
            # preserve whatever is being escaped;
            # will become part of the current word
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError("this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError("bad string (mismatched %s quotes?)" % s[end])

            # Cut the two quote characters out of 's'; the scan position
            # moves back by two to account for them.
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words


def execute(func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg.  by
    writing to the filesystem).  Such actions are special because they
    are disabled by the 'dry_run' flag.  This method takes care of all
    that bureaucracy for you; all you have to do is supply the
    function to call and an argument tuple for it (to embody the
    "external action" being performed), and an optional message to
    print.

    'verbose' is accepted but not used by this function.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        if msg[-2:] == ',)':        # correct for singleton tuple
            msg = msg[0:-2] + ')'

    log.info(msg)
    if not dry_run:
        func(*args)


def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
    'val' is anything else.  The comparison is case-insensitive.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))


def byte_compile(py_files,
                 optimize=0, force=0,
                 prefix=None, base_dir=None,
                 verbose=1, dry_run=0,
                 direct=None):
    """Byte-compile a collection of Python source files to .pyc
    files in a __pycache__ subdirectory.  'py_files' is a list
    of files to compile; any files that don't end in ".py" are silently
    skipped.  'optimize' must be one of the following:
      0 - don't optimize
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'basedir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if a filename does not start with 'prefix'.
    """

    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp

        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        try:
            if dry_run:
                # Nothing will be written, but mkstemp() has already opened
                # the file: release the descriptor so it does not leak.
                os.close(script_fd)
            else:
                # The interpreter reads source files as UTF-8 by default,
                # so write the script (and the filenames in it) as UTF-8.
                with os.fdopen(script_fd, "w", encoding="utf-8") as script:
                    script.write("""\
from distutils.util import byte_compile
files = [
""")

                    # XXX would be nice to write absolute filenames, just for
                    # safety's sake (script should be more robust in the face of
                    # chdir'ing before running it).  But this requires abspath'ing
                    # 'prefix' as well, and that breaks the hack in build_lib's
                    # 'byte_compile()' method that carefully tacks on a trailing
                    # slash (os.sep really) to make sure the prefix here is "just
                    # right".  This whole prefix business is rather delicate -- the
                    # problem is that it's really a directory, but I'm treating it
                    # as a dumb string, so trailing slashes and so forth matter.

                    #py_files = map(os.path.abspath, py_files)
                    #if prefix:
                    #    prefix = os.path.abspath(prefix)

                    script.write(",\n".join(map(repr, py_files)) + "]\n")
                    script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))

            cmd = [sys.executable]
            cmd.extend(_optim_args_from_interpreter_flags())
            cmd.append(script_name)
            spawn(cmd, dry_run=dry_run)
        finally:
            # mkstemp() creates the file even in dry-run mode, so it is
            # always deleted -- also when writing or spawning failed.
            log.info("removing %s", script_name)
            os.remove(script_name)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for file in py_files:
            if not file.endswith(".py"):
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'file' by default)
            if optimize >= 0:
                opt = '' if optimize == 0 else optimize
                cfile = importlib.util.cache_from_source(
                    file, optimization=opt)
            else:
                cfile = importlib.util.cache_from_source(file)
            dfile = file
            if prefix:
                if not file.startswith(prefix):
                    raise ValueError("invalid prefix: filename %r doesn't start with %r"
                                     % (file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(file, cfile):
                log.info("byte-compiling %s to %s", file, cfile_base)
                if not dry_run:
                    compile(file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          file, cfile_base)


def rfc822_escape(header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    lines = header.split('\n')
    sep = '\n' + 8 * ' '
    return sep.join(lines)