• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# This is a "stage 2" bootstrap. We can assume we're running under the desired
# interpreter, with some of the basic interpreter options/envvars set.
# However, more setup is required to make the app's real main file runnable.
4
5import sys
6
# By default the Python interpreter prepends the directory containing this
# script (following symlinks) to the import path. This is the cause of #9239,
# and is a special case of #7091.
#
# Python 3.11 introduced a PYTHONSAFEPATH (-P) option that disables this
# behaviour, which we set in the stage 1 bootstrap.
# So the prepended entry needs to be removed only if the above option is either
# unset or not supported by the interpreter.
# NOTE: This can be removed when Python 3.10 and below are no longer supported.
16if not getattr(sys.flags, "safe_path", False):
17    del sys.path[0]
18
19import contextlib
20import os
21import re
22import runpy
23import uuid
24
# ===== Template substitutions start =====
# We just put them in one place so it's easy to tell which are used.
27
28# Runfiles-relative path to the main Python source file.
29MAIN = "%main%"
30
31# ===== Template substitutions end =====
32
33
def is_windows():
    """Return True when the interpreter reports the Windows ("nt") OS family."""
    running_on_nt = os.name == "nt"
    return running_on_nt
37
38
def get_windows_path_with_unc_prefix(path):
    r"""Return `path`, adding the Windows `\\?\` long-path prefix when needed.

    The path is always stripped of surrounding whitespace. It is returned
    without a prefix when any of the following holds:

    * the platform is not Windows (or the interpreter is Python 2);
    * the Windows build is 10.0.14393 (version 1607) or newer;
    * the interpreter was built for the "mingw" platform;
    * the path already starts with the prefix.

    Otherwise the path is made absolute and normalized with
    `os.path.abspath` and the prefix is prepended.

    Args:
      path: The file system path to adjust.

    Returns:
      The (possibly prefixed) path string.
    """
    path = path.strip()

    # No need to add prefix for non-Windows platforms.
    if not is_windows() or sys.version_info[0] < 3:
        return path

    # Starting in Windows 10, version 1607 (OS build 14393), MAX_PATH
    # limitations have been removed from common Win32 file and directory
    # functions.
    # Related doc: https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd#enable-long-paths-in-windows-10-version-1607-and-later
    import platform

    # Compare the version numerically. A plain string comparison is
    # lexicographic, which would rank e.g. "6.1.7601" above "10.0.14393".
    version_parts = []
    for piece in platform.win32_ver()[1].split("."):
        if not piece.isdigit():
            # Unknown/empty version component: stop and compare what we have.
            break
        version_parts.append(int(piece))
    if tuple(version_parts) >= (10, 0, 14393):
        return path

    # sysconfig is imported lazily; it is only needed on this rare code path.
    import sysconfig

    if sysconfig.get_platform() == "mingw":
        return path

    # Prefix that tells Win32 APIs to skip MAX_PATH handling.
    unicode_prefix = "\\\\?\\"
    if path.startswith(unicode_prefix):
        return path

    # os.path.abspath returns a normalized absolute path
    return unicode_prefix + os.path.abspath(path)
66
67
def search_path(name):
    """Look up an executable file called `name` in the PATH directories.

    The directories come from the PATH environment variable, falling back to
    `os.defpath` when PATH is unset. Empty entries are ignored.

    Returns:
      The first joined path that is a regular file with execute permission,
      or None when no directory contains such a file.
    """
    directories = os.getenv("PATH", os.defpath).split(os.pathsep)
    candidates = (os.path.join(entry, name) for entry in directories if entry)
    return next(
        (
            candidate
            for candidate in candidates
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK)
        ),
        None,
    )
77
78
def is_verbose():
    """Return True when RULES_PYTHON_BOOTSTRAP_VERBOSE is set and non-empty."""
    flag = os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE")
    return True if flag else False
81
82
def print_verbose(*args, mapping=None, values=None):
    """Write a debug message to stderr when bootstrap verbosity is enabled.

    Every emitted line starts with "bootstrap: stage 2:" followed by `args`.

    Args:
      *args: Values printed on every line.
      mapping: Optional mapping; when given, one line is printed per item
        (sorted), ending in `key=repr(value)`. Takes precedence over `values`.
      values: Optional iterable; when given, one line is printed per element,
        ending in `[index] repr(element)`.
    """
    if not is_verbose():
        return

    prefix = "bootstrap: stage 2:"
    if mapping is not None:
        entries = mapping if mapping else {}
        # Lazy generator: each suffix is formatted right before it is printed.
        suffixes = (f"{key}={value!r}" for key, value in sorted(entries.items()))
    elif values is not None:
        suffixes = (f"[{i}] {v!r}" for i, v in enumerate(values))
    else:
        print(prefix, *args, file=sys.stderr, flush=True)
        return

    for suffix in suffixes:
        print(prefix, *args, suffix, file=sys.stderr, flush=True)
105
106
def print_verbose_coverage(*args):
    """Print `args` to stderr, but only when VERBOSE_COVERAGE is non-empty."""
    if not os.environ.get("VERBOSE_COVERAGE"):
        return
    print(*args, file=sys.stderr, flush=True)
111
112
def is_verbose_coverage():
    """Return True when coverage diagnostics should be printed.

    That is the case when VERBOSE_COVERAGE is non-empty in the environment,
    or when general bootstrap verbosity (`is_verbose()`) is enabled.

    Returns:
      A real bool, so the result can be passed to APIs expecting a flag
      rather than leaking the raw environment string.
    """
    return bool(os.environ.get("VERBOSE_COVERAGE") or is_verbose())
116
117
def find_runfiles_root(main_rel_path):
    """Locate the root directory of the runfiles tree.

    The caller-provided RUNFILES_DIR (or a directory derived from
    RUNFILES_MANIFEST_FILE) is used when it actually contains
    `main_rel_path`. Otherwise the tree is searched for relative to
    sys.argv[0], following symlinks one hop at a time.

    Args:
      main_rel_path: Runfiles-relative path of the main entry point.

    Returns:
      The path of the runfiles directory.

    Raises:
      AssertionError: If no runfiles directory could be found.
    """
    # When the calling process resolved this stub through the runfiles
    # manifest, argv[0] may point outside the runfiles directory, so the
    # caller's RUNFILES_DIR / RUNFILES_MANIFEST_FILE take priority.
    candidate = os.environ.get("RUNFILES_DIR", None)
    if not candidate:
        manifest = os.environ.get("RUNFILES_MANIFEST_FILE", "")
        if manifest.endswith((".runfiles_manifest", ".runfiles/MANIFEST")):
            # "_manifest" and "/MANIFEST" are both 9 characters long, so
            # dropping them leaves the "<name>.runfiles" directory.
            candidate = manifest[:-9]
    # Defensive: only trust the directory if our main entry point is in it.
    if candidate and os.path.exists(os.path.join(candidate, main_rel_path)):
        return candidate

    current = sys.argv[0]
    if not os.path.isabs(current):
        current = os.path.join(os.getcwd(), current)

    while True:
        # Case 1: a "<stub>[.exe].runfiles" directory sits next to the stub.
        exe_suffix = ".exe" if is_windows() else ""
        sibling_tree = current + exe_suffix + ".runfiles"
        if os.path.isdir(sibling_tree):
            return sibling_tree

        # Case 2: the stub itself lives somewhere inside a ".runfiles" tree.
        separator = r"\\" if is_windows() else "/"
        found = re.match(r"(.*\.runfiles)" + separator + ".*", current)
        if found:
            return found.group(1)

        # Case 3: follow one level of symlink and try again.
        if not os.path.islink(current):
            break
        link_target = os.readlink(current)
        if not os.path.isabs(link_target):
            link_target = os.path.join(os.path.dirname(current), link_target)
        current = link_target

    raise AssertionError("Cannot find .runfiles directory for %s" % sys.argv[0])
160
161
def runfiles_envvar(module_space):
    """Work out which runfiles environment variable to export, and its value.

    Args:
      module_space: Path of the runfiles directory for this binary.

    Returns:
      A tuple of (var_name, var_value) where var_name is either 'RUNFILES_DIR'
      or 'RUNFILES_MANIFEST_FILE' and var_value is the path to that directory
      or file, or (None, None) if runfiles couldn't be found.
    """
    # If this binary is the data-dependency of another one, the other sets
    # RUNFILES_MANIFEST_FILE or RUNFILES_DIR for our sake; the manifest wins.
    for var_name in ("RUNFILES_MANIFEST_FILE", "RUNFILES_DIR"):
        inherited = os.environ.get(var_name, None)
        if inherited:
            return (var_name, inherited)

    manifest_candidates = (
        # The runfiles "output" manifest, argv[0] + ".runfiles_manifest".
        module_space + "_manifest",
        # The runfiles "input" manifest, argv[0] + ".runfiles/MANIFEST".
        # Normally both are present, but the former will be missing for
        # zip-based builds or if someone copies the runfiles tree elsewhere.
        os.path.join(module_space, "MANIFEST"),
    )
    for manifest in manifest_candidates:
        if os.path.exists(manifest):
            return ("RUNFILES_MANIFEST_FILE", manifest)

    # If running in a sandbox and no environment variables are set, then
    # look for the runfiles next to the binary.
    if module_space.endswith(".runfiles") and os.path.isdir(module_space):
        return ("RUNFILES_DIR", module_space)

    return (None, None)
199
200
def instrumented_file_paths():
    """Yield (realpath, listed_path) for instrumented files that are symlinked.

    The file list is read from the manifest named by the COVERAGE_MANIFEST
    environment variable; nothing is yielded when that variable is unset or
    empty. Blank lines are skipped, and only entries whose resolved real path
    differs from the listed path are yielded.
    """
    manifest_path = os.environ.get("COVERAGE_MANIFEST")
    if not manifest_path:
        return
    with open(manifest_path, "r") as manifest:
        for raw_line in manifest:
            listed = raw_line.strip()
            if not listed:
                continue
            try:
                resolved = os.path.realpath(listed)
            except OSError:
                print(
                    "Could not find instrumented file {}".format(listed),
                    file=sys.stderr,
                    flush=True,
                )
                continue
            if resolved == listed:
                # Not a symlink (or already canonical): nothing to fix up.
                continue
            print_verbose_coverage("Fixing up {} -> {}".format(resolved, listed))
            yield (resolved, listed)
223
224
def unresolve_symlinks(output_filename):
    # type: (str) -> None
    """Rewrite real paths in an lcov report back to the instrumented paths.

    Although coveragepy is asked to use relative file names, it currently
    ignores that setting when generating the lcov report (and other reports
    besides the XML one), so the report has to be fixed up afterwards: on
    every "SF:" line, each instrumented file's real path is replaced with the
    path listed in the coverage manifest.

    This is a workaround. Once the issue is fixed upstream and the updated
    version is widely in use, this function should be removed.

    See https://github.com/nedbat/coveragepy/issues/963.

    Args:
      output_filename: Path of the lcov report, rewritten in place.
    """
    replacements = list(instrumented_file_paths())
    if not replacements:
        return

    # Move the report aside, then stream it back with the paths corrected.
    stale_copy = output_filename + ".tmp"
    os.rename(output_filename, stale_copy)
    with open(stale_copy, "r") as source, open(output_filename, "w") as sink:
        for record in source:
            if record.startswith("SF:"):
                for resolved, listed in replacements:
                    record = record.replace(resolved, listed)
            sink.write(record)
    os.unlink(stale_copy)
250
251
def _run_py(main_filename, *, args, cwd=None):
    """Execute the given Python file as `__main__` in the current process.

    While the file runs, sys.argv is replaced with `[main_filename] + args`
    and, if `cwd` is given, the working directory is changed to it. Both the
    original sys.argv and the original working directory are restored
    afterwards, even if the program raises.

    Args:
      main_filename: str; path of the Python file to run.
      args: list of str; arguments that follow the program name in sys.argv.
      cwd: optional str; directory to change into before running.
    """
    orig_argv = sys.argv
    orig_cwd = os.getcwd()
    try:
        sys.argv = [main_filename] + args
        if cwd:
            os.chdir(cwd)
        print_verbose("run_py: cwd:", os.getcwd())
        print_verbose("run_py: sys.argv: ", values=sys.argv)
        print_verbose("run_py: os.environ:", mapping=os.environ)
        print_verbose("run_py: sys.path:", values=sys.path)
        runpy.run_path(main_filename, run_name="__main__")
    finally:
        # Undo the process-wide changes regardless of how the program ended.
        os.chdir(orig_cwd)
        sys.argv = orig_argv
270
271
@contextlib.contextmanager
def _maybe_collect_coverage(enable):
    """Context manager that optionally records coverage for the wrapped code.

    When `enable` is false this is a no-op. Otherwise a temporary coveragepy
    rc file is written into COVERAGE_DIR, coverage is started, and on exit an
    lcov report is written to `pylcov.dat` in COVERAGE_DIR and post-processed
    by `unresolve_symlinks`. The temporary rc file is removed at the end.

    Args:
      enable: Whether coverage should be collected.
    """
    if not enable:
        yield
        return

    import uuid

    import coverage

    coverage_dir = os.environ["COVERAGE_DIR"]
    run_id = uuid.uuid4()

    # We need coveragepy to use relative paths. This can only be configured
    # through an rc file, so write a throwaway one with a unique name.
    rcfile_name = os.path.join(coverage_dir, ".coveragerc_{}".format(run_id))
    with open(rcfile_name, "w") as rcfile:
        rcfile.write("[run]\nrelative_files = True\n")

    try:
        omit_patterns = [
            # Pipes can't be read back later, which can cause coverage to
            # throw an error when trying to get its source code.
            "/dev/fd/*",
            # The mechanism for finding third-party packages in coverage-py
            # only works for installed packages, not for runfiles. e.g:
            # '$HOME/.local/lib/python3.10/site-packages',
            # '/usr/lib/python',
            # '/usr/lib/python3.10/site-packages',
            # '/usr/local/lib/python3.10/dist-packages'
            # see https://github.com/nedbat/coveragepy/blob/bfb0c708fdd8182b2a9f0fc403596693ef65e475/coverage/inorout.py#L153-L164
            "*/external/*",
        ]
        cov = coverage.Coverage(
            config_file=rcfile_name,
            branch=True,
            # NOTE: The messages arg controls what coverage prints to
            # stdout/stderr, which can interfere with the Bazel coverage
            # command. Enabling message output is only useful for debugging
            # coverage support.
            messages=is_verbose_coverage(),
            omit=omit_patterns,
        )
        cov.start()
        try:
            yield
        finally:
            cov.stop()
            lcov_path = os.path.join(coverage_dir, "pylcov.dat")
            # Ignore errors because sometimes instrumented files aren't
            # readable afterwards. e.g. if they come from /dev/fd or if
            # they were transient code-under-test in /tmp
            cov.lcov_report(outfile=lcov_path, ignore_errors=True)
            if os.path.isfile(lcov_path):
                unresolve_symlinks(lcov_path)
    finally:
        try:
            os.unlink(rcfile_name)
        except OSError as err:
            # It's possible that the profiled program might execute another
            # Python binary through a wrapper that would then delete the
            # rcfile. Not much we can do about that, besides ignore the
            # failure here.
            print_verbose_coverage("Error removing temporary coverage rc file:", err)
338
339
def main():
    """Locate the runfiles tree, finish interpreter setup, and run the real main.

    Steps, in order: find the runfiles root, export the runfiles environment
    variable for child processes, verify the main file exists and is
    readable, prepend main's directory to sys.path when safe-path mode is
    off, then run the main file (under coverage if requested) and exit with
    status 0.

    Raises:
      AssertionError: If the runfiles directory or the main file cannot be
        found, or the main file is not readable.
    """
    print_verbose("initial argv:", values=sys.argv)
    print_verbose("initial cwd:", os.getcwd())
    print_verbose("initial environ:", mapping=os.environ)
    print_verbose("initial sys.path:", values=sys.path)

    main_rel_path = MAIN
    if is_windows():
        main_rel_path = main_rel_path.replace("/", os.sep)

    module_space = find_runfiles_root(main_rel_path)
    print_verbose("runfiles root:", module_space)

    # Recreate the "add main's dir to sys.path[0]" behavior to match the
    # system-python bootstrap / typical Python behavior.
    #
    # Without safe path enabled, when `python foo/bar.py` is run, python will
    # resolve the foo/bar.py symlink to its real path, then add the directory
    # of that path to sys.path. But, the resolved directory for the symlink
    # depends on if the file is generated or not.
    #
    # When foo/bar.py is a source file, then it's a symlink pointing
    # back to the client source directory. This means anything from that source
    # directory becomes importable, i.e. most code is importable.
    #
    # When foo/bar.py is a generated file, then it's a symlink pointing to
    # somewhere under bazel-out/.../bin, i.e. where generated files are. This
    # means only other generated files are importable (not source files).
    #
    # To replicate this behavior, we add main's directory within the runfiles
    # when safe path isn't enabled.
    if not getattr(sys.flags, "safe_path", False):
        prepend_path_entries = [
            os.path.join(module_space, os.path.dirname(main_rel_path))
        ]
    else:
        prepend_path_entries = []

    runfiles_envkey, runfiles_envvalue = runfiles_envvar(module_space)
    if runfiles_envkey:
        os.environ[runfiles_envkey] = runfiles_envvalue

    main_filename = os.path.join(module_space, main_rel_path)
    main_filename = get_windows_path_with_unc_prefix(main_filename)
    # Raise explicitly instead of using `assert`: assert statements are
    # removed when Python runs with -O / PYTHONOPTIMIZE, which would silently
    # skip these checks. AssertionError is kept as the exception type.
    if not os.path.exists(main_filename):
        raise AssertionError("Cannot exec() %r: file not found." % main_filename)
    if not os.access(main_filename, os.R_OK):
        raise AssertionError(
            "Cannot exec() %r: file not readable." % main_filename
        )

    sys.stdout.flush()

    sys.path[0:0] = prepend_path_entries

    if os.environ.get("COVERAGE_DIR"):
        # Imported lazily: only needed when a coverage run was requested.
        import _bazel_site_init
        coverage_enabled = _bazel_site_init.COVERAGE_SETUP
    else:
        coverage_enabled = False

    with _maybe_collect_coverage(enable=coverage_enabled):
        # The first arg is this bootstrap, so drop that for the re-invocation.
        _run_py(main_filename, args=sys.argv[1:])
        sys.exit(0)
405
406
407main()
408