• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright 2022 The ChromiumOS Authors
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Handle most aspects of creating and benchmarking PGO profiles for Rust.
8
9This is meant to be done at Rust uprev time. Ultimately profdata files need
10to be placed at
11
gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-frontend.profdata{s}.xz
and
gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-llvm.profdata{s}.xz
15
16Here {s} is an optional suffix to distinguish between profdata files on the same
17Rust version.
18
The intended flow is that you first get the new Rust version in a shape so that
it builds, for instance modifying or adding patches as necessary. Note that you
may need to generate manifests for dev-lang/rust and dev-lang/rust-host before
the profdata files are created, which will cause the `ebuild manifest` command
to fail. One way to handle this is to temporarily delete the lines of the
variable SRC_URI in cros-rustc.eclass which refer to profdata files.
25
26After you have a new working Rust version, you can run the following.
27
28```
29$ ./pgo_rust.py generate         # generate profdata files
30$ ./pgo_rust.py benchmark-pgo    # benchmark with PGO
31$ ./pgo_rust.py benchmark-nopgo  # benchmark without PGO
32$ ./pgo_rust.py upload-profdata  # upload profdata to localmirror
33```
34
35The benchmark steps aren't strictly necessary, but are recommended and will
36upload benchmark data to
37
38gs://chromeos-toolchain-artifacts/rust-pgo/benchmarks/{rust_version}/
39
40Currently by default ripgrep 13.0.0 is used as both the crate to build using an
41instrumented Rust while generating profdata, and the crate to build to
42benchmark Rust. You may wish to experiment with other crates for either role.
43In that case upload your crate to
44
45gs://chromeos-toolchain-artifacts/rust-pgo/crates/{name}-{version}.tar.xz
46
47and use `--crate-name` and `--crate-version` to indicate which crate to build
48to generate profdata (or which crate's generated profdata to use), and
49`--bench-crate-name` to indicate which crate to build in benchmarks.
50
51Notes on various local and GS locations follow.
52
Note that currently we need to keep separate profdata files for the LLVM and
frontend components of Rust. This is because LLVM profdata is instrumented by
the system LLVM, but Rust's profdata is instrumented by its own LLVM, and the
two LLVMs may support different versions of the profdata format.
57
58profdata files accessed by ebuilds must be stored in
59
60gs://chromeos-localmirror/distfiles
61
62Specifically, they go to
63
64gs://chromeos-localmirror/distfiles/rust-pgo-{rust-version}-llvm.profdata.xz
65
66gs://chromeos-localmirror/distfiles/
67  rust-pgo-{rust-version}-frontend.profdata.xz
68
69But we can store other data elsewhere, like gs://chromeos-toolchain-artifacts.
70
71GS locations:
72
73{GS_BASE}/crates/ - store crates we may use for generating profiles or
74benchmarking PGO optimized Rust compilers
75
76{GS_BASE}/benchmarks/{rust_version}/nopgo/
77  {bench_crate_name}-{bench_crate_version}-{triple}
78
79{GS_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/
80  {bench_crate_name}-{bench_crate_version}-{triple}
81
82Local locations:
83
84{LOCAL_BASE}/crates/
85
86{LOCAL_BASE}/llvm-profraw/
87
88{LOCAL_BASE}/frontend-profraw/
89
90{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/llvm.profdata
91
92{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/frontend.profdata
93
94{LOCAL_BASE}/benchmarks/{rust_version}/nopgo/
95  {bench_crate_name}-{bench_crate_version}-{triple}
96
97{LOCAL_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/
98  {bench_crate_name}-{bench_crate_version}-{triple}
99
100{LOCAL_BASE}/llvm.profdata     - must go here to be used by Rust ebuild
101{LOCAL_BASE}/frontend.profdata - must go here to be used by Rust ebuild
102"""
103
104import argparse
105import contextlib
106import logging
107import os
108from pathlib import Path
109from pathlib import PurePosixPath
110import re
111import shutil
112import subprocess
113import sys
114from typing import Dict, List, Optional
115
116
# Target triples the crate is built for, both when generating profiles and
# when benchmarking.
TARGET_TRIPLES = [
    "x86_64-cros-linux-gnu",
    "x86_64-pc-linux-gnu",
    "armv7a-cros-linux-gnueabihf",
    "aarch64-cros-linux-gnu",
]

# Root of the local working tree (crates, profraw, profdata, benchmarks).
LOCAL_BASE = Path("/tmp/rust-pgo")

# Google Storage location for crates and benchmark results. The leading "/"
# is deliberate: URLs are formed as f"gs:/{path}", which yields "gs://...".
GS_BASE = PurePosixPath("/chromeos-toolchain-artifacts/rust-pgo")

# Google Storage location the compressed profdata files are uploaded to.
# Same leading-"/" convention as GS_BASE.
GS_DISTFILES = PurePosixPath("/chromeos-localmirror/distfiles")

# Default crate name for the --crate-name and --bench-crate-name options.
CRATE_NAME = "ripgrep"

# Default crate version for the --crate-version and --bench-crate-version
# options.
CRATE_VERSION = "13.0.0"
133
134
@contextlib.contextmanager
def chdir(new_directory: Path):
    """Temporarily switch the working directory to `new_directory`.

    The directory that was current on entry is restored when the `with`
    block exits, whether it exits normally or through an exception.
    """
    previous_directory = os.getcwd()
    os.chdir(new_directory)
    try:
        yield
    finally:
        # Always go back, even if the body raised.
        os.chdir(previous_directory)
143
144
def run(
    args: List,
    *,
    indent: int = 4,
    env: Optional[Dict[str, str]] = None,
    capture_stdout: bool = False,
    message: bool = True,
) -> Optional[str]:
    """Run a command, log what happened, and raise if it failed.

    Args:
        args: The command and its arguments. Every element is converted
            with str(), so Path objects may be passed directly.
        indent: Number of spaces prepended to each logged output line.
            0 disables indentation.
        env: Extra environment variables, layered on top of os.environ.
        capture_stdout: If True, return the command's stdout (unindented)
            instead of logging the output.
        message: If True, log the command before and after it runs.

    Returns:
        The command's stdout if `capture_stdout` is True, otherwise None.

    Raises:
        subprocess.CalledProcessError: If the command exits with a nonzero
            status.
    """
    args = [str(arg) for arg in args]

    if env is None:
        new_env = os.environ
    else:
        # Copy so the caller's additions never leak into our own environment.
        new_env = os.environ.copy()
        new_env.update(env)

    if message:
        if env is None:
            logging.info("Running %s", args)
        else:
            logging.info("Running %s in environment %s", args, env)

    result = subprocess.run(
        args,
        env=new_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        check=False,
    )

    stdout = result.stdout
    stderr = result.stderr
    if indent != 0:
        stdout = re.sub("^", " " * indent, stdout, flags=re.MULTILINE)
        stderr = re.sub("^", " " * indent, stderr, flags=re.MULTILINE)

    if capture_stdout:
        ret = result.stdout
        if result.returncode != 0:
            # The caller never receives the captured output when we raise
            # below, so log both streams; otherwise a failing command would
            # leave no diagnostics at all.
            logging.error("STDOUT:")
            logging.error(stdout)
            logging.error("STDERR:")
            logging.error(stderr)
    else:
        logging.info("STDOUT:")
        logging.info(stdout)
        logging.info("STDERR:")
        logging.info(stderr)
        ret = None

    # Raise only after logging, so the output of a failed command is visible.
    result.check_returncode()

    if message:
        if env is None:
            logging.info("Ran %s\n", args)
        else:
            logging.info("Ran %s in environment %s\n", args, env)

    return ret
200
201
def get_rust_version() -> str:
    """Return the first `X.Y.Z` version number printed by `rustc --version`."""
    version_output = run(["rustc", "--version"], capture_stdout=True)
    version_match = re.search(r"\d+\.\d+\.\d+", version_output)
    # Show the raw output if it contains nothing that looks like a version.
    assert version_match is not None, repr(version_output)
    return version_match[0]
207
208
def download_unpack_crate(*, crate_name: str, crate_version: str):
    """Download a crate tarball from Google Storage and unpack it locally.

    The archive `{crate_name}-{crate_version}.tar.xz` is copied from
    `GS_BASE/crates/` into `LOCAL_BASE/crates/` and extracted there. Any
    previously unpacked copy of the crate directory is removed first.

    Args:
        crate_name: Name of the crate.
        crate_version: Version of the crate.
    """
    filename_no_extension = f"{crate_name}-{crate_version}"
    gs_path = GS_BASE / "crates" / f"{filename_no_extension}.tar.xz"
    local_path = LOCAL_BASE / "crates"
    shutil.rmtree(local_path / filename_no_extension, ignore_errors=True)
    with chdir(local_path):
        run(["gsutil.py", "cp", f"gs:/{gs_path}", "."])
        # An earlier download leaves the decompressed .tar behind, and xz
        # refuses to overwrite an existing output file unless forced.
        run(["xz", "-d", "--force", f"{filename_no_extension}.tar.xz"])
        run(["tar", "xvf", f"{filename_no_extension}.tar"])
220
221
def build_crate(
    *,
    crate_name: str,
    crate_version: str,
    target_triple: str,
    time_file: Optional[str] = None,
):
    """Do a clean release build of an unpacked crate for one target triple.

    A `.cargo/config.toml` is written into the crate directory so that cargo
    uses the vendored sources, links with `{target_triple}-clang`, and builds
    with thin LTO and embedded bitcode.

    Args:
        crate_name: Name of the crate under `LOCAL_BASE/crates`.
        crate_version: Version of that crate.
        target_triple: Triple passed to `cargo build --target`.
        time_file: If given, the build is run under `/usr/bin/time` and the
            timing report is written to this file.
    """
    config_lines = [
        "[source.crates-io]",
        'replace-with = "vendored-sources"',
        "",
        "[source.vendored-sources]",
        'directory = "vendor"',
        "",
        f"[target.{target_triple}]",
        f'linker = "{target_triple}-clang"',
        "",
        "[target.'cfg(all())']",
        "rustflags = [",
        '    "-Clto=thin",',
        '    "-Cembed-bitcode=yes",',
        "]",
    ]

    build_cmd = ["cargo", "build", "--release", "--target", target_triple]
    if time_file is not None:
        # Wrap the build so wall time, user time and peak RSS are recorded.
        build_cmd = [
            "/usr/bin/time",
            f"--output={time_file}",
            "--format=wall time (s) %e\nuser time (s) %U\nmax RSS %M\n",
        ] + build_cmd

    crate_directory = LOCAL_BASE / "crates" / f"{crate_name}-{crate_version}"
    with chdir(crate_directory):
        cargo_directory = Path(".cargo")
        cargo_directory.mkdir(exist_ok=True)
        with open(cargo_directory / "config.toml", "w") as config_file:
            config_file.write("\n".join(config_lines))

        # Clean first so the whole crate is rebuilt every time.
        run(["cargo", "clean"])
        run(build_cmd)
267
268
def build_rust(
    *,
    generate_frontend_profile: bool = False,
    generate_llvm_profile: bool = False,
    use_frontend_profile: bool = False,
    use_llvm_profile: bool = False,
):
    """Emerge dev-lang/rust and dev-lang/rust-host with the chosen PGO USE flags.

    At most one of the `generate_*` options may be set, and none of them may
    be combined with a `use_*` option. With no options set, Rust is built
    with neither instrumentation nor profile use.

    Args:
        generate_frontend_profile: Instrument the Rust frontend.
        generate_llvm_profile: Instrument LLVM.
        use_frontend_profile: Build using the local frontend profile.
        use_llvm_profile: Build using the local LLVM profile.
    """
    wants_use = use_frontend_profile or use_llvm_profile
    wants_generate = generate_frontend_profile or generate_llvm_profile
    assert not (
        wants_use and wants_generate
    ), "Can't build a compiler to both use profile information and generate it"

    assert not (
        generate_frontend_profile and generate_llvm_profile
    ), "Can't generate both frontend and LLVM profile information"

    # The two non-local "use" flags are always switched off; the requested
    # flags are appended after them.
    use = "-rust_profile_frontend_use -rust_profile_llvm_use "
    optional_flags = (
        (generate_frontend_profile, "rust_profile_frontend_generate "),
        (generate_llvm_profile, "rust_profile_llvm_generate "),
        (use_frontend_profile, "rust_profile_frontend_use_local "),
        (use_llvm_profile, "rust_profile_llvm_use_local "),
    )
    for enabled, flag in optional_flags:
        if enabled:
            use += flag

    # -E to preserve our USE environment variable.
    emerge_cmd = ["sudo", "-E", "emerge", "dev-lang/rust", "dev-lang/rust-host"]
    run(emerge_cmd, env={"USE": use})
295
296    # -E to preserve our USE environment variable.
297    run(
298        ["sudo", "-E", "emerge", "dev-lang/rust", "dev-lang/rust-host"],
299        env={"USE": use},
300    )
301
302
def merge_profdata(llvm_or_frontend, *, source_directory: Path, dest: Path):
    """Merge every `*.profraw` file in a directory into one profdata file.

    Args:
        llvm_or_frontend: Either "llvm" or "frontend"; selects which
            `llvm-profdata` binary performs the merge.
        source_directory: Directory containing the `.profraw` files.
        dest: Output profdata path. Its parent directory is created if
            needed.
    """
    assert llvm_or_frontend in ("llvm", "frontend")

    # The two `llvm-profdata` programs come from different LLVM versions, and may
    # support different versions of the profdata format, so make sure to use the
    # right one.
    if llvm_or_frontend == "llvm":
        llvm_profdata = "/usr/bin/llvm-profdata"
    else:
        llvm_profdata = "/usr/libexec/rust/llvm-profdata"

    dest.parent.mkdir(parents=True, exist_ok=True)

    merge_cmd = [llvm_profdata, "merge", f"--output={dest}"]
    merge_cmd.extend(source_directory.glob("*.profraw"))
    run(merge_cmd)
319
320
def do_upload_profdata(*, source: Path, dest: PurePosixPath):
    """Compress a profdata file with xz and upload it as publicly readable.

    The uncompressed `source` is kept; an existing `.xz` next to it is
    overwritten.
    """
    compressed = source.parent / f"{source.name}.xz"
    run(["xz", "--keep", "--compress", "--force", source])
    upload_file(source=compressed, dest=dest, public_read=True)
325
326
def upload_file(
    *, source: Path, dest: PurePosixPath, public_read: bool = False
):
    """Copy a local file to Google Storage with `gsutil.py cp`.

    Args:
        source: Local file to upload.
        dest: Destination path; it is formatted as f"gs:/{dest}".
        public_read: If True, upload with the `public-read` canned ACL.
    """
    copy_cmd = ["gsutil.py", "cp"]
    if public_read:
        copy_cmd += ["-a", "public-read"]
    copy_cmd += [source, f"gs:/{dest}"]
    run(copy_cmd)
334
335
def maybe_download_crate(*, crate_name: str, crate_version: str):
    """Download and unpack the crate unless its directory already exists."""
    crate_directory = LOCAL_BASE / "crates" / f"{crate_name}-{crate_version}"
    if crate_directory.is_dir():
        logging.info("Crate already downloaded")
        return

    logging.info("Downloading crate")
    download_unpack_crate(crate_name=crate_name, crate_version=crate_version)
345
346
def generate(args):
    """Create llvm.profdata and frontend.profdata for the selected crate.

    For each of LLVM and the frontend in turn: build an instrumented Rust,
    build the crate for every triple in TARGET_TRIPLES, then merge the raw
    profiles into `LOCAL_BASE/profdata/{crate_name}-{crate_version}/`.

    Args:
        args: Parsed command-line arguments; reads `crate_name` and
            `crate_version`.
    """
    crate_name = args.crate_name
    crate_version = args.crate_version
    maybe_download_crate(crate_name=crate_name, crate_version=crate_version)

    llvm_profraw = LOCAL_BASE / "llvm-profraw"
    frontend_profraw = LOCAL_BASE / "frontend-profraw"
    # Clear out raw profiles from earlier runs (presumably so that they are
    # not picked up by the merges below).
    for profraw_directory in (llvm_profraw, frontend_profraw):
        shutil.rmtree(profraw_directory, ignore_errors=True)

    profdata_directory = (
        LOCAL_BASE / "profdata" / f"{crate_name}-{crate_version}"
    )

    # Pass 1: LLVM.
    logging.info("Building Rust instrumented for llvm")
    build_rust(generate_llvm_profile=True)

    llvm_profraw.mkdir(parents=True, exist_ok=True)
    for triple in TARGET_TRIPLES:
        logging.info(
            "Building crate with LLVM instrumentation, for triple %s", triple
        )
        build_crate(
            crate_name=crate_name,
            crate_version=crate_version,
            target_triple=triple,
        )

    logging.info("Merging LLVM profile data")
    merge_profdata(
        "llvm",
        source_directory=llvm_profraw,
        dest=profdata_directory / "llvm.profdata",
    )

    # Pass 2: frontend.
    logging.info("Building Rust instrumented for frontend")
    build_rust(generate_frontend_profile=True)

    frontend_profraw.mkdir(parents=True, exist_ok=True)
    for triple in TARGET_TRIPLES:
        logging.info(
            "Building crate with frontend instrumentation, for triple %s",
            triple,
        )
        build_crate(
            crate_name=crate_name,
            crate_version=crate_version,
            target_triple=triple,
        )

    logging.info("Merging frontend profile data")
    merge_profdata(
        "frontend",
        source_directory=frontend_profraw,
        dest=profdata_directory / "frontend.profdata",
    )
409
410
def benchmark_nopgo(args):
    """Build Rust without PGO, time the crate build, and upload the timings.

    One timing file per triple in TARGET_TRIPLES is written to
    `LOCAL_BASE/benchmarks/nopgo/`; every file in that directory is then
    uploaded to `GS_BASE/benchmarks/{rust_version}/nopgo{suffix}/`.

    Args:
        args: Parsed command-line arguments; reads `bench_crate_name`,
            `bench_crate_version` and `suffix`.
    """
    # Fetch the benchmark crate if it is missing, as benchmark_pgo does.
    # Without this, running this command before the crate was downloaded
    # fails when build_crate tries to enter the crate directory.
    maybe_download_crate(
        crate_name=args.bench_crate_name, crate_version=args.bench_crate_version
    )

    logging.info("Building Rust, no PGO")
    build_rust()

    time_directory = LOCAL_BASE / "benchmarks" / "nopgo"
    logging.info("Benchmarking crate build with no PGO")
    time_directory.mkdir(parents=True, exist_ok=True)
    for triple in TARGET_TRIPLES:
        build_crate(
            crate_name=args.bench_crate_name,
            crate_version=args.bench_crate_version,
            target_triple=triple,
            time_file=(
                time_directory
                / f"{args.bench_crate_name}-{args.bench_crate_version}-{triple}"
            ),
        )

    rust_version = get_rust_version()
    dest_directory = (
        GS_BASE / "benchmarks" / rust_version / f"nopgo{args.suffix}"
    )
    logging.info("Uploading benchmark data")
    # iterdir() already yields paths inside time_directory.
    for file in time_directory.iterdir():
        upload_file(source=file, dest=dest_directory / file.name)
438
439
def benchmark_pgo(args):
    """Build Rust with PGO, time the crate build, and upload the timings.

    The profdata generated for `{crate_name}-{crate_version}` is copied into
    the dev-lang/rust `files` directory of the overlay, Rust is rebuilt using
    both profiles, and the benchmark crate is built once per triple in
    TARGET_TRIPLES under `/usr/bin/time`.

    Args:
        args: Parsed command-line arguments; reads `bench_crate_name`,
            `bench_crate_version`, `crate_name`, `crate_version` and
            `suffix`.
    """
    bench_label = f"{args.bench_crate_name}-{args.bench_crate_version}"
    profile_label = f"{args.crate_name}-{args.crate_version}"

    maybe_download_crate(
        crate_name=args.bench_crate_name, crate_version=args.bench_crate_version
    )

    files_dir = Path(
        "/mnt/host/source/src/third_party/chromiumos-overlay",
        "dev-lang/rust/files",
    )
    profdata_directory = LOCAL_BASE / "profdata" / profile_label

    logging.info("Copying profile data to be used in building Rust")
    for profdata_name in ("llvm.profdata", "frontend.profdata"):
        run(["cp", profdata_directory / profdata_name, files_dir])

    logging.info("Building Rust with PGO")
    build_rust(use_llvm_profile=True, use_frontend_profile=True)

    time_directory = LOCAL_BASE / "benchmarks" / profile_label
    time_directory.mkdir(parents=True, exist_ok=True)
    logging.info("Benchmarking crate built with PGO")
    for triple in TARGET_TRIPLES:
        build_crate(
            crate_name=args.bench_crate_name,
            crate_version=args.bench_crate_version,
            target_triple=triple,
            time_file=time_directory / f"{bench_label}-{triple}",
        )

    rust_version = get_rust_version()
    dest_directory = (
        GS_BASE / "benchmarks" / rust_version / f"{profile_label}{args.suffix}"
    )
    logging.info("Uploading benchmark data")
    for timing_file in time_directory.iterdir():
        upload_file(
            source=time_directory / timing_file.name,
            dest=dest_directory / timing_file.name,
        )
507
508
def upload_profdata(args):
    """Compress and upload the LLVM and frontend profdata files.

    The files are read from
    `LOCAL_BASE/profdata/{crate_name}-{crate_version}/` and uploaded to
    GS_DISTFILES under names that include the current Rust version and the
    optional suffix.

    Args:
        args: Parsed command-line arguments; reads `crate_name`,
            `crate_version` and `suffix`.
    """
    profdata_directory = (
        LOCAL_BASE / "profdata" / f"{args.crate_name}-{args.crate_version}"
    )
    rust_version = get_rust_version()
    suffix = args.suffix

    logging.info("Uploading LLVM profdata")
    llvm_dest = GS_DISTFILES / f"rust-pgo-{rust_version}-llvm{suffix}.profdata.xz"
    do_upload_profdata(
        source=profdata_directory / "llvm.profdata", dest=llvm_dest
    )

    logging.info("Uploading frontend profdata")
    frontend_dest = (
        GS_DISTFILES / f"rust-pgo-{rust_version}-frontend{suffix}.profdata.xz"
    )
    do_upload_profdata(
        source=profdata_directory / "frontend.profdata", dest=frontend_dest
    )
532
533
def main():
    """Parse the command line and run the selected subcommand.

    Subcommands are `generate`, `benchmark-nopgo`, `benchmark-pgo` and
    `upload-profdata`. The local working directories under LOCAL_BASE are
    created before the subcommand runs.

    Returns:
        0, for use as the process exit status.
    """

    def add_bench_crate_options(subparser):
        # Options naming the crate whose build is timed.
        subparser.add_argument(
            "--bench-crate-name",
            default=CRATE_NAME,
            help="Name of the crate whose build to benchmark",
        )
        subparser.add_argument(
            "--bench-crate-version",
            default=CRATE_VERSION,
            help="Version of the crate whose benchmark to build",
        )

    def add_profile_crate_options(subparser):
        # Options naming the crate whose generated profile is consumed.
        subparser.add_argument(
            "--crate-name",
            default=CRATE_NAME,
            help="Name of the crate whose profile to use",
        )
        subparser.add_argument(
            "--crate-version",
            default=CRATE_VERSION,
            help="Version of the crate whose profile to use",
        )

    def add_suffix_option(subparser):
        subparser.add_argument(
            "--suffix",
            default="",
            help="Suffix to distinguish benchmarks and profdata with identical rustc versions",
        )

    logging.basicConfig(
        stream=sys.stdout, level=logging.NOTSET, format="%(message)s"
    )

    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command", help="")
    subparsers.required = True

    # generate
    generate_parser = subparsers.add_parser(
        "generate",
        help="Generate LLVM and frontend profdata files by building "
        "instrumented Rust compilers, and using them to build the "
        "indicated crate (downloading the crate if necessary).",
    )
    generate_parser.set_defaults(func=generate)
    generate_parser.add_argument(
        "--crate-name", default=CRATE_NAME, help="Name of the crate to build"
    )
    generate_parser.add_argument(
        "--crate-version",
        default=CRATE_VERSION,
        help="Version of the crate to build",
    )

    # benchmark-nopgo
    nopgo_parser = subparsers.add_parser(
        "benchmark-nopgo",
        help="Build the Rust compiler without PGO, benchmark "
        "the build of the indicated crate, and upload "
        "the benchmark data.",
    )
    nopgo_parser.set_defaults(func=benchmark_nopgo)
    add_bench_crate_options(nopgo_parser)
    add_suffix_option(nopgo_parser)

    # benchmark-pgo
    pgo_parser = subparsers.add_parser(
        "benchmark-pgo",
        help="Build the Rust compiler using PGO with the indicated "
        "profdata files, benchmark the build of the indicated crate, "
        "and upload the benchmark data.",
    )
    pgo_parser.set_defaults(func=benchmark_pgo)
    add_bench_crate_options(pgo_parser)
    add_profile_crate_options(pgo_parser)
    add_suffix_option(pgo_parser)

    # upload-profdata
    upload_parser = subparsers.add_parser(
        "upload-profdata", help="Upload the profdata files"
    )
    upload_parser.set_defaults(func=upload_profdata)
    add_profile_crate_options(upload_parser)
    add_suffix_option(upload_parser)

    args = parser.parse_args()

    # Make sure the local working directories exist before any command runs.
    for subdirectory in (
        "crates",
        "llvm-profraw",
        "frontend-profraw",
        "benchmarks",
    ):
        (LOCAL_BASE / subdirectory).mkdir(parents=True, exist_ok=True)

    args.func(args)

    return 0
649
650
# Run main() only when executed as a script, and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
653