#!/usr/bin/env python3
# Copyright 2020 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Script to extract build metadata from bazel BUILD.
# To avoid having two sources of truth for the build metadata (build
# targets, source files, header files etc.), this script analyzes the contents
# of bazel BUILD files and generates a YAML file (currently called
# build_autogenerated.yaml). The format and semantics of the generated YAML
# files are chosen to match the format of a "build.yaml" file, which used
# to be the source of truth for the gRPC build before bazel became
# the primary build system.
# A good basic overview of the "build.yaml" format is available here:
# https://github.com/grpc/grpc/blob/master/templates/README.md. Note that
# while useful as an overview, the doc does not act as a formal spec
# (in fact, no formal spec exists) and the doc can be incomplete,
# inaccurate or slightly out of date.
# TODO(jtattermusch): In the future we want to get rid of the legacy build.yaml
# format entirely or simplify it to a point where it becomes self-explanatory
# and doesn't need any detailed documentation.

import collections
import os
import subprocess
from typing import Any, Dict, Iterable, List, Optional
import xml.etree.ElementTree as ET

import build_cleaner

BuildMetadata = Dict[str, Any]
BuildDict = Dict[str, BuildMetadata]
BuildYaml = Dict[str, Any]


class ExternalProtoLibrary:
    """ExternalProtoLibrary describes an external proto library.

    Fields:
    - destination(str): The relative path where this proto library should be
        placed. Preferably, it should match the submodule path.
    - proto_prefix(str): The prefix to remove in order to ensure that the proto
        import is correct. For more info, see description of
        https://github.com/grpc/grpc/pull/25272.
    - urls(List[str]): This and the following two fields are filled in from the
        Bazel build metadata (see _parse_http_archives below).
    - hash(str): The sha256 hash of the downloaded archive.
    - strip_prefix(str): The path to be stripped from the extracted directory, see
        http_archive in Bazel.
    """

    def __init__(
        self, destination, proto_prefix, urls=None, hash="", strip_prefix=""
    ):
        self.destination = destination
        self.proto_prefix = proto_prefix
        if urls is None:
            self.urls = []
        else:
            self.urls = urls
        self.hash = hash
        self.strip_prefix = strip_prefix


EXTERNAL_PROTO_LIBRARIES = {
    "envoy_api": ExternalProtoLibrary(
        destination="third_party/envoy-api",
        proto_prefix="third_party/envoy-api/",
    ),
    "com_google_googleapis": ExternalProtoLibrary(
        destination="third_party/googleapis",
        proto_prefix="third_party/googleapis/",
    ),
    "com_github_cncf_xds": ExternalProtoLibrary(
        destination="third_party/xds", proto_prefix="third_party/xds/"
    ),
    "com_envoyproxy_protoc_gen_validate": ExternalProtoLibrary(
        destination="third_party/protoc-gen-validate",
        proto_prefix="third_party/protoc-gen-validate/",
    ),
    "opencensus_proto": ExternalProtoLibrary(
        destination="third_party/opencensus-proto/src",
        proto_prefix="third_party/opencensus-proto/src/",
    ),
}

# We want to get a list of source files for some external libraries
# to be able to include them in a non-bazel (e.g. make/cmake) build.
# For that we need a mapping from the external repo name to the path
# of the corresponding git submodule.
EXTERNAL_SOURCE_PREFIXES = {
    # TODO(veblush): Remove @utf8_range// item once protobuf is upgraded to 26.x
    "@utf8_range//": "third_party/utf8_range",
    "@com_googlesource_code_re2//": "third_party/re2",
    "@com_google_googletest//": "third_party/googletest",
    "@com_google_protobuf//upb": "third_party/upb/upb",
    "@com_google_protobuf//third_party/utf8_range": "third_party/utf8_range",
    "@zlib//": "third_party/zlib",
}
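# For illustration (hypothetical label): with the mapping above,
# _try_extract_source_file_path below would translate
# "@com_googlesource_code_re2//:re2" into "third_party/re2/re2".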


def _bazel_query_xml_tree(query: str) -> ET.Element:
    """Get xml output of bazel query invocation, parsed as XML tree"""
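    # For illustration, a hypothetical call like
    #   _bazel_query_xml_tree('deps("//:grpc")')
    # shells out to:
    #   tools/bazel query --noimplicit_deps --output xml 'deps("//:grpc")'
    # and parses the XML document that the query prints.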
    output = subprocess.check_output(
        ["tools/bazel", "query", "--noimplicit_deps", "--output", "xml", query]
    )
    return ET.fromstring(output)


def _rule_dict_from_xml_node(rule_xml_node):
    """Converts an XML node representing a rule (obtained from "bazel query --output xml") to a dictionary that contains all the metadata we will need."""
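    # A rule node in the XML output looks roughly like this (a sketch; the
    # exact children vary by rule class):
    #   <rule class="cc_library" name="//:grpc">
    #     <list name="srcs"><label value="//:src/core/..."/></list>
    #     <string name="size" value="small"/>
    #     <boolean name="flaky" value="false"/>
    #     <label name="actual" value="//:real_target"/>
    #   </rule>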
    result = {
        "class": rule_xml_node.attrib.get("class"),
        "name": rule_xml_node.attrib.get("name"),
        "srcs": [],
        "hdrs": [],
        "textual_hdrs": [],
        "deps": [],
        "data": [],
        "tags": [],
        "args": [],
        "generator_function": None,
        "size": None,
        "flaky": False,
        "actual": None,  # the real target name for aliases
    }
    for child in rule_xml_node:
        # all the metadata we want is stored under "list" tags
        if child.tag == "list":
            list_name = child.attrib["name"]
            if list_name in [
                "srcs",
                "hdrs",
                "textual_hdrs",
                "deps",
                "data",
                "tags",
                "args",
            ]:
                result[list_name] += [item.attrib["value"] for item in child]
        if child.tag == "string":
            string_name = child.attrib["name"]
            if string_name in ["generator_function", "size"]:
                result[string_name] = child.attrib["value"]
        if child.tag == "boolean":
            bool_name = child.attrib["name"]
            if bool_name in ["flaky"]:
                result[bool_name] = child.attrib["value"] == "true"
        if child.tag == "label":
            # extract actual name for alias and bind rules
            label_name = child.attrib["name"]
            if label_name in ["actual"]:
                actual_name = child.attrib.get("value", None)
                if actual_name:
                    result["actual"] = actual_name
                    # HACK: since we do a lot of transitive dependency scanning,
                    # make it seem that the actual name is a dependency of the alias or bind rule
                    # (aliases don't have dependencies themselves)
                    result["deps"].append(actual_name)
    return result


def _extract_rules_from_bazel_xml(xml_tree):
    """Extract bazel rules from an XML tree node obtained from the "bazel query --output xml" command."""
    result = {}
    for child in xml_tree:
        if child.tag == "rule":
            rule_dict = _rule_dict_from_xml_node(child)
            rule_clazz = rule_dict["class"]
            rule_name = rule_dict["name"]
            if rule_clazz in [
                "cc_library",
                "cc_binary",
                "cc_test",
                "cc_proto_library",
                "cc_proto_gen_validate",
                "proto_library",
                "upb_c_proto_library",
                "upb_proto_reflection_library",
                "alias",
                "bind",
            ]:
                if rule_name in result:
                    raise Exception("Rule %s already present" % rule_name)
                result[rule_name] = rule_dict
    return result


def _get_bazel_label(target_name: str) -> str:
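    """Convert a target name used in the extra metadata into a bazel label.

    For illustration (examples derived from the rules below):
      "grpc"                             -> "//:grpc"
      "src/compiler:grpc_plugin_support" -> "//src/compiler:grpc_plugin_support"
      "@com_google_protobuf//upb:base"   -> unchanged
    """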
    if target_name.startswith("@"):
        return target_name
    if ":" in target_name:
        return "//%s" % target_name
    else:
        return "//:%s" % target_name


def _try_extract_source_file_path(label: str) -> Optional[str]:
    """Gets relative path to source file from bazel deps listing (returns None if the label cannot be mapped)"""
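    # For illustration (examples taken from the comments in this function):
    #   "//:src/core/lib/surface/call_test_only.h" -> "src/core/lib/surface/call_test_only.h"
    #   "//test/core/test_util:port.cc"            -> "test/core/test_util/port.cc"
    #   An external label matching none of the known prefixes yields None.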
    if label.startswith("@"):
        # This is an external source file. We are only interested in sources
        # for some of the external libraries.
        for lib_name, prefix in EXTERNAL_SOURCE_PREFIXES.items():
            if label.startswith(lib_name):
                return (
                    label.replace("%s" % lib_name, prefix)
                    .replace(":", "/")
                    .replace("//", "/")
                )

        # This source file is external, and we need to translate the
        # @REPO_NAME to a valid path prefix. At this stage, we need
        # to check the repo name, since the label/path mapping is not
        # available in BUILD files.
        for lib_name, external_proto_lib in EXTERNAL_PROTO_LIBRARIES.items():
            if label.startswith("@" + lib_name + "//"):
                return label.replace(
                    "@%s//" % lib_name,
                    external_proto_lib.proto_prefix,
                ).replace(":", "/")

        # No external library match found
        return None
    else:
        if label.startswith("//"):
            label = label[len("//") :]
        # labels in the form //:src/core/lib/surface/call_test_only.h
        if label.startswith(":"):
            label = label[len(":") :]
        # labels in the form //test/core/test_util:port.cc
        return label.replace(":", "/")


def _has_header_suffix(label: str) -> bool:
    """Returns True if the label has a suffix that looks like a C/C++ include file"""
    return (
        label.endswith(".h")
        or label.endswith(".hpp")
        or label.endswith(".inc")
    )


def _extract_public_headers(bazel_rule: BuildMetadata) -> List[str]:
    """Gets list of public headers from a bazel rule"""
    result = []
    for dep in bazel_rule["hdrs"]:
        if dep.startswith("//:include/") and _has_header_suffix(dep):
            source_file_maybe = _try_extract_source_file_path(dep)
            if source_file_maybe:
                result.append(source_file_maybe)
    return list(sorted(result))


def _extract_nonpublic_headers(bazel_rule: BuildMetadata) -> List[str]:
    """Gets list of non-public headers from a bazel rule"""
    result = []
    for dep in list(
        bazel_rule["hdrs"] + bazel_rule["textual_hdrs"] + bazel_rule["srcs"]
    ):
        if not dep.startswith("//:include/") and _has_header_suffix(dep):
            source_file_maybe = _try_extract_source_file_path(dep)
            if source_file_maybe:
                result.append(source_file_maybe)
    return list(sorted(result))


def _extract_sources(bazel_rule: BuildMetadata) -> List[str]:
    """Gets list of source files from a bazel rule"""
    result = []
    for src in bazel_rule["srcs"]:
        # Skip .proto files from the protobuf repo
        if src.startswith("@com_google_protobuf//") and src.endswith(".proto"):
            continue
        if src.endswith(".cc") or src.endswith(".c") or src.endswith(".proto"):
            source_file_maybe = _try_extract_source_file_path(src)
            if source_file_maybe:
                result.append(source_file_maybe)
    return list(sorted(result))


def _extract_deps(
    bazel_rule: BuildMetadata, bazel_rules: BuildDict
) -> List[str]:
    """Gets list of deps from a bazel rule"""
    deps = set(bazel_rule["deps"])
    for src in bazel_rule["srcs"]:
        if (
            not src.endswith(".cc")
            and not src.endswith(".c")
            and not src.endswith(".proto")
        ):
            if src in bazel_rules:
                # This label doesn't point to a source file, but another Bazel
                # target. This is required for :pkg_cc_proto_validate targets,
                # and it's generally allowed by Bazel.
                deps.add(src)
    return list(sorted(list(deps)))


def _create_target_from_bazel_rule(
    target_name: str, bazel_rules: BuildDict
) -> BuildMetadata:
    """Create build.yaml-like target definition from bazel metadata"""
    bazel_rule = bazel_rules[_get_bazel_label(target_name)]

    # Create a template for our target from the bazel rule. Initially we only
    # populate some "private" fields with the original info we got from bazel
    # and only later we will populate the public fields (once we do some extra
    # postprocessing).
    result = {
        "name": target_name,
        "_PUBLIC_HEADERS_BAZEL": _extract_public_headers(bazel_rule),
        "_HEADERS_BAZEL": _extract_nonpublic_headers(bazel_rule),
        "_SRC_BAZEL": _extract_sources(bazel_rule),
        "_DEPS_BAZEL": _extract_deps(bazel_rule, bazel_rules),
        "public_headers": bazel_rule["_COLLAPSED_PUBLIC_HEADERS"],
        "headers": bazel_rule["_COLLAPSED_HEADERS"],
        "src": bazel_rule["_COLLAPSED_SRCS"],
        "deps": bazel_rule["_COLLAPSED_DEPS"],
    }
    return result


def _external_dep_name_from_bazel_dependency(bazel_dep: str) -> Optional[str]:
    """Returns the name of the external dependency if one is provided, otherwise None"""
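    # For illustration (examples derived from the branches below):
    #   "@com_google_absl//absl/strings:strings" -> "absl/strings:strings"
    #   "@boringssl//:ssl"                       -> "libssl"
    #   "//:grpc" (an internal target)           -> None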
    if bazel_dep.startswith("@com_google_absl//"):
        # special case for adding a dependency on one of the absl libraries (there is not just one absl library)
        prefixlen = len("@com_google_absl//")
        return bazel_dep[prefixlen:]
    elif bazel_dep == "@com_github_google_benchmark//:benchmark":
        return "benchmark"
    elif bazel_dep == "@boringssl//:ssl":
        return "libssl"
    elif bazel_dep == "@com_github_cares_cares//:ares":
        return "cares"
    elif (
        bazel_dep == "@com_google_protobuf//:protobuf"
        or bazel_dep == "@com_google_protobuf//:protobuf_headers"
    ):
        return "protobuf"
    elif bazel_dep == "@com_google_protobuf//:protoc_lib":
        return "protoc"
    elif bazel_dep == "@io_opentelemetry_cpp//api:api":
        return "opentelemetry-cpp::api"
    elif bazel_dep == "@io_opentelemetry_cpp//sdk/src/metrics:metrics":
        return "opentelemetry-cpp::metrics"
    else:
        # Two options here:
        # * either this is not an external dependency at all (which is fine, we will treat it as an internal library)
        # * this is an external dependency, but we don't want to make the dependency explicit in the build metadata
        #   for other build systems.
        return None


def _compute_transitive_metadata(
    rule_name: str, bazel_rules: Any, bazel_label_to_dep_name: Dict[str, str]
) -> None:
    """Computes the final build metadata for the Bazel target with rule_name.

    The dependencies that will appear on the deps list are:

    * Public build targets, including binaries and tests;
    * External targets, like absl, re2.

    All other intermediate dependencies will be merged, which means their
    source files, headers, etc. will be collected into one build target. This
    step of processing greatly reduces the complexity of the generated
    build specifications for other build systems, like CMake, Make, setuptools.

    The final build metadata are:
    * _TRANSITIVE_DEPS: all the transitive dependencies, including intermediate
                        targets;
    * _COLLAPSED_DEPS:  dependencies that fit the requirements above, with
                        duplicated items removed, producing the shortest
                        possible dependency list in alphabetical order;
    * _COLLAPSED_SRCS:  the merged source files;
    * _COLLAPSED_PUBLIC_HEADERS: the merged public headers;
    * _COLLAPSED_HEADERS: the merged non-public headers;
    * _EXCLUDE_DEPS: intermediate targets to exclude when performing collapsing
      of sources and dependencies.

    For the collapsed deps, the algorithm improves on cases like:

    The result in the past:
        end2end_tests -> [grpc_test_util, grpc, gpr, address_sorting, upb]
        grpc_test_util -> [grpc, gpr, address_sorting, upb, ...]
        grpc -> [gpr, address_sorting, upb, ...]

    The result of the algorithm:
        end2end_tests -> [grpc_test_util]
        grpc_test_util -> [grpc]
        grpc -> [gpr, address_sorting, upb, ...]
    """
    bazel_rule = bazel_rules[rule_name]
    direct_deps = _extract_deps(bazel_rule, bazel_rules)
    transitive_deps = set()
    collapsed_deps = set()
    exclude_deps = set()
    collapsed_srcs = set(_extract_sources(bazel_rule))
    collapsed_public_headers = set(_extract_public_headers(bazel_rule))
    collapsed_headers = set(_extract_nonpublic_headers(bazel_rule))

    for dep in direct_deps:
        external_dep_name_maybe = _external_dep_name_from_bazel_dependency(dep)

        if dep in bazel_rules:
            # Descend recursively, but no need to do that for external deps
            if external_dep_name_maybe is None:
                if "_PROCESSING_DONE" not in bazel_rules[dep]:
                    # This item has not been processed before, compute now
                    _compute_transitive_metadata(
                        dep, bazel_rules, bazel_label_to_dep_name
                    )
                transitive_deps.update(
                    bazel_rules[dep].get("_TRANSITIVE_DEPS", [])
                )
                collapsed_deps.update(
                    bazel_rules[dep].get("_COLLAPSED_DEPS", [])
                )
                exclude_deps.update(bazel_rules[dep].get("_EXCLUDE_DEPS", []))

        # This dep is a public target, add it as a dependency
        if dep in bazel_label_to_dep_name:
            transitive_deps.update([bazel_label_to_dep_name[dep]])
            collapsed_deps.update([bazel_label_to_dep_name[dep]])
            # Add all the transitive deps of every public dep to the exclude
            # list, since we want to avoid building sources that are already
            # built by our dependencies
            exclude_deps.update(bazel_rules[dep]["_TRANSITIVE_DEPS"])
            continue

        # This dep is an external target, add it as a dependency
        if external_dep_name_maybe is not None:
            transitive_deps.update([external_dep_name_maybe])
            collapsed_deps.update([external_dep_name_maybe])
            continue

    # Direct dependencies are part of transitive dependencies
    transitive_deps.update(direct_deps)

    # Calculate transitive public deps (needed for collapsing sources)
    transitive_public_deps = set(
        [x for x in transitive_deps if x in bazel_label_to_dep_name]
    )

    # Remove intermediate targets that our public dependencies already depend
    # on. This is the step that further shortens the deps list.
    collapsed_deps = set([x for x in collapsed_deps if x not in exclude_deps])

    # Compute the final source files and headers for this build target whose
    # name is `rule_name` (input argument of this function).
    #
    # Imagine a public target PX that has transitive deps [IA, IB, PY, IC, PZ].
    # PX, PY and PZ are public build targets. And IA, IB, IC are intermediate
    # targets. In addition, PY depends on IC.
    #
    # Translate the condition into a dependency graph:
    #   PX -> [IA, IB, PY, IC, PZ]
    #   PY -> [IC]
    #   Public targets: [PX, PY, PZ]
    #
    # The collapsed dependencies of PX: [PY, PZ].
    # The excluded dependencies of PX: [PY, IC, PZ].
    # (IC is excluded as a dependency of PX. It is already included in PY, hence
    # it would be redundant to include it again.)
    #
    # Target PX should include the source files and headers of [PX, IA, IB] as
    # its final build metadata.
    for dep in transitive_deps:
        if dep not in exclude_deps and dep not in transitive_public_deps:
            if dep in bazel_rules:
                collapsed_srcs.update(_extract_sources(bazel_rules[dep]))
                collapsed_public_headers.update(
                    _extract_public_headers(bazel_rules[dep])
                )
                collapsed_headers.update(
                    _extract_nonpublic_headers(bazel_rules[dep])
                )
    # This item is a "visited" flag
    bazel_rule["_PROCESSING_DONE"] = True
    # The following items are described in the docstring.
    bazel_rule["_TRANSITIVE_DEPS"] = list(sorted(transitive_deps))
    bazel_rule["_COLLAPSED_DEPS"] = list(sorted(collapsed_deps))
    bazel_rule["_COLLAPSED_SRCS"] = list(sorted(collapsed_srcs))
    bazel_rule["_COLLAPSED_PUBLIC_HEADERS"] = list(
        sorted(collapsed_public_headers)
    )
    bazel_rule["_COLLAPSED_HEADERS"] = list(sorted(collapsed_headers))
    bazel_rule["_EXCLUDE_DEPS"] = list(sorted(exclude_deps))


# TODO(jtattermusch): deduplicate with transitive_dependencies.py (which has
# slightly different logic)
# TODO(jtattermusch): This is done to avoid introducing too many intermediate
# libraries into the build.yaml-based builds (which might cause issues
# building language-specific artifacts) and also because the libraries in
# build.yaml-based builds are generally considered units of distribution (=
# public libraries that are visible to the user and are installable), while in
# bazel builds it is customary to define a larger number of smaller
# "sublibraries". The need for elision (and expansion) of intermediate libraries
# can be re-evaluated in the future.
def _populate_transitive_metadata(
    bazel_rules: Any, public_dep_names: Iterable[str]
) -> None:
    """Add 'transitive_deps' field for each of the rules"""
    # Create the map between Bazel label and public dependency name
    bazel_label_to_dep_name = {}
    for dep_name in public_dep_names:
        bazel_label_to_dep_name[_get_bazel_label(dep_name)] = dep_name

    # Make sure we reach all the Bazel rules
    # TODO(lidiz) potentially we could only update a subset of rules
    for rule_name in bazel_rules:
        if "_PROCESSING_DONE" not in bazel_rules[rule_name]:
            _compute_transitive_metadata(
                rule_name, bazel_rules, bazel_label_to_dep_name
            )


def update_test_metadata_with_transitive_metadata(
    all_extra_metadata: BuildDict, bazel_rules: BuildDict
) -> None:
    """Patches test build metadata with transitive metadata."""
    for lib_name, lib_dict in list(all_extra_metadata.items()):
        # Skip if it isn't a test
        if (
            lib_dict.get("build") != "test"
            and lib_dict.get("build") != "plugin_test"
        ) or lib_dict.get("_TYPE") != "target":
            continue

        bazel_rule = bazel_rules[_get_bazel_label(lib_name)]

        if "//third_party:benchmark" in bazel_rule["_TRANSITIVE_DEPS"]:
            lib_dict["benchmark"] = True
            lib_dict["defaults"] = "benchmark"

        if "//third_party:gtest" in bazel_rule["_TRANSITIVE_DEPS"]:
            # run_tests.py checks the "gtest" property to see if the test should be run via gtest.
            lib_dict["gtest"] = True
            # TODO: this might be an incorrect categorization of the test...
            lib_dict["language"] = "c++"


def _get_transitive_protos(bazel_rules, t):
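    """Returns all .proto srcs reachable from target t via a breadth-first walk of the deps graph."""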
    queue = collections.deque([t])
    visited = set()
    ret = []
    while queue:
        name = queue.popleft()
        rule = bazel_rules.get(name, None)
        if rule:
            for dep in rule["deps"]:
                if dep not in visited:
                    visited.add(dep)
                    queue.append(dep)
            for src in rule["srcs"]:
                if src.endswith(".proto"):
                    ret.append(src)
    return list(set(ret))


def _expand_upb_proto_library_rules(bazel_rules):
    # Expand the .proto files from UPB proto library rules into the pre-generated
    # upb files.
    GEN_UPB_ROOT = "//:src/core/ext/upb-gen/"
    GEN_UPBDEFS_ROOT = "//:src/core/ext/upbdefs-gen/"
    EXTERNAL_LINKS = [
        ("@com_google_protobuf//", "src/"),
        ("@com_google_googleapis//", ""),
        ("@com_github_cncf_xds//", ""),
        ("@com_envoyproxy_protoc_gen_validate//", ""),
        ("@envoy_api//", ""),
        ("@opencensus_proto//", ""),
    ]
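    # For illustration (hypothetical proto source): the ("@com_google_protobuf//", "src/")
    # entry above yields the prefix "@com_google_protobuf//src/", so
    # "@com_google_protobuf//src/google/protobuf/any.proto" would be stripped to
    # "google/protobuf/any.proto" in the loop below.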
    for name, bazel_rule in bazel_rules.items():
        gen_func = bazel_rule.get("generator_function", None)
        if gen_func in (
            "grpc_upb_proto_library",
            "grpc_upb_proto_reflection_library",
        ):
            # get proto dependency
            deps = bazel_rule["deps"]
            if len(deps) != 1:
                raise Exception(
                    'upb rule "{0}" should have 1 proto dependency but has'
                    ' "{1}"'.format(name, deps)
                )
            # deps is not properly fetched from bazel query for upb_c_proto_library targets,
            # so add the upb dependency manually
            bazel_rule["deps"] = [
                "@com_google_protobuf//upb:descriptor_upb_proto",
                "@com_google_protobuf//upb:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
            ]
            # populate the upb_c_proto_library rule with pre-generated upb headers
            # and sources using proto_rule
            protos = _get_transitive_protos(bazel_rules, deps[0])
            if len(protos) == 0:
                raise Exception(
                    'upb rule "{0}" should have at least one proto file.'.format(
                        name
                    )
                )
            srcs = []
            hdrs = []
            for proto_src in protos:
                for external_link in EXTERNAL_LINKS:
                    if proto_src.startswith(external_link[0]):
                        prefix_to_strip = external_link[0] + external_link[1]
                        if not proto_src.startswith(prefix_to_strip):
                            raise Exception(
                                'Source file "{0}" in upb rule {1} does not'
                                ' have the expected prefix "{2}"'.format(
                                    proto_src, name, prefix_to_strip
                                )
                            )
                        proto_src = proto_src[len(prefix_to_strip) :]
                        break
                if proto_src.startswith("@"):
                    raise Exception(
                        'Proto source "{0}" in upb rule "{1}" is from an unknown workspace.'.format(
                            proto_src, name
                        )
                    )
                proto_src_file = _try_extract_source_file_path(proto_src)
                if not proto_src_file:
                    raise Exception(
                        'Failed to get source file for "{0}" in upb rule "{1}".'.format(
                            proto_src, name
                        )
                    )

                extensions = (
                    # There is no longer a .upb.c extension.
                    [".upb.h", ".upb_minitable.h", ".upb_minitable.c"]
                    if gen_func == "grpc_upb_proto_library"
                    else [".upbdefs.h", ".upbdefs.c"]
                )
                root = (
                    GEN_UPB_ROOT
                    if gen_func == "grpc_upb_proto_library"
                    else GEN_UPBDEFS_ROOT
                )
                for ext in extensions:
                    srcs.append(root + proto_src_file.replace(".proto", ext))
                    hdrs.append(root + proto_src_file.replace(".proto", ext))
            bazel_rule["srcs"] = srcs
            bazel_rule["hdrs"] = hdrs


def _patch_grpc_proto_library_rules(bazel_rules):
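    """Adds an explicit protobuf dependency to every internal C++ proto rule."""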
    for name, bazel_rule in bazel_rules.items():
        generator_func = bazel_rule.get("generator_function", None)
        if name.startswith("//") and (
            generator_func == "grpc_proto_library"
            or bazel_rule["class"] == "cc_proto_library"
        ):
            # Add explicit protobuf dependency for internal c++ proto targets.
            bazel_rule["deps"].append("//third_party:protobuf")


def _patch_descriptor_upb_proto_library(bazel_rules):
    # upb's descriptor_upb_proto library doesn't reference the generated descriptor.proto
    # sources explicitly, so we add them manually.
    bazel_rule = bazel_rules.get(
        "@com_google_protobuf//upb:descriptor_upb_proto", None
    )
    if bazel_rule:
        bazel_rule["srcs"].append(
            ":src/core/ext/upb-gen/google/protobuf/descriptor.upb_minitable.c"
        )
        bazel_rule["hdrs"].append(
            ":src/core/ext/upb-gen/google/protobuf/descriptor.upb.h"
        )


def _generate_build_metadata(
    build_extra_metadata: BuildDict, bazel_rules: BuildDict
) -> BuildDict:
    """Generate build metadata in build.yaml-like format from bazel build metadata and build.yaml-specific "extra metadata"."""
    lib_names = list(build_extra_metadata.keys())
    result = {}

    for lib_name in lib_names:
        lib_dict = _create_target_from_bazel_rule(lib_name, bazel_rules)

        # populate extra properties from the build.yaml-specific "extra metadata"
        lib_dict.update(build_extra_metadata.get(lib_name, {}))

        # store to results
        result[lib_name] = lib_dict

    # Rename targets marked with "_RENAME" extra metadata.
    # This is mostly a cosmetic change to ensure that we end up with build.yaml target
    # names we're used to from the past (and also to avoid overly long target names).
    # The rename step needs to happen after we're done with most of the processing logic,
    # otherwise the already-renamed libraries will have different names than expected.
    for lib_name in lib_names:
        to_name = build_extra_metadata.get(lib_name, {}).get("_RENAME", None)
        if to_name:
            # store lib under the new name and also change its 'name' property
            if to_name in result:
                raise Exception(
                    "Cannot rename target "
                    + str(lib_name)
                    + ", "
                    + str(to_name)
                    + " already exists."
                )
            lib_dict = result.pop(lib_name)
            lib_dict["name"] = to_name
            result[to_name] = lib_dict

            # dep names need to be updated as well
            for lib_dict_to_update in list(result.values()):
                lib_dict_to_update["deps"] = list(
                    [
                        to_name if dep == lib_name else dep
                        for dep in lib_dict_to_update["deps"]
                    ]
                )

    return result


def _convert_to_build_yaml_like(lib_dict: BuildMetadata) -> BuildYaml:
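    """Groups targets into the "libs"/"targets"/"tests" sections of the build.yaml-like output, stripping the temporary "_"-prefixed fields."""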
    lib_names = [
        lib_name
        for lib_name in list(lib_dict.keys())
        if lib_dict[lib_name].get("_TYPE", "library") == "library"
    ]
    target_names = [
        lib_name
        for lib_name in list(lib_dict.keys())
        if lib_dict[lib_name].get("_TYPE", "library") == "target"
    ]
    test_names = [
        lib_name
        for lib_name in list(lib_dict.keys())
        if lib_dict[lib_name].get("_TYPE", "library") == "test"
        or lib_dict[lib_name].get("_TYPE", "library") == "plugin_test"
    ]

    # list libraries and targets in predefined order
    lib_list = [lib_dict[lib_name] for lib_name in lib_names]
    target_list = [lib_dict[lib_name] for lib_name in target_names]
    test_list = [lib_dict[lib_name] for lib_name in test_names]

    # get rid of temporary private fields prefixed with "_" and some other useless fields
    for lib in lib_list:
        for field_to_remove in [
            k for k in list(lib.keys()) if k.startswith("_")
        ]:
            lib.pop(field_to_remove, None)
    for target in target_list:
        for field_to_remove in [
            k for k in list(target.keys()) if k.startswith("_")
        ]:
            target.pop(field_to_remove, None)
        target.pop(
            "public_headers", None
        )  # public headers make no sense for targets
    for test in test_list:
        for field_to_remove in [
            k for k in list(test.keys()) if k.startswith("_")
        ]:
            test.pop(field_to_remove, None)
        test.pop(
            "public_headers", None
        )  # public headers make no sense for tests

    build_yaml_like = {
        "libs": lib_list,
        "filegroups": [],
        "targets": target_list,
        "tests": test_list,
    }
    return build_yaml_like


def _extract_cc_tests(bazel_rules: BuildDict) -> List[str]:
    """Gets list of cc_test tests from bazel rules"""
    result = []
    for bazel_rule in list(bazel_rules.values()):
        if bazel_rule["class"] == "cc_test":
            test_name = bazel_rule["name"]
            if test_name.startswith("//"):
                prefixlen = len("//")
                result.append(test_name[prefixlen:])
    return list(sorted(result))


def _exclude_unwanted_cc_tests(tests: List[str]) -> List[str]:
    """Filters out bazel tests that we don't want to run with other build systems or that we cannot reasonably build"""

    # most qps tests are autogenerated, we are fine without them
    tests = [test for test in tests if not test.startswith("test/cpp/qps:")]
    # microbenchmarks aren't needed for checking correctness
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/microbenchmarks:")
    ]
    tests = [
        test
        for test in tests
        if not test.startswith("test/core/promise/benchmark:")
    ]

    # we have trouble with the census dependency outside of bazel
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/ext/filters/census:")
        and not test.startswith(
            "test/core/server:xds_channel_stack_modifier_test"
        )
        and not test.startswith("test/cpp/ext/gcp:")
        and not test.startswith("test/cpp/ext/filters/logging:")
        and not test.startswith("test/cpp/interop:observability_interop")
    ]

    # we have not added the otel dependency outside of bazel
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/ext/csm:")
        and not test.startswith("test/cpp/interop:xds_interop")
    ]

    # missing opencensus/stats/stats.h
    tests = [
        test
        for test in tests
        if not test.startswith(
            "test/cpp/end2end:server_load_reporting_end2end_test"
        )
    ]
    tests = [
        test
        for test in tests
        if not test.startswith(
            "test/cpp/server/load_reporter:lb_load_reporter_test"
        )
    ]

    # The test uses the --running_under_bazel cmdline argument.
    # To avoid the trouble of needing to adjust it, we just skip the test
    tests = [
        test
        for test in tests
        if not test.startswith(
            "test/cpp/naming:resolver_component_tests_runner_invoker"
        )
    ]

    # the test requires 'client_crash_test_server' to be built
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/end2end:time_change_test")
    ]

    # the test requires 'client_crash_test_server' to be built
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/end2end:client_crash_test")
    ]

    # the test requires 'server_crash_test_client' to be built
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/end2end:server_crash_test")
    ]

    # the test never existed under build.yaml and it fails -> skip it
    tests = [
        test
        for test in tests
        if not test.startswith("test/core/tsi:ssl_session_cache_test")
    ]

    # the binary of this test does not get built with cmake
    tests = [
        test
        for test in tests
        if not test.startswith("test/cpp/util:channelz_sampler_test")
    ]

    # chaotic good is not supported outside bazel
    tests = [
        test
        for test in tests
        if not test.startswith("test/core/transport/chaotic_good")
    ]

    # we don't need to generate fuzzers outside of bazel
    tests = [test for test in tests if not test.endswith("_fuzzer")]
    tests = [test for test in tests if "_fuzzer_" not in test]

    return tests


def _generate_build_extra_metadata_for_tests(
    tests: List[str], bazel_rules: BuildDict
) -> BuildDict:
    """For given tests, generate the "extra metadata" that we need for our "build.yaml"-like output. The extra metadata is generated from the bazel rule metadata by using a bunch of heuristics."""
    test_metadata = {}
    for test in tests:
        test_dict = {"build": "test", "_TYPE": "target"}

        bazel_rule = bazel_rules[_get_bazel_label(test)]

        bazel_tags = bazel_rule["tags"]
        if "manual" in bazel_tags:
            # don't run the tests marked as "manual"
            test_dict["run"] = False

        if bazel_rule["flaky"]:
            # don't run tests that are marked as "flaky" under bazel
            # because that would only add noise for the run_tests.py tests
            # and seeing more failures for tests that we already know are flaky
            # doesn't really help anything
            test_dict["run"] = False

        if "no_uses_polling" in bazel_tags:
            test_dict["uses_polling"] = False

        if "grpc_fuzzer" == bazel_rule["generator_function"]:
            # currently we hand-list fuzzers instead of generating them automatically
            # because there's no way to obtain the maxlen property from the bazel BUILD file.
            print("skipping fuzzer " + test)
            continue

        if "bazel_only" in bazel_tags:
            continue

        # if any tags that restrict platform compatibility are present,
        # generate the "platforms" field accordingly
        # TODO(jtattermusch): there is also a "no_linux" tag, but we cannot take
        # it into account as it is applied by grpc_cc_test when poller expansion
        # is made (for tests where uses_polling=True). So for now, we just
        # assume all tests are compatible with linux and ignore the "no_linux" tag
        # completely.
        known_platform_tags = set(["no_windows", "no_mac"])
        if set(bazel_tags).intersection(known_platform_tags):
            platforms = []
            # assume all tests are compatible with linux and posix
            platforms.append("linux")
            platforms.append(
                "posix"
            )  # there is no posix-specific tag in bazel BUILD
            if "no_mac" not in bazel_tags:
                platforms.append("mac")
            if "no_windows" not in bazel_tags:
                platforms.append("windows")
            test_dict["platforms"] = platforms

        cmdline_args = bazel_rule["args"]
        if cmdline_args:
            test_dict["args"] = list(cmdline_args)

        if test.startswith("test/cpp"):
            test_dict["language"] = "c++"

        elif test.startswith("test/core"):
            test_dict["language"] = "c"
        else:
            raise Exception("wrong test: " + test)

        # short test name without the path.
        # There can be name collisions, but we will resolve them later
        simple_test_name = os.path.basename(_try_extract_source_file_path(test))
        test_dict["_RENAME"] = simple_test_name

        test_metadata[test] = test_dict

    # detect duplicate test names
    tests_by_simple_name = {}
    for test_name, test_dict in list(test_metadata.items()):
        simple_test_name = test_dict["_RENAME"]
        if simple_test_name not in tests_by_simple_name:
            tests_by_simple_name[simple_test_name] = []
        tests_by_simple_name[simple_test_name].append(test_name)

    # choose alternative names for tests with a name collision
    for collision_list in list(tests_by_simple_name.values()):
        if len(collision_list) > 1:
            for test_name in collision_list:
                long_name = test_name.replace("/", "_").replace(":", "_")
                print(
                    'short name of "%s" collides with another test, renaming'
                    " to %s" % (test_name, long_name)
                )
                test_metadata[test_name]["_RENAME"] = long_name
    return test_metadata


def _parse_http_archives(xml_tree: ET.Element) -> "List[ExternalProtoLibrary]":
    """Parses Bazel http_archive rules into ExternalProtoLibrary objects."""
    result = []
    for xml_http_archive in xml_tree:
        if (
            xml_http_archive.tag != "rule"
            or xml_http_archive.attrib["class"] != "http_archive"
        ):
            continue
        # A distilled Python representation of the Bazel http_archive
        http_archive = dict()
        for xml_node in xml_http_archive:
            if xml_node.attrib["name"] == "name":
                http_archive["name"] = xml_node.attrib["value"]
            if xml_node.attrib["name"] == "urls":
                http_archive["urls"] = []
                for url_node in xml_node:
                    http_archive["urls"].append(url_node.attrib["value"])
            if xml_node.attrib["name"] == "url":
                http_archive["urls"] = [xml_node.attrib["value"]]
            if xml_node.attrib["name"] == "sha256":
                http_archive["hash"] = xml_node.attrib["value"]
            if xml_node.attrib["name"] == "strip_prefix":
                http_archive["strip_prefix"] = xml_node.attrib["value"]
        if http_archive["name"] not in EXTERNAL_PROTO_LIBRARIES:
            # If this http archive is not one of the external proto libraries,
            # we don't want to include it as a CMake target
            continue
        lib = EXTERNAL_PROTO_LIBRARIES[http_archive["name"]]
        lib.urls = http_archive["urls"]
        lib.hash = http_archive["hash"]
        lib.strip_prefix = http_archive["strip_prefix"]
        result.append(lib)
    return result


def _generate_external_proto_libraries() -> List[Dict[str, Any]]:
    """Generates the build metadata for external proto libraries"""
    xml_tree = _bazel_query_xml_tree("kind(http_archive, //external:*)")
    libraries = _parse_http_archives(xml_tree)
    libraries.sort(key=lambda x: x.destination)
    return list(map(lambda x: x.__dict__, libraries))


def _detect_and_print_issues(build_yaml_like: BuildYaml) -> None:
    """Try detecting some unusual situations and warn about them."""
    for tgt in build_yaml_like["targets"]:
        if tgt["build"] == "test":
            for src in tgt["src"]:
                if src.startswith("src/") and not src.endswith(".proto"):
                    print(
                        'source file from under "src/" tree used in test '
                        + tgt["name"]
                        + ": "
                        + src
                    )


# extra metadata that will be used to construct build.yaml
# these are mostly extra properties that we weren't able to obtain from the bazel build
# _TYPE: whether this is a library, target or test
# _RENAME: whether this target should be renamed to a different name (to match expectations of make and cmake builds)
_BUILD_EXTRA_METADATA = {
    "third_party/address_sorting:address_sorting": {
        "language": "c",
        "build": "all",
        "_RENAME": "address_sorting",
    },
    "@com_google_protobuf//upb:base": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_base_lib",
    },
    "@com_google_protobuf//upb:mem": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_mem_lib",
    },
    "@com_google_protobuf//upb:message": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_message_lib",
    },
    "@com_google_protobuf//upb/json:json": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_json_lib",
    },
    "@com_google_protobuf//upb/mini_descriptor:mini_descriptor": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_mini_descriptor_lib",
    },
    "@com_google_protobuf//upb/text:text": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_textformat_lib",
    },
    "@com_google_protobuf//upb/wire:wire": {
        "language": "c",
        "build": "all",
        "_RENAME": "upb_wire_lib",
    },
    "@com_google_protobuf//third_party/utf8_range:utf8_range": {
        "language": "c",
        "build": "all",
        # renaming to utf8_range_lib is necessary for now to avoid a clash with the utf8_range target in protobuf's cmake
        "_RENAME": "utf8_range_lib",
    },
    "@com_googlesource_code_re2//:re2": {
        "language": "c",
        "build": "all",
        "_RENAME": "re2",
    },
    "@com_google_googletest//:gtest": {
        "language": "c",
        "build": "private",
        "_RENAME": "gtest",
    },
    "@zlib//:zlib": {
        "language": "c",
        "zlib": True,
        "build": "private",
        "defaults": "zlib",
        "_RENAME": "z",
    },
    "gpr": {
        "language": "c",
        "build": "all",
    },
    "grpc": {
        "language": "c",
        "build": "all",
        "baselib": True,
        "generate_plugin_registry": True,
    },
    "grpc++": {
        "language": "c++",
        "build": "all",
        "baselib": True,
    },
    "grpc++_alts": {"language": "c++", "build": "all", "baselib": True},
    "grpc++_error_details": {"language": "c++", "build": "all"},
    "grpc++_reflection": {"language": "c++", "build": "all"},
    "grpc_authorization_provider": {"language": "c++", "build": "all"},
    "grpc++_unsecure": {
        "language": "c++",
        "build": "all",
        "baselib": True,
    },
    "grpc_unsecure": {
        "language": "c",
        "build": "all",
        "baselib": True,
        "generate_plugin_registry": True,
    },
    "grpcpp_channelz": {"language": "c++", "build": "all"},
    "grpcpp_otel_plugin": {
        "language": "c++",
        "build": "plugin",
    },
    "grpc++_test": {
        "language": "c++",
        "build": "private",
    },
    "src/compiler:grpc_plugin_support": {
        "language": "c++",
        "build": "protoc",
        "_RENAME": "grpc_plugin_support",
    },
    "src/compiler:grpc_cpp_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_cpp_plugin",
    },
    "src/compiler:grpc_csharp_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_csharp_plugin",
    },
    "src/compiler:grpc_node_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_node_plugin",
    },
    "src/compiler:grpc_objective_c_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_objective_c_plugin",
    },
    "src/compiler:grpc_php_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_php_plugin",
    },
    "src/compiler:grpc_python_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_python_plugin",
    },
    "src/compiler:grpc_ruby_plugin": {
        "language": "c++",
        "build": "protoc",
        "_TYPE": "target",
        "_RENAME": "grpc_ruby_plugin",
    },
    # TODO(jtattermusch): consider adding grpc++_core_stats
    # test support libraries
    "test/core/test_util:grpc_test_util": {
        "language": "c",
        "build": "private",
        "_RENAME": "grpc_test_util",
    },
    "test/core/test_util:grpc_test_util_unsecure": {
        "language": "c",
        "build": "private",
        "_RENAME": "grpc_test_util_unsecure",
    },
    # TODO(jtattermusch): consider adding grpc++_test_util_unsecure - it doesn't seem to be used by bazel build (don't forget to set secure: False)
    "test/cpp/util:test_config": {
        "language": "c++",
        "build": "private",
        "_RENAME": "grpc++_test_config",
    },
    "test/cpp/util:test_util": {
        "language": "c++",
        "build": "private",
        "_RENAME": "grpc++_test_util",
    },
    # benchmark support libraries
    "test/cpp/microbenchmarks:helpers": {
        "language": "c++",
        "build": "test",
        "defaults": "benchmark",
        "_RENAME": "benchmark_helpers",
    },
    "test/cpp/interop:interop_client": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "interop_client",
    },
    "test/cpp/interop:interop_server": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "interop_server",
    },
    # TODO(stanleycheung): re-enable this after cmake support for otel is added
    # "test/cpp/interop:xds_interop_client": {
    #     "language": "c++",
    #     "build": "test",
    #     "run": False,
    #     "_TYPE": "target",
    #     "_RENAME": "xds_interop_client",
    # },
    # "test/cpp/interop:xds_interop_server": {
    #     "language": "c++",
    #     "build": "test",
    #     "run": False,
    #     "_TYPE": "target",
    #     "_RENAME": "xds_interop_server",
    # },
    "test/cpp/interop:http2_client": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "http2_client",
    },
    "test/cpp/qps:qps_json_driver": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "qps_json_driver",
    },
    "test/cpp/qps:qps_worker": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "qps_worker",
    },
    "test/cpp/util:grpc_cli": {
        "language": "c++",
        "build": "test",
        "run": False,
        "_TYPE": "target",
        "_RENAME": "grpc_cli",
    },
    "test/cpp/ext/otel:otel_plugin_test": {
        "language": "c++",
        "build": "plugin_test",
        "_TYPE": "target",
        "plugin_option": "gRPC_BUILD_GRPCPP_OTEL_PLUGIN",
        "_RENAME": "otel_plugin_test",
    },
    # TODO(jtattermusch): create_jwt and verify_jwt break distribtests because they depend on grpc_test_utils and thus require tests to be built
    # For now it's ok to disable them as these binaries aren't very useful anyway.
    # 'test/core/security:create_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_create_jwt' },
    # 'test/core/security:verify_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_verify_jwt' },
    # TODO(jtattermusch): add remaining tools such as grpc_print_google_default_creds_token (they are not used by bazel build)
    # TODO(jtattermusch): these fuzzers had no build.yaml equivalent
    # test/core/compression:message_compress_fuzzer
    # test/core/compression:message_decompress_fuzzer
    # test/core/compression:stream_compression_fuzzer
    # test/core/compression:stream_decompression_fuzzer
    # test/core/slice:b64_decode_fuzzer
    # test/core/slice:b64_encode_fuzzer
}

# We need a complete picture of all the targets and dependencies we're interested in
# so we run multiple bazel queries and merge the results.
_BAZEL_DEPS_QUERIES = [
    'deps("//test/...")',
    'deps("//:all")',
    'deps("//src/compiler/...")',
    # allow resolving alias() targets to the actual targets they point to
    'kind(alias, "//third_party:*")',
    # The ^ is needed to differentiate proto_library from go_proto_library
    'deps(kind("^proto_library", @envoy_api//envoy/...))',
    # Make sure we have source info for all the targets that _expand_upb_proto_library_rules artificially adds
    # as upb_c_proto_library dependencies.
    'deps("@com_google_protobuf//upb:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me")',
]

# Step 1: Run a bunch of "bazel query --output xml" queries to collect
# the raw build metadata from the bazel build.
# At the end of this step we will have a dictionary of bazel rules
# that are interesting to us (libraries, binaries, etc.) along
# with their most important metadata (sources, headers, dependencies).
#
# Example of a single bazel rule after being populated:
# '//:grpc' : { 'class': 'cc_library',
#               'hdrs': ['//:include/grpc/byte_buffer.h', ... ],
#               'srcs': ['//:src/core/lib/surface/init.cc', ... ],
#               'deps': ['//:grpc_common', ...],
#               ... }
bazel_rules = {}
for query in _BAZEL_DEPS_QUERIES:
    bazel_rules.update(
        _extract_rules_from_bazel_xml(_bazel_query_xml_tree(query))
    )
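
# Note on the merge above: dict.update() means a rule returned by several of
# the queries is simply overwritten by its last occurrence. Since the XML
# metadata extracted for a given rule should be identical regardless of which
# query discovered it, the merge order is not expected to matter.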

# Step 1.5: The sources for UPB protos are pre-generated, so we want
# to expand the UPB proto library bazel rules into the generated
# .upb.h and .upb.c files.
_expand_upb_proto_library_rules(bazel_rules)

# Step 1.6: Add an explicit protobuf dependency to grpc_proto_library rules.
_patch_grpc_proto_library_rules(bazel_rules)

# Step 1.7: Make sure the upb descriptor.proto library uses the pre-generated sources.
_patch_descriptor_upb_proto_library(bazel_rules)
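
# For illustration, upb derives the generated file names from the .proto
# path, roughly as sketched below. This helper is an unused simplification:
# the real _expand_upb_proto_library_rules above also rewrites the paths to
# point at the checked-in pre-generated sources.
def _sketch_upb_sources(proto_path: str) -> List[str]:
    """Illustrative only: 'foo/bar.proto' -> ['foo/bar.upb.h', 'foo/bar.upb.c']."""
    base = proto_path[: -len(".proto")]
    return [base + ".upb.h", base + ".upb.c"]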

# Step 2: Extract the known bazel cc_test tests. While most tests
# will build with other build systems just fine, some of them
# would be too difficult to build and run outside of bazel,
# so we simply exclude the ones we don't want.
# Note that while making tests buildable with build systems other
# than bazel is extra effort, we still need to do that for these
# reasons:
# - If our cmake build doesn't have any tests at all, it's hard to make
#   sure that what it built actually works (we need at least some "smoke tests").
#   This is quite important because the build flags between the bazel and
#   non-bazel builds might differ (sometimes for interesting reasons that are
#   not easy to overcome), which makes it even more important to have at
#   least some tests for cmake/make.
# - Our portability suite actually runs cmake tests, and migrating the
#   portability suite fully to bazel might be intricate (e.g. it's unclear
#   whether it's possible to get good enough coverage of different
#   compilers / distros etc. with bazel).
# - Some things that are considered "tests" in build.yaml-based builds are
#   actually binaries we'd want to be able to build anyway (qps_json_driver,
#   qps_worker, interop_client, interop_server, grpc_cli), so it's unclear
#   how much make/cmake simplification we would gain by removing just some
#   (but not all) tests.
# TODO(jtattermusch): Investigate feasibility of running the portability suite with bazel.
tests = _exclude_unwanted_cc_tests(_extract_cc_tests(bazel_rules))
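
# For illustration, the extraction above is conceptually just a filter on the
# rule class, as in the unused sketch below (the real helpers above apply
# additional filtering, and the exclusion list lives in
# _exclude_unwanted_cc_tests):
def _sketch_extract_cc_tests(rules: BuildDict) -> List[str]:
    """Illustrative only: names of all collected rules with class cc_test."""
    return sorted(
        name for name, rule in rules.items() if rule.get("class") == "cc_test"
    )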

# Step 3: Generate the "extra metadata" for all our build targets.
# While the bazel rules give us most of the information we need,
# the legacy "build.yaml" format requires some additional fields that
# we cannot get just from bazel alone (we call that "extra metadata").
# In this step, we basically analyze the build metadata we have from bazel
# and use heuristics to determine (and sometimes guess) the right
# extra metadata to use for each target.
#
# - For some targets (such as the public libraries, helper libraries
#   and executables), determining the right extra metadata is hard to do
#   automatically. For these targets, the extra metadata is supplied "manually"
#   in the form of the _BUILD_EXTRA_METADATA dictionary. That allows us to match
#   the semantics of the legacy "build.yaml" as closely as possible.
#
# - For test binaries, it is possible to generate the "extra metadata" mostly
#   automatically using a rule-based heuristic approach because most tests
#   look and behave alike from the build's perspective.
#
# TODO(jtattermusch): Of course neither "_BUILD_EXTRA_METADATA" nor
# the heuristic approach used for tests is ideal, and they cannot be made
# to cover all possible situations (and are tailored to work with the way
# the grpc build currently works), but the idea was to start with something
# reasonably simple that matches the "build.yaml"-like semantics as closely
# as possible (to avoid changing too many things at once) and gradually get
# rid of the legacy "build.yaml"-specific fields one by one. Once that is done,
# only very little "extra metadata" would be needed and/or it would be trivial
# to generate it automatically.
all_extra_metadata = {}
all_extra_metadata.update(
    _generate_build_extra_metadata_for_tests(tests, bazel_rules)
)
all_extra_metadata.update(_BUILD_EXTRA_METADATA)
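
# Note the update() order above: the hand-maintained _BUILD_EXTRA_METADATA is
# applied last, so for any target present in both dictionaries the manual
# entry overrides the heuristically generated one.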

# Step 4: Compute the build metadata that will be used in the final build.yaml.
# The final build metadata includes transitive dependencies, as well as
# sources/headers collapsed from intermediate dependencies (so the result no
# longer references the intermediate targets themselves).
# Example:
# '//:grpc' : { ...,
#               '_TRANSITIVE_DEPS': ['//:gpr_base', ...],
#               '_COLLAPSED_DEPS': ['gpr', ...],
#               '_COLLAPSED_SRCS': [...],
#               '_COLLAPSED_PUBLIC_HEADERS': [...],
#               '_COLLAPSED_HEADERS': [...]
#             }
_populate_transitive_metadata(bazel_rules, list(all_extra_metadata.keys()))
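
# For illustration, computing the transitive dependencies of a rule boils down
# to a graph walk over the per-rule 'deps' edges, as in the unused sketch
# below (the real _populate_transitive_metadata above additionally collapses
# sources/headers and takes the set of targets with extra metadata into
# account):
def _sketch_transitive_deps(root: str, rules: BuildDict) -> List[str]:
    """Illustrative only: iterative DFS collecting all transitive deps of root."""
    visited = set()
    stack = [root]
    while stack:
        for dep in rules.get(stack.pop(), {}).get("deps", []):
            if dep not in visited:
                visited.add(dep)
                stack.append(dep)
    return sorted(visited)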

# Step 4a: Update the existing test metadata with the updated build metadata.
# Some build metadata of certain test targets depends on the transitive
# metadata that wasn't available earlier.
update_test_metadata_with_transitive_metadata(all_extra_metadata, bazel_rules)

# Step 5: Generate the final metadata for all the targets.
# This is done by combining the bazel build metadata and the "extra metadata"
# we obtained in the previous step.
# In this step, we also perform some interesting massaging of the target metadata
# to end up with a result that is as similar to the legacy build.yaml data
# as possible.
# - Some targets get renamed (to match the legacy build.yaml target names)
# - Some intermediate libraries get elided ("expanded") to better match the set
#   of targets provided by the legacy build.yaml build
#
# Originally the target renaming was introduced to address these concerns:
# - Avoid changing too many things at the same time and avoid people getting
#   confused by some well-known targets suddenly being missing.
# - Makefile/cmake and also language-specific generators rely on some build
#   targets being called exactly the way they are. Some of our testing
#   scripts also invoke executables (e.g. "qps_json_driver") by their name.
# - The autogenerated test name from bazel includes the package path
#   (e.g. "test_cpp_TEST_NAME"). Without renaming, the target names would
#   end up pretty ugly (e.g. test_cpp_qps_qps_json_driver).
# TODO(jtattermusch): reevaluate the need for target renaming in the future.
#
# Example of a single generated target:
# 'grpc' : { 'language': 'c',
#            'public_headers': ['include/grpc/byte_buffer.h', ... ],
#            'headers': ['src/core/ext/filters/client_channel/client_channel.h', ... ],
#            'src': ['src/core/lib/surface/init.cc', ... ],
#            'deps': ['gpr', 'address_sorting', ...],
#            ... }
all_targets_dict = _generate_build_metadata(all_extra_metadata, bazel_rules)
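
# For illustration, the renaming is essentially a key substitution driven by
# the optional '_RENAME' field of the extra metadata (e.g.
# 'test/cpp/qps:qps_json_driver' becomes 'qps_json_driver'). An unused,
# simplified sketch (the real logic inside _generate_build_metadata also has
# to fix up the 'deps' lists so they refer to the new names):
def _sketch_apply_renames(targets: BuildDict) -> BuildDict:
    """Illustrative only: rename targets according to their _RENAME field."""
    return {
        metadata.get("_RENAME", name): metadata
        for name, metadata in targets.items()
    }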

# Step 6: Convert the dictionary with all the targets to a dict that has
# the desired "build.yaml"-like layout.
# TODO(jtattermusch): We use the custom "build.yaml"-like layout because
# currently all other build systems use that format as their source of truth.
# In the future, we can get rid of this custom & legacy format entirely,
# but we would need to update the generators for other build systems
# at the same time.
#
# Layout of the result:
# { 'libs': { TARGET_DICT_FOR_LIB_XYZ, ... },
#   'targets': { TARGET_DICT_FOR_BIN_XYZ, ... },
#   'tests': { TARGET_DICT_FOR_TEST_XYZ, ...} }
build_yaml_like = _convert_to_build_yaml_like(all_targets_dict)
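
# For illustration, the conversion routes each target into one of the three
# top-level buckets, roughly as in the unused sketch below (the exact
# bucketing criteria here are a simplifying assumption; the real
# _convert_to_build_yaml_like above also strips the internal '_'-prefixed
# fields from each entry):
def _sketch_bucket_targets(targets: BuildDict) -> BuildYaml:
    """Illustrative only: group targets into a build.yaml-like layout."""
    buckets: BuildYaml = {"libs": [], "targets": [], "tests": []}
    type_to_bucket = {"library": "libs", "target": "targets", "test": "tests"}
    for metadata in targets.values():
        buckets[type_to_bucket[metadata["_TYPE"]]].append(metadata)
    return buckets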

# Step 7: Generate build metadata for external ProtoBuf libraries.
# We only want the ProtoBuf sources from these dependencies, and they may
# not be present in our release source tarballs. These rules will be used by
# CMake to download the libraries if they are not already present. Even if a
# download fails, it is a soft error that doesn't block the existing targets
# from building successfully.
build_yaml_like[
    "external_proto_libraries"
] = _generate_external_proto_libraries()
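
# For illustration, each emitted entry mirrors the fields of the
# ExternalProtoLibrary class defined earlier in this file; the values below
# are placeholders that only indicate the expected shape:
#   external_proto_libraries:
#   - destination: third_party/<library>
#     proto_prefix: third_party/<library>/
#     urls: [...]
#     hash: ...
#     strip_prefix: ...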

# Detect and report some suspicious situations we've seen before.
_detect_and_print_issues(build_yaml_like)

# Step 8: Store the build_autogenerated.yaml in a deterministic (=sorted)
# and cleaned-up form.
# A basic overview of the resulting "build.yaml"-like format is here:
# https://github.com/grpc/grpc/blob/master/templates/README.md
# TODO(jtattermusch): The "cleanup" function is taken from the legacy
# build system (which used build.yaml) and can eventually be removed.
build_yaml_string = build_cleaner.cleaned_build_yaml_dict_as_string(
    build_yaml_like
)
with open("build_autogenerated.yaml", "w") as file:
    file.write(build_yaml_string)