# Description:
#   GPU-specific components in XLA service implementation.
3
4load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
5load(
6    "//tensorflow/core/platform:build_config.bzl",
7    "tf_proto_library",
8)
9load(
10    "//tensorflow/core/platform:build_config_root.bzl",
11    "tf_cuda_tests_tags",
12)
13load(
14    "//tensorflow:tensorflow.bzl",
15    "check_deps",
16    "if_google",
17    "tf_cc_test",
18    "tf_copts",
19    "tf_cuda_library",
20)
21load(
22    "@local_config_rocm//rocm:build_defs.bzl",
23    "if_rocm_is_configured",
24)
25load(
26    "//tensorflow/stream_executor:build_defs.bzl",
27    "if_gpu_is_configured",
28)
29load(
30    "//tensorflow/tsl/platform/default:cuda_build_defs.bzl",
31    "if_cuda_is_configured",
32)
33
34# buildifier: disable=same-origin-load
35load("//tensorflow:tensorflow.bzl", "filegroup")
36
37# buildifier: disable=same-origin-load
38load("//tensorflow:tensorflow.bzl", "get_compatible_with_cloud")
39
40# buildifier: disable=same-origin-load
41load("//tensorflow:tensorflow.bzl", "if_nccl")
42
# Package-wide defaults: a target is visible only to ":friends" unless it sets
# its own `visibility` attribute.
package(
    default_visibility = [":friends"],
    licenses = ["notice"],
)
47
# Visibility allow-list for this package; it pulls in the XLA-wide ":friends"
# group rather than enumerating packages itself.
package_group(
    name = "friends",
    includes = ["//tensorflow/compiler/xla:friends"],
)
54
# Guards against accidentally introducing TFRT dependencies. The lists below
# capture the current state of the dependency graph and may need to be updated
# from time to time.
# Note: private targets may depend on TFRT if they are tagged 'manual'.
check_deps(
    name = "tfrt_deps_check",
    disallowed_deps = select({
        # Nothing is disallowed when XLIR is enabled.
        ":is_xlir_enabled": [],
        "//conditions:default": [
            # copybara:uncomment ":jitrt_custom_calls",
        ],
    }),
    deps = [
        # Targets that are included in CPU builds should not depend on TFRT.
        ":gpu_device_info",
        ":gpu_executable_run_options",
        ":ir_emission_utils",
        ":launch_dimensions",
        ":parallel_loop_emitter",
        ":target_util",
        # XLIR targets should only depend on TFRT if JitRt is enabled.
        ":gpu_compiler",
        ":gpu_executable",
        ":nccl_utils",
    ],
)
81
# Collects every C++ source and header under this package (recursively) so
# that dependency checks can consume them as a single label.
# NOTE(review): the files are attached via `data` rather than `srcs`; confirm
# that the consumer of this filegroup expects runfiles.
filegroup(
    name = "c_srcs",
    data = glob(["**/*.cc", "**/*.h"]),
)
90
# Proto library built from backend_configs.proto. Other targets in this file
# consume the generated C++ library through the ":backend_configs_cc" label.
tf_proto_library(
    name = "backend_configs",
    srcs = ["backend_configs.proto"],
    cc_api_version = 2,
    # Protos that backend_configs.proto depends on.
    protodeps = [
        "//tensorflow/compiler/xla:xla_data_proto",
        "//tensorflow/compiler/xla/stream_executor:dnn_proto",
    ],
)
100
# GPU executable run options. Unlike most targets in this package it is
# publicly visible, and it is listed in :tfrt_deps_check among the targets that
# must stay free of TFRT dependencies.
cc_library(
    name = "gpu_executable_run_options",
    srcs = ["gpu_executable_run_options.cc"],
    hdrs = ["gpu_executable_run_options.h"],
    compatible_with = get_compatible_with_cloud(),
    # Overrides the package default of [":friends"].
    visibility = ["//visibility:public"],
    deps = [
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla/service:executable",
        "//tensorflow/compiler/xla/service:global_device_id",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "@com_google_absl//absl/algorithm:container",
    ],
)
117
# Library built from gpu_constants.{h,cc}; used by :buffer_allocations,
# :ir_emitter and :gpu_executable in this package.
cc_library(
    name = "gpu_constants",
    srcs = ["gpu_constants.cc"],
    hdrs = ["gpu_constants.h"],
    deps = [
        "//tensorflow/compiler/xla:types",
        "//tensorflow/core:framework",
    ],
)
127
# Header-only library (no srcs) exposing gpu_types.h.
cc_library(
    name = "gpu_types",
    hdrs = ["gpu_types.h"],
    deps = [
        "//tensorflow/compiler/xla:types",
        "//tensorflow/stream_executor:device_description",
        "@com_google_absl//absl/types:variant",
    ],
)
137
# Library built from launch_dimensions.{h,cc}. It is listed in
# :tfrt_deps_check among the targets that must not depend on TFRT.
cc_library(
    name = "launch_dimensions",
    srcs = ["launch_dimensions.cc"],
    hdrs = ["launch_dimensions.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        ":gpu_device_info",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/core:lib",
    ],
)
153
# Library built from custom_call_thunk.{h,cc}.
cc_library(
    name = "custom_call_thunk",
    srcs = ["custom_call_thunk.cc"],
    hdrs = ["custom_call_thunk.h"],
    # GOOGLE_CUDA is defined for this target's own compilation only, and only
    # when CUDA is configured.
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":buffer_allocations",
        ":thunk",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:custom_call_status_internal",
        "//tensorflow/core/platform:errors",
        "//tensorflow/stream_executor/gpu:gpu_stream_header",
        "//tensorflow/stream_executor/gpu:gpu_types_header",
        "@com_google_absl//absl/strings:str_format",
    ],
)
173
# Test for custom calls on GPU. The test source is only compiled when a GPU
# backend (CUDA or ROCm) is configured; otherwise `srcs` is empty.
tf_cc_test(
    name = "custom_call_test",
    srcs = if_gpu_is_configured(["custom_call_test.cc"]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    tags = tf_cuda_tests_tags(),
    deps = [
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/client:xla_builder",
        "//tensorflow/compiler/xla/client/lib:constants",
        "//tensorflow/compiler/xla/service:custom_call_status",
        "//tensorflow/compiler/xla/service:custom_call_target_registry",
        "//tensorflow/compiler/xla/service:gpu_plugin",
        "//tensorflow/compiler/xla/tests:client_library_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//tensorflow/core:test",
        "//tensorflow/stream_executor/gpu:gpu_types_header",
    ] + if_cuda_is_configured([
        # Vendor headers for the CUDA build.
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        # Vendor headers for the ROCm build.
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)
197
# Library built from hlo_to_ir_bindings.{h,cc}; depends on the XLA llvm_ir
# helpers and on LLVM Core.
cc_library(
    name = "hlo_to_ir_bindings",
    srcs = ["hlo_to_ir_bindings.cc"],
    hdrs = ["hlo_to_ir_bindings.h"],
    deps = [
        ":buffer_allocations",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
    ],
)
219
# Library built from target_util.{h,cc}. It is listed in :tfrt_deps_check
# among the targets that must not depend on TFRT.
cc_library(
    name = "target_util",
    srcs = ["target_util.cc"],
    hdrs = ["target_util.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_type_conversion_util",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
    ],
)
236
# Library built from gpu_device_info.{h,cc}; its only dependency is the
# StreamExecutor stream header target.
cc_library(
    name = "gpu_device_info",
    srcs = ["gpu_device_info.cc"],
    hdrs = ["gpu_device_info.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        "//tensorflow/stream_executor:stream_header",
    ],
)
244
# IR emitters (ir_emitter, ir_emitter_nested, ir_emitter_unnested) together
# with the ir_emitter_context.h and kernel_mapping_scheme.h headers.
cc_library(
    name = "ir_emitter",
    srcs = [
        "ir_emitter.cc",
        "ir_emitter_nested.cc",
        "ir_emitter_unnested.cc",
    ],
    hdrs = [
        "ir_emitter.h",
        "ir_emitter_context.h",
        "ir_emitter_nested.h",
        "ir_emitter_unnested.h",
        "kernel_mapping_scheme.h",
    ],
    # Define GOOGLE_CUDA through `local_defines` (as :custom_call_thunk and
    # :custom_call_test do) instead of a raw "-D" flag in `copts`. The macro is
    # still added only to this target's compile line, not to its dependents.
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":buffer_allocations",
        ":elemental_ir_emitter",
        ":fft_thunk",
        ":gpu_asm_opts_util",
        ":gpu_constants",
        ":gpu_conv_runner",
        ":gpu_executable",
        ":hlo_to_ir_bindings",
        ":ir_emission_utils",
        ":launch_dimensions",
        ":matmul_utils",
        ":nccl_collective_thunks",
        ":parallel_loop_emitter",
        ":target_util",
        ":thunk",
        "//tensorflow/compiler/mlir:name_utils",
        "//tensorflow/compiler/mlir/xla:attribute_exporter",
        "//tensorflow/compiler/mlir/xla:hlo_module_importer",
        "//tensorflow/compiler/mlir/xla:hlo_utils",
        "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla",
        "//tensorflow/compiler/mlir/xla:mlir_hlo_to_hlo",
        "//tensorflow/compiler/mlir/xla:type_to_shape",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:permutation_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:union_find",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/mlir_hlo",
        "//tensorflow/compiler/xla/mlir_hlo:gpu_fusion_rewrite",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo_gpu",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:collective_ops_utils",
        "//tensorflow/compiler/xla/service:custom_call_status",
        "//tensorflow/compiler/xla/service:custom_call_target_registry",
        "//tensorflow/compiler/xla/service:elemental_ir_emitter",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_execution_profile",
        "//tensorflow/compiler/xla/service:name_uniquer",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:shape_inference",
        "//tensorflow/compiler/xla/service:while_loop_analysis",
        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
        "//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util",
        "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_builder_mixin",
        "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_type_conversion_util",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
        "//tensorflow/compiler/xla/service/llvm_ir:sort_util",
        "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:human_readable_json",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Linker",
        "@llvm-project//llvm:Support",
        "@llvm-project//mlir:ArithmeticDialect",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:GPUDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:LLVMDialect",
        "@llvm-project//mlir:LLVMToLLVMIRTranslation",
        "@llvm-project//mlir:MemRefDialect",
        "@llvm-project//mlir:NVVMToLLVMIRTranslation",
        "@llvm-project//mlir:ROCDLToLLVMIRTranslation",
        "@llvm-project//mlir:Support",
        "@llvm-project//mlir:ToLLVMIRTranslation",
    ] + if_gpu_is_configured([
        # Only added when a GPU backend (CUDA or ROCm) is configured.
        ":triangular_solve_thunk",
        ":cholesky_thunk",
    ]) + if_cuda_is_configured([
        # Only added for CUDA builds.
        ":cublas_lt_matmul_thunk",
    ]),
)
351
# Library built from parallel_loop_emitter.{h,cc}. It is listed in
# :tfrt_deps_check among the targets that must not depend on TFRT.
cc_library(
    name = "parallel_loop_emitter",
    srcs = ["parallel_loop_emitter.cc"],
    hdrs = ["parallel_loop_emitter.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        ":launch_dimensions",
        ":target_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
        "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
        "//tensorflow/core:lib",
        "@llvm-project//llvm:Core",
    ],
)
371
# GPU-package elemental IR emitter, built from elemental_ir_emitter.{h,cc}. It
# depends on the generic //tensorflow/compiler/xla/service:elemental_ir_emitter
# target of the same name.
cc_library(
    name = "elemental_ir_emitter",
    srcs = ["elemental_ir_emitter.cc"],
    hdrs = ["elemental_ir_emitter.h"],
    deps = [
        ":backend_configs_cc",
        ":target_util",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:elemental_ir_emitter",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
        "//tensorflow/compiler/xla/service/llvm_ir:math_ops",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
    ],
)
402
# Library built from buffer_allocations.{h,cc}; a dependency of :thunk and of
# most thunk libraries in this package.
cc_library(
    name = "buffer_allocations",
    srcs = ["buffer_allocations.cc"],
    hdrs = ["buffer_allocations.h"],
    deps = [
        ":gpu_constants",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:device_memory_allocator",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
    ],
)
424
# Library built from thunk.{h,cc}; the concrete *_thunk libraries in this
# package depend on it.
cc_library(
    name = "thunk",
    srcs = ["thunk.cc"],
    hdrs = ["thunk.h"],
    deps = [
        ":buffer_allocations",
        ":gpu_executable_run_options",
        "//tensorflow/compiler/xla:executable_run_options",
        "//tensorflow/compiler/xla/service:executable",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
    ],
)
439
# Thunks for the NCCL collectives: all-gather, all-reduce, all-to-all and
# collective-permute, plus the shared nccl_collective_thunk sources.
tf_cuda_library(
    name = "nccl_collective_thunks",
    srcs = [
        "nccl_all_gather_thunk.cc",
        "nccl_all_reduce_thunk.cc",
        "nccl_all_to_all_thunk.cc",
        "nccl_collective_permute_thunk.cc",
        "nccl_collective_thunk.cc",
    ],
    hdrs = [
        "nccl_all_gather_thunk.h",
        "nccl_all_reduce_thunk.h",
        "nccl_all_to_all_thunk.h",
        "nccl_collective_permute_thunk.h",
        "nccl_collective_thunk.h",
    ],
    # Override tf_cuda_library()'s internal default value of ["//buildenv/target:gce"].
    compatible_with = [],
    deps = [
        ":buffer_allocations",
        ":ir_emission_utils",
        # Alias: the real NCCL/RCCL utilities or the :empty placeholder.
        ":nccl_utils",
        ":thunk",
        "//tensorflow/compiler/mlir/xla:attribute_exporter",
        "//tensorflow/compiler/mlir/xla:hlo_utils",
        "//tensorflow/compiler/mlir/xla:type_to_shape",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo_gpu",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:collective_ops_utils",
        "//tensorflow/compiler/xla/service:global_device_id",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib",
        "//tensorflow/stream_executor/gpu:gpu_activation_header",
        "//tensorflow/stream_executor/gpu:gpu_stream",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@llvm-project//mlir:IR",
    ],
)
488
# Placeholder library with no sources or deps, used to implement nested
# dependency conditions (it is the fallback of the :nccl_utils alias).
cc_library(
    name = "empty",
)
491
# When NCCL/RCCL is supported this resolves to :_nccl_utils, which '#defines
# XLA_ENABLE_XCCL' and provides a header that #includes NCCL/RCCL. Otherwise
# it resolves to the :empty placeholder library.
alias(
    name = "nccl_utils",
    actual = if_nccl(":_nccl_utils", ":empty"),
)
498
# Implementation behind the :nccl_utils alias. Do not depend on this target
# directly; depend on :nccl_utils instead.
tf_cuda_library(
    name = "_nccl_utils",
    srcs = if_gpu_is_configured(["nccl_utils.cc"]),
    hdrs = if_gpu_is_configured(["nccl_utils.h"]),
    # Override tf_cuda_library()'s internal default value of ["//buildenv/target:gce"].
    compatible_with = [],
    # `defines` (unlike `local_defines`) also propagates to dependents.
    defines = if_gpu_is_configured(["XLA_ENABLE_XCCL"]),
    tags = ["manual"],  # Only builds with if_nccl().
    deps = if_gpu_is_configured([
        ":gpu_executable_run_options",
        "//tensorflow/compiler/xla:debug_options_flags",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:collective_ops_utils",
        "//tensorflow/compiler/xla/service:global_device_id",
        "//tensorflow/compiler/xla/service:rendezvous",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
    ]) + if_cuda_is_configured([
        # CUDA builds link against NCCL.
        "@local_config_nccl//:nccl",
    ]) + if_rocm_is_configured([
        # ROCm builds link against RCCL.
        "@local_config_rocm//rocm:rccl",
    ]),
)
529
# Boolean build setting that toggles XLIR support. Its default is chosen by
# if_google(True, False); :is_xlir_enabled matches when it is True.
bool_flag(
    name = "enable_xlir",
    build_setting_default = if_google(True, False),
)
534
# select() key that matches when the :enable_xlir build setting is True.
config_setting(
    name = "is_xlir_enabled",
    flag_values = {
        ":enable_xlir": "True",
    },
)
539
540# copybara:uncomment_begin
541#
542# cc_library(
543#     name = "jitrt_custom_calls",
544#     srcs = ["jitrt_custom_calls.cc"],
545#     hdrs = ["jitrt_custom_calls.h"],
546#     copts = if_cuda_is_configured(["-DGOOGLE_CUDA=1"]),
547#     tags = ["manual"],
548#     visibility = ["//visibility:private"],
549#     deps = [
550#         ":fft_thunk",
551#         ":gpu_asm_opts_util",
552#         ":io_feed_manager",
553#         ":matmul_utils",
554#         ":nccl_collective_thunks",
555#         ":stream_executor_util",
556#         "@llvm-project//llvm:OrcJIT",
557#         "@llvm-project//mlir:Support",
558#         "//tensorflow/compiler/xla:tfrt_utils",
559#         "//tensorflow/compiler/xla/runtime:arguments",
560#         "//tensorflow/compiler/xla/runtime:types",
561#         "//tensorflow/compiler/xla/runtime:executable",
562#         "//tensorflow/compiler/xla/runtime:jit_executable",
563#         "//tensorflow/compiler/xla:shape_util",
564#         "//tensorflow/compiler/xla/service:custom_call_status_internal",
565#         "//tensorflow/compiler/xla/service:custom_call_target_registry",
566#         "//tensorflow/compiler/xla/service:executable",
567#         "//tensorflow/compiler/xla/service/gpu:gpu_conv_runner",
568#         "//tensorflow/core/platform:human_readable_json",
569#         "//tensorflow/stream_executor/gpu:gpu_stream",
570#         "//tensorflow/stream_executor/gpu:gpu_types_header",
571#         "@tf_runtime//:dtype",
572#         "@tf_runtime//:support",
573#         "//tensorflow/compiler/xla/runtime:custom_call",
574#         "//tensorflow/compiler/xla/runtime:type_id",
575#         "//tensorflow/compiler/xla/mlir/transforms/runtime:custom_call_encoding",
576#     ] + if_gpu_is_configured([
577#         ":cholesky_thunk",
578#         ":triangular_solve_thunk",
579#     ]),
580# )
581#
582# copybara:uncomment_end
583
# The GPU executable together with the basic thunks it runs (conditional,
# convolution, copy, for, infeed, kernel, memset, outfeed, replica-id,
# sequential and while).
cc_library(
    name = "gpu_executable",
    srcs = [
        "conditional_thunk.cc",
        "convolution_thunk.cc",
        "copy_thunk.cc",
        "for_thunk.cc",
        "gpu_executable.cc",
        "infeed_thunk.cc",
        "kernel_thunk.cc",
        "memset_thunk.cc",
        "outfeed_thunk.cc",
        "replica_id_thunk.cc",
        "sequential_thunk.cc",
        "while_thunk.cc",
    ],
    # NOTE(review): custom_call_thunk.h is also exported by :custom_call_thunk
    # (and gemm_thunk.h presumably by :gemm_thunk); confirm that listing them
    # here as well is intentional.
    hdrs = [
        "conditional_thunk.h",
        "convolution_thunk.h",
        "copy_thunk.h",
        "custom_call_thunk.h",
        "for_thunk.h",
        "gemm_thunk.h",
        "gpu_executable.h",
        "infeed_thunk.h",
        "kernel_thunk.h",
        "memset_thunk.h",
        "outfeed_thunk.h",
        "replica_id_thunk.h",
        "sequential_thunk.h",
        "while_thunk.h",
    ],
    # XLA_ENABLE_XLIR is defined for this target only when :enable_xlir is True.
    local_defines = select({
        ":is_xlir_enabled": ["XLA_ENABLE_XLIR=1"],
        "//conditions:default": [],
    }),
    deps = [
        ":backend_configs_cc",
        ":buffer_allocations",
        ":cusolver_context",
        ":custom_call_thunk",
        ":fft_thunk",
        ":gemm_thunk",
        ":gpu_asm_opts_util",
        ":gpu_constants",
        ":gpu_conv_runner",
        ":gpu_executable_run_options",
        ":gpu_types",
        ":io_feed_manager",
        ":ir_emission_utils",
        ":launch_dimensions",
        ":matmul_utils",
        ":nccl_collective_thunks",
        ":stream_executor_util",
        ":thunk",
        "//tensorflow/compiler/xla:array2d",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:refcounting_hash_map",
        "//tensorflow/compiler/xla:shape_tree",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo_gpu",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:custom_call_status_internal",
        "//tensorflow/compiler/xla/service:executable",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
        "//tensorflow/compiler/xla/service:hlo_execution_profile",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:logical_buffer",
        "//tensorflow/compiler/xla/service:shaped_buffer",
        "//tensorflow/compiler/xla/service:transfer_manager",
        "//tensorflow/compiler/xla/service:xla_debug_info_manager",
        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/profiler/lib:scoped_annotation",
        "//tensorflow/core/profiler/lib:traceme",
        "//tensorflow/stream_executor",
        "//tensorflow/stream_executor:blas",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:device_memory_allocator",
        "//tensorflow/stream_executor:kernel",
        "//tensorflow/stream_executor:scratch_allocator",
        "//tensorflow/stream_executor/gpu:asm_compiler",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
        "//tensorflow/stream_executor/gpu:gpu_stream",
        "//tensorflow/stream_executor/gpu:gpu_types_header",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/types:span",
        "@com_google_absl//absl/types:variant",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
    ] + if_gpu_is_configured([
        # Only added when a GPU backend (CUDA or ROCm) is configured.
        ":cholesky_thunk",
        ":precompiled_kernels",
        ":triangular_solve_thunk",
    ]) + if_cuda_is_configured([
        # CUDA-only StreamExecutor plugins and headers.
        "//tensorflow/stream_executor/cuda:cuda_stream",
        "//tensorflow/tsl/platform/default/build_config:cublas_plugin",
        "//tensorflow/tsl/platform/default/build_config:cudnn_plugin",
        "//tensorflow/tsl/platform/default/build_config:cufft_plugin",
        "//tensorflow/tsl/platform/default/build_config:stream_executor_cuda",  # build_cleaner: keep
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        # ROCm-only StreamExecutor support and headers.
        "//tensorflow/tsl/platform/default/build_config:stream_executor_rocm",
        "@local_config_rocm//rocm:rocm_headers",
    ]) + select({
        # Runtime dependencies pulled in only when XLIR is enabled.
        ":is_xlir_enabled": [
            ":jitrt_custom_calls",
            "//tensorflow/compiler/xla/runtime:jit_executable",
            "//tensorflow/compiler/xla/runtime:executable",
            "//tensorflow/compiler/xla/runtime:diagnostics",
            "//tensorflow/compiler/xla/mlir/transforms/runtime:compilation_pipeline",
            # copybara:uncomment "@tf_runtime//:init_tfrt_dialects",
        ],
        "//conditions:default": [],
    }),
)
719
# Library built from ir_emission_utils.{h,cc}. It is listed in
# :tfrt_deps_check among the targets that must not depend on TFRT.
cc_library(
    name = "ir_emission_utils",
    srcs = ["ir_emission_utils.cc"],
    hdrs = ["ir_emission_utils.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        ":target_util",
        "//tensorflow/compiler/mlir/xla:hlo_utils",
        "//tensorflow/compiler/mlir/xla:type_to_shape",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/mlir_hlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_type_conversion_util",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "@llvm-project//llvm:Core",
        "@llvm-project//mlir:ArithmeticDialect",
    ],
)
741
# Unit test for :ir_emission_utils. It carries no GPU test tags, unlike
# :custom_call_test.
tf_cc_test(
    name = "ir_emission_utils_test",
    srcs = ["ir_emission_utils_test.cc"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//tensorflow/core:test",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
    ],
)
756
# Library built from cublas_cudnn.{h,cc}; it depends only on the HLO library
# and the platform statusor target.
cc_library(
    name = "cublas_cudnn",
    srcs = ["cublas_cudnn.cc"],
    hdrs = ["cublas_cudnn.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core/platform:statusor",
    ],
)
767
# Cholesky thunk. Sources, headers and deps are all empty unless a GPU backend
# is configured, so the target is then an empty library.
# TODO(ezhulenev): Extract `RunCholesky` into a separate library.
cc_library(
    name = "cholesky_thunk",
    srcs = if_gpu_is_configured(["cholesky_thunk.cc"]),
    hdrs = if_gpu_is_configured(["cholesky_thunk.h"]),
    deps = if_gpu_is_configured([
        ":buffer_allocations",
        ":cusolver_context",
        ":precompiled_kernels",
        ":thunk",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib_proto_parsing",
        "//tensorflow/core:portable_gif_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:optional",
    ]),
)
795
# Triangular-solve thunk. Sources, headers and deps are all empty unless a GPU
# backend is configured, so the target is then an empty library.
# TODO(ezhulenev): Extract `RunTriangularSolve` into a separate library.
cc_library(
    name = "triangular_solve_thunk",
    srcs = if_gpu_is_configured(["triangular_solve_thunk.cc"]),
    hdrs = if_gpu_is_configured(["triangular_solve_thunk.h"]),
    deps = if_gpu_is_configured([
        ":buffer_allocations",
        ":cusolver_context",
        ":precompiled_kernels",
        ":thunk",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib_proto_parsing",
        "//tensorflow/core:portable_gif_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:optional",
    ]),
)
823
# Library built from fft_thunk.{cc,h}. Unlike :cholesky_thunk and
# :triangular_solve_thunk, none of its attributes are wrapped in
# if_gpu_is_configured().
cc_library(
    name = "fft_thunk",
    srcs = ["fft_thunk.cc"],
    hdrs = ["fft_thunk.h"],
    deps = [
        ":buffer_allocations",
        ":cusolver_context",
        ":precompiled_kernels",
        ":thunk",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib_proto_parsing",
        "//tensorflow/core:portable_gif_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:scratch_allocator",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:optional",
    ],
)
852
# Library built from gemm_rewriter.{cc,h}. It links against the HLO pass
# infrastructure (hlo_pass, pattern_matcher, hlo_evaluator) and declares no
# configuration-dependent attributes.
cc_library(
    name = "gemm_rewriter",
    srcs = ["gemm_rewriter.cc"],
    hdrs = ["gemm_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_evaluator",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/core:lib",
        "//tensorflow/stream_executor/lib",
        "@com_google_absl//absl/algorithm:container",
    ],
)
874
# Library built from gemm_thunk.{cc,h}; depends on :matmul_utils and :thunk.
# All attributes are unconditional.
cc_library(
    name = "gemm_thunk",
    srcs = ["gemm_thunk.cc"],
    hdrs = ["gemm_thunk.h"],
    deps = [
        ":matmul_utils",
        ":thunk",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/core:tflite_portable_logging",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
    ],
)
889
# Library built from cublas_lt_matmul_thunk.{cc,h}. srcs, hdrs and deps all go
# through if_cuda_is_configured(), so the target's content is CUDA-specific
# (ROCm is not covered by this macro; see cuda_build_defs.bzl to confirm).
cc_library(
    name = "cublas_lt_matmul_thunk",
    srcs = if_cuda_is_configured(["cublas_lt_matmul_thunk.cc"]),
    hdrs = if_cuda_is_configured(["cublas_lt_matmul_thunk.h"]),
    deps = if_cuda_is_configured([
        ":matmul_utils",
        ":thunk",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/core:tflite_portable_logging",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/cuda:cublas_lt_header",
        "//tensorflow/tsl/platform/default/build_config:cublas_plugin",
    ]),
)
906
# Library built from gemm_algorithm_picker.{cc,h}. Every attribute is wrapped
# in if_cuda_is_configured(); the deps include :gemm_thunk, :buffer_comparator
# and the stream_executor redzone allocator.
cc_library(
    name = "gemm_algorithm_picker",
    srcs = if_cuda_is_configured(["gemm_algorithm_picker.cc"]),
    hdrs = if_cuda_is_configured(["gemm_algorithm_picker.h"]),
    deps = if_cuda_is_configured([
        ":backend_configs_cc",
        ":buffer_comparator",
        ":gemm_thunk",
        ":gpu_asm_opts_util",
        ":gpu_conv_runner",
        ":ir_emission_utils",
        ":matmul_utils",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/protobuf:autotuning_proto_cc",
        "//tensorflow/core/util/proto:proto_utils",
        "//tensorflow/stream_executor:blas",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:device_memory_allocator",
        "//tensorflow/stream_executor/cuda:cublas_lt_header",
        "//tensorflow/stream_executor/gpu:redzone_allocator",
        "//tensorflow/tsl/platform/default/build_config:cublas_plugin",
    ]),
)
936
# Library built from matmul_utils.{cc,h}. GOOGLE_CUDA=1 is set through
# `defines`, so it is also propagated to targets that depend on this one.
# The cuBLASLt-related deps are appended only via if_cuda_is_configured().
cc_library(
    name = "matmul_utils",
    srcs = ["matmul_utils.cc"],
    hdrs = ["matmul_utils.h"],
    defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/mlir_hlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo_gpu",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core/platform:statusor",
        "//tensorflow/stream_executor:stream_header",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/types:span",
    ] + if_cuda_is_configured([
        "//tensorflow/stream_executor:host_or_device_scalar",
        "//tensorflow/stream_executor:scratch_allocator",
        "//tensorflow/stream_executor/cuda:cublas_lt_header",
        "//tensorflow/tsl/platform/default/build_config:cublas_plugin",
    ]),
)
965
# Test target for :matmul_utils, built from matmul_utils_test.cc. No tags are
# set; xla_internal_test_main is pinned with a build_cleaner keep marker.
tf_cc_test(
    name = "matmul_utils_test",
    srcs = ["matmul_utils_test.cc"],
    deps = [
        ":matmul_utils",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core/platform:status_matchers",
        "@com_google_absl//absl/strings",
    ],
)
978
# Library built from gpu_conv_algorithm_picker.{cc,h}. GOOGLE_CUDA=1 is passed
# through `copts`, which affects only this target's own compilation (contrast
# with :matmul_utils, which uses `defines`). The buffer comparator, cuDNN
# header and redzone allocator are added only via if_cuda_is_configured().
cc_library(
    name = "gpu_conv_algorithm_picker",
    srcs = ["gpu_conv_algorithm_picker.cc"],
    hdrs = ["gpu_conv_algorithm_picker.h"],
    copts = if_cuda_is_configured(["-DGOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":gpu_asm_opts_util",
        ":gpu_autotuning_proto_cc",
        ":gpu_conv_runner",
        ":gpu_executable",
        ":hlo_algorithm_denylist",
        ":ir_emission_utils",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:compiler",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/stream_executor:dnn_proto_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/protobuf:autotuning_proto_cc",
        "//tensorflow/core/util/proto:proto_utils",
        "//tensorflow/stream_executor:device_memory_allocator",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/time",
    ] + if_cuda_is_configured([
        ":buffer_comparator",
        "//tensorflow/stream_executor/gpu:redzone_allocator",
        "@local_config_cuda//cuda:cudnn_header",
    ]),
)
1017
# Library built from gpu_conv_runner.{cc,h}. All attributes are unconditional;
# deps include the stream_executor dnn and lazy_op_runner targets.
cc_library(
    name = "gpu_conv_runner",
    srcs = ["gpu_conv_runner.cc"],
    hdrs = ["gpu_conv_runner.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/stream_executor:lazy_op_runner",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:dnn",
        "@com_google_absl//absl/strings",
    ],
)
1040
# Library built from gpu_conv_rewriter.{cc,h}; an unconditional target that
# depends on the HLO pass library and :cublas_cudnn.
cc_library(
    name = "gpu_conv_rewriter",
    srcs = ["gpu_conv_rewriter.cc"],
    hdrs = ["gpu_conv_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:permutation_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
    ],
)
1058
# Test target for :gpu_conv_rewriter, built from gpu_conv_rewriter_test.cc.
# Tags are supplied by tf_cuda_tests_tags(); it also links
# //tensorflow/compiler/jit:xla_gpu_jit.
tf_cc_test(
    name = "gpu_conv_rewriter_test",
    srcs = ["gpu_conv_rewriter_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":gpu_conv_rewriter",
        "//tensorflow/compiler/jit:xla_gpu_jit",
        "//tensorflow/compiler/xla:protobuf_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:shape_inference",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//tensorflow/core:test",
    ],
)
1078
# Library built from cusolver_context.{cc,h}. srcs and hdrs are wrapped in
# if_gpu_is_configured(); the base deps are unconditional, with CUDA-specific
# and ROCm-specific libraries appended through if_cuda_is_configured() and
# if_rocm_is_configured() respectively.
cc_library(
    name = "cusolver_context",
    srcs = if_gpu_is_configured(["cusolver_context.cc"]),
    hdrs = if_gpu_is_configured(["cusolver_context.h"]),
    deps = [
        "//tensorflow/compiler/xla:comparison_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:blas",
    ] + if_cuda_is_configured([
        "//tensorflow/stream_executor/cuda:cusolver_lib",
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        "//tensorflow/stream_executor/rocm:hipsolver_wrapper",
        "//tensorflow/stream_executor/rocm:rocblas_wrapper",
        "//tensorflow/stream_executor/rocm:rocsolver_wrapper",
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)
1101
# Library built from cusolver_rewriter.{cc,h}. srcs, hdrs and deps are each
# wrapped in if_gpu_is_configured(); depends on :cusolver_context.
cc_library(
    name = "cusolver_rewriter",
    srcs = if_gpu_is_configured(["cusolver_rewriter.cc"]),
    hdrs = if_gpu_is_configured(["cusolver_rewriter.h"]),
    deps = if_gpu_is_configured([
        ":cusolver_context",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:blas",
        "//tensorflow/stream_executor:device_memory_allocator",
        "@com_google_absl//absl/algorithm:container",
    ]),
)
1122
# Library built from instruction_fusion.{cc,h} in this package. It depends on
# the same-named target in //tensorflow/compiler/xla/service as well as
# :gpu_fusible.
cc_library(
    name = "instruction_fusion",
    srcs = ["instruction_fusion.cc"],
    hdrs = ["instruction_fusion.h"],
    deps = [
        ":gpu_fusible",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:fusion_node_indexing_evaluation",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_query",
        "//tensorflow/compiler/xla/service:instruction_fusion",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
    ],
)
1142
# Test target for :instruction_fusion, built from instruction_fusion_test.cc.
# Tagged "no_pip" rather than with tf_cuda_tests_tags().
tf_cc_test(
    name = "instruction_fusion_test",
    srcs = ["instruction_fusion_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":gpu_fusible",
        ":instruction_fusion",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)
1160
# Library built from multi_output_fusion.{cc,h}; depends on :gpu_fusible,
# :instruction_fusion and the HLO reachability / graph-dumper libraries.
cc_library(
    name = "multi_output_fusion",
    srcs = ["multi_output_fusion.cc"],
    hdrs = ["multi_output_fusion.h"],
    deps = [
        ":gpu_fusible",
        ":instruction_fusion",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:debug_options_flags",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_graph_dumper",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_reachability",
        "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
    ],
)
1184
# Test target for :multi_output_fusion, built from
# multi_output_fusion_test.cc. Tagged "no_pip".
tf_cc_test(
    name = "multi_output_fusion_test",
    srcs = ["multi_output_fusion_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":gpu_fusible",
        ":instruction_fusion",
        ":multi_output_fusion",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/strings",
    ],
)
1204
# Library built from gpu_sanitize_constant_names.{cc,h}; depends on the HLO
# pass library and llvm_ir:buffer_assignment_util.
cc_library(
    name = "gpu_sanitize_constant_names",
    srcs = ["gpu_sanitize_constant_names.cc"],
    hdrs = ["gpu_sanitize_constant_names.h"],
    deps = [
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
        "//tensorflow/core:lib",
    ],
)
1216
# Test target for :gpu_sanitize_constant_names, built from
# gpu_sanitize_constant_names_test.cc. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "gpu_sanitize_constant_names_test",
    srcs = ["gpu_sanitize_constant_names_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":gpu_sanitize_constant_names",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_layout",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:computation_layout",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:test",
        "@com_google_absl//absl/strings",
    ],
)
1242
# Library built from fusion_bitcast_lift.{cc,h}; depends on the HLO pass
# library plus hlo_dce and hlo_verifier.
cc_library(
    name = "fusion_bitcast_lift",
    srcs = ["fusion_bitcast_lift.cc"],
    hdrs = ["fusion_bitcast_lift.h"],
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_dce",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/core/platform:errors",
        "@com_google_absl//absl/types:span",
    ],
)
1257
# Test target for :fusion_bitcast_lift, built from
# fusion_bitcast_lift_test.cc. Tagged "no_pip"; links the tests:filecheck
# helper.
tf_cc_test(
    name = "fusion_bitcast_lift_test",
    srcs = ["fusion_bitcast_lift_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":fusion_bitcast_lift",
        "//tensorflow/compiler/xla/service:hlo_dce",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:filecheck",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/types:span",
    ],
)
1272
# Library built from fusion_merger.{cc,h}; depends on :gpu_fusible,
# :instruction_fusion and the HLO cost-analysis library.
cc_library(
    name = "fusion_merger",
    srcs = ["fusion_merger.cc"],
    hdrs = ["fusion_merger.h"],
    deps = [
        ":gpu_fusible",
        ":instruction_fusion",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_cost_analysis",
        "//tensorflow/compiler/xla/service:hlo_graph_dumper",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings",
    ],
)
1292
# Test target for :fusion_merger, built from fusion_merger_test.cc.
# Tagged "no_pip".
tf_cc_test(
    name = "fusion_merger_test",
    srcs = ["fusion_merger_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":fusion_merger",
        ":gpu_fusible",
        ":instruction_fusion",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/types:span",
    ],
)
1309
# Library built from gpu_conv_padding_legalization.{cc,h}; depends on
# :cublas_cudnn, the HLO pass library and shape_inference.
cc_library(
    name = "gpu_conv_padding_legalization",
    srcs = ["gpu_conv_padding_legalization.cc"],
    hdrs = ["gpu_conv_padding_legalization.h"],
    deps = [
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:shape_inference",
        "@com_google_absl//absl/memory",
    ],
)
1328
# Test target for :gpu_conv_padding_legalization, built from
# gpu_conv_padding_legalization_test.cc. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "gpu_conv_padding_legalization_test",
    srcs = ["gpu_conv_padding_legalization_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":gpu_conv_padding_legalization",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//tensorflow/core:test",
    ],
)
1346
# Library built from cudnn_support_utils.{cc,h}; depends on :cublas_cudnn and
# the stream_executor stream header. All attributes are unconditional.
cc_library(
    name = "cudnn_support_utils",
    srcs = ["cudnn_support_utils.cc"],
    hdrs = ["cudnn_support_utils.h"],
    deps = [
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:comparison_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core/platform:status",
        "//tensorflow/stream_executor:stream_header",
    ],
)
1361
# Test target for :cudnn_support_utils, built from
# cudnn_support_utils_test.cc. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "cudnn_support_utils_test",
    srcs = ["cudnn_support_utils_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cudnn_support_utils",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:verified_hlo_module",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core/platform:errors",
        "//tensorflow/core/platform:status",
        "//tensorflow/core/platform:status_matchers",
        "//tensorflow/stream_executor:device_description",
        "//tensorflow/stream_executor:stream_header",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
    ],
)
1387
# Library built from cudnn_pad_for_convolutions.{cc,h}; depends on
# :cudnn_support_utils and the HLO pass library.
cc_library(
    name = "cudnn_pad_for_convolutions",
    srcs = ["cudnn_pad_for_convolutions.cc"],
    hdrs = ["cudnn_pad_for_convolutions.h"],
    deps = [
        ":cudnn_support_utils",
        ":ir_emission_utils",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core/platform:status",
        "//tensorflow/stream_executor:stream_header",
        "@com_google_absl//absl/functional:bind_front",
    ],
)
1406
# Test target for :cudnn_pad_for_convolutions, built from
# cudnn_pad_for_convolutions_test.cc. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "cudnn_pad_for_convolutions_test",
    srcs = ["cudnn_pad_for_convolutions_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":cudnn_pad_for_convolutions",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core:test",
    ],
)
1425
# Library built from cudnn_vectorize_convolutions.{cc,h}; depends on
# :cudnn_support_utils, the XLA client builder and call_inliner.
cc_library(
    name = "cudnn_vectorize_convolutions",
    srcs = ["cudnn_vectorize_convolutions.cc"],
    hdrs = ["cudnn_vectorize_convolutions.h"],
    deps = [
        ":cudnn_support_utils",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/client:xla_builder",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
    ],
)
1440
# Test target for :cudnn_vectorize_convolutions, built from
# cudnn_vectorize_convolutions_test.cc. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "cudnn_vectorize_convolutions_test",
    srcs = ["cudnn_vectorize_convolutions_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":cudnn_vectorize_convolutions",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core/platform:statusor",
    ],
)
1459
# Library built from cudnn_simplify_padding.{cc,h}; depends on :cublas_cudnn,
# the HLO pass library and pattern_matcher.
cc_library(
    name = "cudnn_simplify_padding",
    srcs = ["cudnn_simplify_padding.cc"],
    hdrs = ["cudnn_simplify_padding.h"],
    deps = [
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
    ],
)
1473
# Test target for :cudnn_simplify_padding, built from
# cudnn_simplify_padding_test.cc. Besides the library under test it also links
# :cudnn_pad_for_convolutions and :cudnn_vectorize_convolutions. Tags come
# from tf_cuda_tests_tags().
tf_cc_test(
    name = "cudnn_simplify_padding_test",
    srcs = ["cudnn_simplify_padding_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":cudnn_pad_for_convolutions",
        ":cudnn_simplify_padding",
        ":cudnn_vectorize_convolutions",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core:test",
        "//tensorflow/core/platform:statusor",
    ],
)
1497
# Library built from cublas_pad_for_gemms.{cc,h}; depends on
# :ir_emission_utils and the HLO pass library.
cc_library(
    name = "cublas_pad_for_gemms",
    srcs = ["cublas_pad_for_gemms.cc"],
    hdrs = ["cublas_pad_for_gemms.h"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
    ],
)
1511
# Test target for :cublas_pad_for_gemms, built from
# cublas_pad_for_gemms_test.cc. Tagged "no_pip".
tf_cc_test(
    name = "cublas_pad_for_gemms_test",
    srcs = ["cublas_pad_for_gemms_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":cublas_pad_for_gemms",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
    ],
)
1528
# Header-only library exposing target_constants.h; it has no srcs and no deps.
cc_library(
    name = "target_constants",
    hdrs = ["target_constants.h"],
)
1533
# Library built from gpu_transfer_manager.{cc,h}. alwayslink is set so the
# object code is kept by the linker even when nothing references its symbols
# directly; the inline note on that attribute gives the reason.
cc_library(
    name = "gpu_transfer_manager",
    srcs = ["gpu_transfer_manager.cc"],
    hdrs = ["gpu_transfer_manager.h"],
    deps = [
        ":io_feed_manager",
        ":target_constants",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:shape_tree",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:compiler",
        "//tensorflow/compiler/xla/service:generic_transfer_manager",
        "//tensorflow/compiler/xla/service:transfer_manager",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:stream_header",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/memory",
        "@llvm-project//llvm:Core",
    ],
    alwayslink = True,  # Contains per-platform transfer manager registration
)
1562
# Library built from gpu_reduce_scatter_creator.{cc,h}; depends on the HLO
# pass library, hlo_query and reduce_scatter_utils.
cc_library(
    name = "gpu_reduce_scatter_creator",
    srcs = ["gpu_reduce_scatter_creator.cc"],
    hdrs = ["gpu_reduce_scatter_creator.h"],
    deps = [
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_query",
        "//tensorflow/compiler/xla/service:reduce_scatter_utils",
    ],
)
1574
# C++ library built from gpu_compiler.{cc,h}.
#
# When the :is_xlir_enabled setting matches, XLA_ENABLE_XLIR=1 is defined for
# this target's own compilation (local_defines is not propagated to dependents)
# and the extra deps in the trailing select() are added.
#
# deps are grouped in buildifier order: package-local (":"), then workspace
# ("//"), then external ("@") labels, each group sorted.
cc_library(
    name = "gpu_compiler",
    srcs = ["gpu_compiler.cc"],
    hdrs = ["gpu_compiler.h"],
    local_defines = select({
        ":is_xlir_enabled": ["XLA_ENABLE_XLIR=1"],
        "//conditions:default": [],
    }),
    deps = [
        # Targets from this package.
        ":alias_passthrough_params",
        ":all_reduce_blueconnect",
        ":fusion_bitcast_lift",
        ":fusion_merger",
        ":gemm_broadcast_folding_rewriter",
        ":gemm_rewriter",
        ":gpu_constants",
        ":gpu_conv_algorithm_picker",
        ":gpu_conv_rewriter",
        ":gpu_device_info",
        ":gpu_executable",
        ":gpu_hlo_cost_analysis",
        ":gpu_hlo_schedule",
        ":gpu_layout_assignment",
        ":gpu_reduce_scatter_creator",
        ":gpu_sanitize_constant_names",
        ":gpu_scatter_expander",
        ":gpu_shape_verifier",
        ":hlo_fusion_stats",
        ":horizontal_input_fusion",
        ":horizontal_loop_fusion",
        ":instruction_fusion",
        ":ir_emission_utils",
        ":ir_emitter",
        ":launch_dimensions",
        ":matmul_utils",
        ":metrics",
        ":multi_output_fusion",
        ":nccl_collective_thunks",
        ":reduction_degenerate_dim_remover",
        ":reduction_dimension_grouper",
        ":reduction_layout_normalizer",
        ":reduction_splitter",
        ":runtime_intrinsics",
        ":stream_executor_util",
        ":target_constants",
        ":tree_reduction_rewriter",
        ":variadic_op_splitter",
        # Targets from elsewhere in the workspace.
        "//tensorflow/compiler/mlir:name_utils",
        "//tensorflow/compiler/mlir/xla:hlo_utils",
        "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla",
        "//tensorflow/compiler/mlir/xla:type_to_shape",
        "//tensorflow/compiler/xla:protobuf_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/mlir_hlo:gpu_fusion_rewrite",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:all_gather_broadcast_reorder",
        "//tensorflow/compiler/xla/service:all_gather_combiner",
        "//tensorflow/compiler/xla/service:all_gather_decomposer",
        "//tensorflow/compiler/xla/service:all_reduce_combiner",
        "//tensorflow/compiler/xla/service:all_reduce_contiguous",
        "//tensorflow/compiler/xla/service:all_reduce_folder",
        "//tensorflow/compiler/xla/service:all_reduce_reassociate",
        "//tensorflow/compiler/xla/service:all_to_all_decomposer",
        "//tensorflow/compiler/xla/service:async_collective_creator",
        "//tensorflow/compiler/xla/service:batchnorm_expander",
        "//tensorflow/compiler/xla/service:bfloat16_normalization",
        "//tensorflow/compiler/xla/service:bitcast_decomposer",
        "//tensorflow/compiler/xla/service:bitcast_dtypes_expander",
        "//tensorflow/compiler/xla/service:broadcast_canonicalizer",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:collectives_schedule_linearizer",
        "//tensorflow/compiler/xla/service:comparison_expander",
        "//tensorflow/compiler/xla/service:conditional_canonicalizer",
        "//tensorflow/compiler/xla/service:conditional_simplifier",
        "//tensorflow/compiler/xla/service:convert_mover",
        "//tensorflow/compiler/xla/service:convolution_4d_expander",
        "//tensorflow/compiler/xla/service:convolution_pred_expander",
        "//tensorflow/compiler/xla/service:copy_insertion",
        "//tensorflow/compiler/xla/service:dot_decomposer",
        "//tensorflow/compiler/xla/service:dot_merger",
        "//tensorflow/compiler/xla/service:dump",
        "//tensorflow/compiler/xla/service:dynamic_dimension_simplifier",
        "//tensorflow/compiler/xla/service:dynamic_index_splitter",
        "//tensorflow/compiler/xla/service:dynamic_padder",
        "//tensorflow/compiler/xla/service:eigh_expander",
        "//tensorflow/compiler/xla/service:executable",
        "//tensorflow/compiler/xla/service:flatten_call_graph",
        "//tensorflow/compiler/xla/service:gather_expander",
        "//tensorflow/compiler/xla/service:gather_simplifier",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_constant_folding",
        "//tensorflow/compiler/xla/service:hlo_cse",
        "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
        "//tensorflow/compiler/xla/service:hlo_dce",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:hlo_proto_util",
        "//tensorflow/compiler/xla/service:hlo_subcomputation_unification",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/compiler/xla/service:layout_normalization",
        "//tensorflow/compiler/xla/service:llvm_compiler",
        "//tensorflow/compiler/xla/service:logistic_expander",
        "//tensorflow/compiler/xla/service:loop_schedule_linearizer",
        "//tensorflow/compiler/xla/service:operand_upcaster",
        "//tensorflow/compiler/xla/service:optimization_barrier_expander",
        "//tensorflow/compiler/xla/service:qr_expander",
        "//tensorflow/compiler/xla/service:real_imag_expander",
        "//tensorflow/compiler/xla/service:reduce_decomposer",
        "//tensorflow/compiler/xla/service:reduce_scatter_combiner",
        "//tensorflow/compiler/xla/service:reshape_decomposer",
        "//tensorflow/compiler/xla/service:reshape_mover",
        "//tensorflow/compiler/xla/service:result_caster",
        "//tensorflow/compiler/xla/service:rng_bit_generator_expander",
        "//tensorflow/compiler/xla/service:rng_expander",
        "//tensorflow/compiler/xla/service:scatter_expander",
        "//tensorflow/compiler/xla/service:scatter_simplifier",
        "//tensorflow/compiler/xla/service:sharding_propagation",
        "//tensorflow/compiler/xla/service:sharding_remover",
        "//tensorflow/compiler/xla/service:simplify_fp_conversions",
        "//tensorflow/compiler/xla/service:slice_sinker",
        "//tensorflow/compiler/xla/service:slow_operation_alarm",
        "//tensorflow/compiler/xla/service:sort_simplifier",
        "//tensorflow/compiler/xla/service:stable_sort_expander",
        "//tensorflow/compiler/xla/service:transpose_folding",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla/service:while_loop_constant_sinking",
        "//tensorflow/compiler/xla/service:while_loop_simplifier",
        "//tensorflow/compiler/xla/service:while_loop_trip_count_annotator",
        "//tensorflow/compiler/xla/service:zero_sized_hlo_elimination",
        "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/spmd:stateful_rng_spmd_partitioner",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:regexp",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/profiler/lib:traceme",
        "//tensorflow/stream_executor:stream_executor_headers",
        # External repositories.
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:variant",
        "@llvm-project//llvm:AsmParser",
        "@llvm-project//llvm:BitReader",
        "@llvm-project//llvm:BitWriter",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:TransformUtils",
        "@llvm-project//mlir:AllPassesAndDialects",
        "@llvm-project//mlir:ArithmeticDialect",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:GPUTransforms",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Pass",
        "@llvm-project//mlir:Transforms",
    ] + select({
        # Only linked in when :is_xlir_enabled matches.
        ":is_xlir_enabled": [
            ":jitrt_custom_calls",
            "//tensorflow/compiler/mlir/tfrt/transforms/lmhlo_to_gpu:pass_utils",
            "//tensorflow/compiler/xla/mlir/transforms/runtime:compilation_pipeline",
            "//tensorflow/compiler/xla/runtime:jit_executable",
        ],
        "//conditions:default": [],
    }),
)
1747
# Registration wrapper around :nvptx_compiler_impl.
# Both srcs and deps are wrapped in if_cuda_is_configured(), so (assuming the
# macro yields an empty list otherwise -- see cuda_build_defs.bzl) this target
# is empty in builds without CUDA.
cc_library(
    name = "nvptx_compiler",
    srcs = if_cuda_is_configured(["nvptx_compiler_registration.cc"]),
    deps = if_cuda_is_configured([":nvptx_compiler_impl"]),
    # Keep the object file in the final link even though nothing references
    # its symbols directly.
    alwayslink = True,  # Contains compiler registration
)
1758
# C++ library built from nvptx_helper.{cc,h}.
# Unlike :nvptx_compiler and :nvptx_compiler_impl, its srcs/hdrs/deps are not
# wrapped in if_cuda_is_configured().
cc_library(
    name = "nvptx_helper",
    srcs = ["nvptx_helper.cc"],
    hdrs = ["nvptx_helper.h"],
    deps = [
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:cuda_libdevice_path",
        "@com_google_absl//absl/strings",
    ],
)
1771
# C++ library built from nvptx_compiler.{cc,h}; :nvptx_compiler wraps it with
# the registration source.
# srcs, hdrs and deps are all wrapped in if_cuda_is_configured().
# deps are grouped in buildifier order (":", then "//", then "@"), each sorted.
cc_library(
    name = "nvptx_compiler_impl",
    srcs = if_cuda_is_configured(["nvptx_compiler.cc"]),
    hdrs = if_cuda_is_configured(["nvptx_compiler.h"]),
    deps = if_cuda_is_configured([
        # Targets from this package.
        ":cublas_cudnn",
        ":cublas_pad_for_gemms",
        ":cudnn_fused_conv_rewriter",
        ":cudnn_pad_for_convolutions",
        ":cudnn_simplify_padding",
        ":cudnn_vectorize_convolutions",
        ":cusolver_rewriter",
        ":gemm_algorithm_picker",
        ":gpu_asm_opts_util",
        ":gpu_compiler",
        ":gpu_conv_padding_legalization",
        ":gpu_conv_rewriter",
        ":gpu_executable",
        ":gpu_layout_assignment",
        ":ir_emission_utils",
        ":metrics",
        ":nvptx_helper",
        ":target_constants",
        ":triangular_solve_rewriter",
        # Targets from elsewhere in the workspace.
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:dump",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_constant_folding",
        "//tensorflow/compiler/xla/service:hlo_cse",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:hlo_proto_cc",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/compiler/xla/service:llvm_compiler",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:cuda_libdevice_path",
        "//tensorflow/core/profiler/lib:traceme",
        "//tensorflow/stream_executor:stream_executor_headers",
        "//tensorflow/stream_executor/cuda:cuda_diagnostics",
        "//tensorflow/stream_executor/gpu:asm_compiler",
        "//tensorflow/stream_executor/gpu:gpu_driver_header",
        # External repositories.
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/container:node_hash_map",
        "@llvm-project//llvm:IRReader",
        "@llvm-project//llvm:Support",
    ]),
)
1832
# Test for :nvptx_compiler_impl, built from nvptx_compiler_test.cc.
# NOTE(review): srcs is gated on if_gpu_is_configured() while
# :nvptx_compiler_impl gates its sources on if_cuda_is_configured(); the
# "no_rocm" tag presumably covers the difference -- confirm this is intended.
tf_cc_test(
    name = "nvptx_compiler_test",
    srcs = if_gpu_is_configured(["nvptx_compiler_test.cc"]),
    tags = [
        "gpu",
        "no_rocm",
        # Pulls in precompiled NVIDIA libraries which cause false positives in
        # msan.
        "nomsan",
    ],
    deps = [
        ":nvptx_compiler_impl",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
    ],
)
1855
# TODO(ezhulenev): This test breaks the macOS build; try to re-enable it later.
1857# copybara:uncomment_begin
1858# tf_cc_test(
1859#     name = "gpu_aot_compilation_test",
1860#     srcs = [
1861#         "gpu_aot_compilation_test.cc",
1862#     ],
1863#     env = {
1864#         "XLA_FLAGS": "--xla_gpu_jitrt_executable",
1865#     },
1866#     tags = [
1867#         "gpu",
1868#         "no_oss",
1869#         "no_rocm",
1870#         "nomsan",  # Pulls in precompiled NVIDIA libraries which cause false positives in msan.
1871#         "requires-gpu-nvidia",
1872#     ],
1873#     deps = [
1874#         ":nvptx_compiler_impl",
1875#         "//tensorflow/compiler/xla/tests:hlo_test_base",
1876#         "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
1877#     ],
1878# )
1879# copybara:uncomment_end
1880
# Registration wrapper around :amdgpu_compiler_impl (the ROCm counterpart of
# :nvptx_compiler). srcs and deps are wrapped in if_rocm_is_configured().
cc_library(
    name = "amdgpu_compiler",
    srcs = if_rocm_is_configured(["amdgpu_compiler_registration.cc"]),
    deps = if_rocm_is_configured([":amdgpu_compiler_impl"]),
    # Keep the object file in the final link even though nothing references
    # its symbols directly.
    alwayslink = True,  # Contains compiler registration
)
1891
# C++ library built from amdgpu_compiler.{cc,h}; :amdgpu_compiler wraps it
# with the registration source.
# srcs, hdrs and deps are all wrapped in if_rocm_is_configured().
cc_library(
    name = "amdgpu_compiler_impl",
    srcs = if_rocm_is_configured(["amdgpu_compiler.cc"]),
    hdrs = if_rocm_is_configured(["amdgpu_compiler.h"]),
    deps = if_rocm_is_configured([
        # Targets from this package.
        ":cusolver_rewriter",
        ":gemm_rewriter",
        ":gpu_compiler",
        ":gpu_conv_algorithm_picker",
        ":gpu_conv_padding_legalization",
        ":gpu_conv_rewriter",
        ":gpu_layout_assignment",
        ":reduction_degenerate_dim_remover",
        ":reduction_dimension_grouper",
        ":reduction_layout_normalizer",
        ":target_constants",
        ":tree_reduction_rewriter",
        ":triangular_solve_rewriter",
        # Targets from elsewhere in the workspace.
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_constant_folding",
        "//tensorflow/compiler/xla/service:hlo_cse",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/core/platform:rocm_rocdl_path",
    ]),
)
1929
# C++ library built from all_reduce_blueconnect.{cc,h}.
# Listed in the deps of :gpu_compiler; exercised by
# :all_reduce_blueconnect_test.
cc_library(
    name = "all_reduce_blueconnect",
    srcs = ["all_reduce_blueconnect.cc"],
    hdrs = ["all_reduce_blueconnect.h"],
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_query",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:btree",
        "@com_google_absl//absl/types:span",
    ],
)
1947
# Test for :all_reduce_blueconnect. Carries no tags and links
# //tensorflow/core:test_main for its main().
tf_cc_test(
    name = "all_reduce_blueconnect_test",
    srcs = ["all_reduce_blueconnect_test.cc"],
    deps = [
        ":all_reduce_blueconnect",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/core:test_main",
        "//tensorflow/core/platform:status_matchers",
    ],
)
1961
# Header-only library (no srcs) exposing xfeed_queue.h.
# Used by :io_feed_manager.
cc_library(
    name = "xfeed_queue",
    hdrs = ["xfeed_queue.h"],
    deps = [
        "//tensorflow/core:lib",
        "@com_google_absl//absl/base:core_headers",
    ],
)
1970
# C++ library bundling the infeed and outfeed managers.
cc_library(
    name = "io_feed_manager",
    srcs = [
        "infeed_manager.cc",
        "outfeed_manager.cc",
        # Header listed in srcs rather than hdrs: private to this target.
        "xla_executor_state.h",
    ],
    hdrs = [
        "infeed_manager.h",
        "outfeed_manager.h",
    ],
    # GOOGLE_CUDA=1 is passed on the compile line only under
    # if_cuda_is_configured().
    copts = if_cuda_is_configured(["-DGOOGLE_CUDA=1"]),
    deps = [
        ":xfeed_queue",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:shape_tree",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor/gpu:gpu_executor_header",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/memory",
    ],
)
1997
# C++ library built from gpu_shape_verifier.{cc,h}.
# Its only dep is the generic hlo_verifier target.
cc_library(
    name = "gpu_shape_verifier",
    srcs = ["gpu_shape_verifier.cc"],
    hdrs = ["gpu_shape_verifier.h"],
    deps = ["//tensorflow/compiler/xla/service:hlo_verifier"],
)
2006
# C++ library built from gpu_layout_assignment.{cc,h}.
# Depends on the generic //tensorflow/compiler/xla/service:layout_assignment
# target; exercised by :gpu_layout_assignment_test.
cc_library(
    name = "gpu_layout_assignment",
    srcs = ["gpu_layout_assignment.cc"],
    hdrs = ["gpu_layout_assignment.h"],
    deps = [
        # Targets from this package.
        ":backend_configs_cc",
        ":ir_emission_utils",
        ":matmul_utils",
        ":stream_executor_util",
        # Targets from elsewhere in the workspace.
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:computation_layout",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:layout_assignment",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        # External repositories.
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/types:span",
    ],
)
2029
# Test for :gpu_layout_assignment.
# Tags come from tf_cuda_tests_tags() (loaded from build_config_root.bzl).
tf_cc_test(
    name = "gpu_layout_assignment_test",
    srcs = ["gpu_layout_assignment_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        # Targets from this package.
        ":cublas_cudnn",
        ":gemm_rewriter",
        ":gpu_layout_assignment",
        # Targets from elsewhere in the workspace.
        "//tensorflow/compiler/xla:shape_layout",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:computation_layout",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core/platform:status_matchers",
        "//tensorflow/stream_executor/lib",
        # External repositories.
        "@com_google_absl//absl/strings",
    ],
)
2052
# C++ library built from gpu_hlo_schedule.{cc,h}.
# Listed in the deps of :gpu_compiler; exercised by :gpu_hlo_schedule_test.
cc_library(
    name = "gpu_hlo_schedule",
    srcs = ["gpu_hlo_schedule.cc"],
    hdrs = ["gpu_hlo_schedule.h"],
    deps = [
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:buffer_value",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_memory_scheduler",
        "//tensorflow/compiler/xla/service:hlo_ordering",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/memory",
    ],
)
2067
# Test for :gpu_hlo_schedule. Tagged "no_pip" only (no GPU tags).
tf_cc_test(
    name = "gpu_hlo_schedule_test",
    srcs = ["gpu_hlo_schedule_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":gpu_hlo_schedule",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings:str_format",
    ],
)
2087
# Test built from while_transformer_test.cc. Tagged "no_pip" only.
# The only dep from this package is :instruction_fusion; the rest come from
# the generic XLA service and test targets.
tf_cc_test(
    name = "while_transformer_test",
    srcs = ["while_transformer_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":instruction_fusion",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:copy_insertion",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/compiler/xla/service:while_loop_analysis",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:test",
    ],
)
2105
# C++ library built from stream_executor_util.{cc,h}.
# Compiled with the project-wide tf_copts() flags.
cc_library(
    name = "stream_executor_util",
    srcs = ["stream_executor_util.cc"],
    hdrs = ["stream_executor_util.h"],
    copts = tf_copts(),
    deps = [
        # Targets from this package.
        ":cublas_cudnn",
        ":launch_dimensions",
        # Targets from elsewhere in the workspace.
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:cuda_libdevice_path",
        "//tensorflow/core/platform:regexp",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/profiler/lib:traceme",
        "//tensorflow/core/protobuf:autotuning_proto_cc",
        "//tensorflow/core/util:determinism_for_kernels",
        "//tensorflow/core/util/proto:proto_utils",
        "//tensorflow/stream_executor:kernel_spec",
        # External repositories.
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
)
2136
# C++ library built from gpu_asm_opts_util.{cc,h}.
# Compiled with tf_copts(); used by :nvptx_compiler_impl and
# :buffer_comparator.
cc_library(
    name = "gpu_asm_opts_util",
    srcs = ["gpu_asm_opts_util.cc"],
    hdrs = ["gpu_asm_opts_util.h"],
    copts = tf_copts(),
    deps = [
        "//tensorflow/compiler/xla:xla_proto_cc",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
        "@com_google_absl//absl/strings",
    ],
)
2148
# C++ library built from gpu_hlo_cost_analysis.{cc,h}.
# Depends on the generic //tensorflow/compiler/xla/service:hlo_cost_analysis
# target. compatible_with is supplied by get_compatible_with_cloud().
cc_library(
    name = "gpu_hlo_cost_analysis",
    srcs = ["gpu_hlo_cost_analysis.cc"],
    hdrs = ["gpu_hlo_cost_analysis.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//tensorflow/compiler/xla/service:hlo_cost_analysis",
    ],
)
2160
# Test for :gpu_hlo_cost_analysis. Carries no tags.
tf_cc_test(
    name = "gpu_hlo_cost_analysis_test",
    srcs = ["gpu_hlo_cost_analysis_test.cc"],
    deps = [
        ":gpu_hlo_cost_analysis",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)
2170
# C++ library built from buffer_comparator.{cc,h}.
# srcs, hdrs and deps are all wrapped in if_cuda_is_configured().
# deps are grouped in buildifier order (":", then "//", then "@"), each sorted.
cc_library(
    name = "buffer_comparator",
    srcs = if_cuda_is_configured(["buffer_comparator.cc"]),
    hdrs = if_cuda_is_configured(["buffer_comparator.h"]),
    deps = if_cuda_is_configured([
        ":gpu_asm_opts_util",
        ":launch_dimensions",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:stream_executor_headers",
        "//tensorflow/stream_executor/gpu:asm_compiler",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/strings",
    ]),
)
2189
# Test for :buffer_comparator.
# The source file and the CUDA-specific deps are wrapped in
# if_cuda_is_configured(); the first four deps are unconditional.
tf_cc_test(
    name = "buffer_comparator_test",
    srcs = if_cuda_is_configured(["buffer_comparator_test.cc"]),
    tags = tf_cuda_tests_tags(),
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
    ] + if_cuda_is_configured([
        ":buffer_comparator",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/tsl/platform/default/build_config:stream_executor_cuda",  # build_cleaner: keep
    ]),
)
2205
# C++ library built from gpu_fusible.{cc,h}.
# Used by :horizontal_loop_fusion; exercised by :gpu_fusible_test.
cc_library(
    name = "gpu_fusible",
    srcs = ["gpu_fusible.cc"],
    hdrs = ["gpu_fusible.h"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:instruction_fusion",
    ],
)
2217
# Test for :gpu_fusible. Tagged "no_pip" only.
tf_cc_test(
    name = "gpu_fusible_test",
    srcs = ["gpu_fusible_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":gpu_fusible",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
    ],
)
2231
# C++ library built from cudnn_fused_conv_rewriter.{cc,h}.
# Not wrapped in if_cuda_is_configured(), although its only in-view user,
# :nvptx_compiler_impl, is.
cc_library(
    name = "cudnn_fused_conv_rewriter",
    srcs = ["cudnn_fused_conv_rewriter.cc"],
    hdrs = ["cudnn_fused_conv_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:comparison_util",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/stream_executor:dnn_proto_cc",
        "//tensorflow/core/platform:errors",
        "//tensorflow/core/platform:statusor",
        "//tensorflow/core/platform:stream_executor_no_cuda",
    ],
)
2251
# Test for :cudnn_fused_conv_rewriter.
# The tags mark it as a GPU test ("gpu", "requires-gpu-sm70") and exclude it
# from OSS, asan and msan runs ("no_oss", "noasan", "nomsan").
tf_cc_test(
    name = "cudnn_fused_conv_rewriter_test",
    srcs = ["cudnn_fused_conv_rewriter_test.cc"],
    tags = [
        "gpu",
        "no_oss",
        "noasan",
        "nomsan",
        "requires-gpu-sm70",
    ],
    deps = [
        # Targets from this package.
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":cudnn_fused_conv_rewriter",
        ":gpu_conv_rewriter",
        ":ir_emission_utils",
        # Targets from elsewhere in the workspace.
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:convert_mover",
        "//tensorflow/compiler/xla/service:hlo_constant_folding",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/service:reshape_mover",
        "//tensorflow/compiler/xla/service/gpu/tests:gpu_codegen_test",
        "//tensorflow/compiler/xla/tests:filecheck",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        # External repositories.
        "@com_google_absl//absl/strings",
    ],
)
2286
# C++ library built from variadic_op_splitter.{cc,h}.
# Listed in the deps of :gpu_compiler; exercised by
# :variadic_op_splitter_test.
cc_library(
    name = "variadic_op_splitter",
    srcs = ["variadic_op_splitter.cc"],
    hdrs = ["variadic_op_splitter.h"],
    deps = [
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
)
2302
# C++ library built from gpu_scatter_expander.{cc,h}.
# Depends on the generic //tensorflow/compiler/xla/service:scatter_expander
# target.
cc_library(
    name = "gpu_scatter_expander",
    srcs = ["gpu_scatter_expander.cc"],
    hdrs = ["gpu_scatter_expander.h"],
    deps = [
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:scatter_expander",
        "@com_google_absl//absl/algorithm:container",
    ],
)
2314
# Test for :variadic_op_splitter. Tagged "no_pip" only.
tf_cc_test(
    name = "variadic_op_splitter_test",
    srcs = ["variadic_op_splitter_test.cc"],
    tags = ["no_pip"],
    deps = [
        # Targets from this package.
        ":ir_emission_utils",
        ":variadic_op_splitter",
        # Targets from elsewhere in the workspace.
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)
2335
# Proto library for gpu_autotuning.proto.
# :hlo_algorithm_denylist consumes it through the ":gpu_autotuning_proto_cc"
# label (presumably generated by tf_proto_library -- see build_config.bzl).
tf_proto_library(
    name = "gpu_autotuning_proto",
    srcs = ["gpu_autotuning.proto"],
    cc_api_version = 2,
    # Protos that gpu_autotuning.proto depends on.
    protodeps = [
        "//tensorflow/compiler/xla:xla_data_proto",
        "//tensorflow/compiler/xla/service:hlo_proto",
        "//tensorflow/core/protobuf:autotuning_proto",
    ],
)
2346
# C++ library built from hlo_algorithm_denylist.{cc,h}.
# Links the C++ target of :gpu_autotuning_proto; exercised by
# :hlo_algorithm_denylist_test.
cc_library(
    name = "hlo_algorithm_denylist",
    srcs = ["hlo_algorithm_denylist.cc"],
    hdrs = ["hlo_algorithm_denylist.h"],
    deps = [
        ":gpu_autotuning_proto_cc",
        "//tensorflow/compiler/xla:debug_options_flags",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/protobuf:autotuning_proto_cc",
        "@com_google_absl//absl/container:flat_hash_map",
    ],
)
2359
# Test for :hlo_algorithm_denylist. Tagged "no_pip" only.
tf_cc_test(
    name = "hlo_algorithm_denylist_test",
    srcs = ["hlo_algorithm_denylist_test.cc"],
    # Text-format proto made available to the test at run time.
    data = ["data/hlo_algorithm_denylist.pbtxt"],
    tags = ["no_pip"],
    deps = [
        ":hlo_algorithm_denylist",
        "//tensorflow/core:lib",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core/platform:resource_loader",
        "//tensorflow/stream_executor:dnn",
    ],
)
2374
# C++ library built from alias_passthrough_params.{cc,h}.
# Listed in the deps of :gpu_compiler; exercised by
# :alias_passthrough_params_test.
cc_library(
    name = "alias_passthrough_params",
    srcs = ["alias_passthrough_params.cc"],
    hdrs = ["alias_passthrough_params.h"],
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
    ],
)
2385
# Test for :alias_passthrough_params, built on the XLA HLO test base and
# linked with xla_internal_test_main.
# NOTE(review): the "no_pip" tag presumably excludes this test from
# pip-package test runs -- confirm.
2386tf_cc_test(
2387    name = "alias_passthrough_params_test",
2388    srcs = ["alias_passthrough_params_test.cc"],
2389    tags = ["no_pip"],
2390    deps = [
2391        ":alias_passthrough_params",
2392        "//tensorflow/compiler/xla/tests:hlo_test_base",
2393        "//tensorflow/compiler/xla/tests:test_utils",
2394        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
2395        "//tensorflow/core:lib",
2396        "//tensorflow/core:test",
2397    ],
2398)
2399
# C++ library built from horizontal_loop_fusion.{cc,h}.
# Uses the sibling :gpu_fusible target together with the HLO, HLO creation
# utilities, and HLO-pass service targets.
2400cc_library(
2401    name = "horizontal_loop_fusion",
2402    srcs = ["horizontal_loop_fusion.cc"],
2403    hdrs = ["horizontal_loop_fusion.h"],
2404    deps = [
2405        ":gpu_fusible",
2406        "//tensorflow/compiler/xla:shape_util",
2407        "//tensorflow/compiler/xla:xla_data_proto_cc",
2408        "//tensorflow/compiler/xla/service:hlo",
2409        "//tensorflow/compiler/xla/service:hlo_creation_utils",
2410        "//tensorflow/compiler/xla/service:hlo_pass",
2411        "//tensorflow/core:lib",
2412        "//tensorflow/core:lib_internal",
2413        "@com_google_absl//absl/container:flat_hash_set",
2414        "@com_google_absl//absl/types:span",
2415    ],
2416)
2417
# Test for :horizontal_loop_fusion.
# Tags are supplied by tf_cuda_tests_tags() (loaded from build_config_root.bzl).
# Besides the library under test, it links the sibling :fusion_merger,
# :instruction_fusion and :multi_output_fusion targets, the XLA GPU JIT, and
# the HLO pass-pipeline / DCE / tuple-simplifier service targets.
2418tf_cc_test(
2419    name = "horizontal_loop_fusion_test",
2420    srcs = ["horizontal_loop_fusion_test.cc"],
2421    tags = tf_cuda_tests_tags(),
2422    deps = [
2423        ":fusion_merger",
2424        ":horizontal_loop_fusion",
2425        ":instruction_fusion",
2426        ":multi_output_fusion",
2427        "//tensorflow/compiler/jit:xla_gpu_jit",
2428        "//tensorflow/compiler/xla:literal",
2429        "//tensorflow/compiler/xla:shape_util",
2430        "//tensorflow/compiler/xla:test",
2431        "//tensorflow/compiler/xla:test_helpers",
2432        "//tensorflow/compiler/xla/service:hlo_dce",
2433        "//tensorflow/compiler/xla/service:hlo_matchers",
2434        "//tensorflow/compiler/xla/service:hlo_parser",
2435        "//tensorflow/compiler/xla/service:hlo_pass",
2436        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
2437        "//tensorflow/compiler/xla/service:tuple_simplifier",
2438        "//tensorflow/compiler/xla/tests:filecheck",
2439        "//tensorflow/compiler/xla/tests:hlo_test_base",
2440        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
2441    ],
2442)
2443
# C++ library built from horizontal_input_fusion.{cc,h}.
# Uses the sibling :gpu_fusible and :ir_emission_utils targets together with
# the HLO, HLO creation utilities, and HLO-pass service targets.
2444cc_library(
2445    name = "horizontal_input_fusion",
2446    srcs = ["horizontal_input_fusion.cc"],
2447    hdrs = ["horizontal_input_fusion.h"],
2448    deps = [
2449        ":gpu_fusible",
2450        ":ir_emission_utils",
2451        "//tensorflow/compiler/xla:shape_util",
2452        "//tensorflow/compiler/xla/service:hlo",
2453        "//tensorflow/compiler/xla/service:hlo_creation_utils",
2454        "//tensorflow/compiler/xla/service:hlo_pass",
2455        "//tensorflow/core:lib",
2456        "//tensorflow/core:lib_internal",
2457        "@com_google_absl//absl/container:flat_hash_set",
2458        "@com_google_absl//absl/strings",
2459        "@com_google_absl//absl/types:span",
2460    ],
2461)
2462
# Test for :horizontal_input_fusion.
# Tags are supplied by tf_cuda_tests_tags() (loaded from build_config_root.bzl).
# Depends on gpu/tests:gpu_codegen_test rather than xla/tests:hlo_test_base,
# and also links :multi_output_fusion and the XLA GPU JIT.
2463tf_cc_test(
2464    name = "horizontal_input_fusion_test",
2465    srcs = ["horizontal_input_fusion_test.cc"],
2466    tags = tf_cuda_tests_tags(),
2467    deps = [
2468        ":horizontal_input_fusion",
2469        ":multi_output_fusion",
2470        "//tensorflow/compiler/jit:xla_gpu_jit",
2471        "//tensorflow/compiler/xla:shape_util",
2472        "//tensorflow/compiler/xla:test",
2473        "//tensorflow/compiler/xla:test_helpers",
2474        "//tensorflow/compiler/xla/service:hlo_matchers",
2475        "//tensorflow/compiler/xla/service:hlo_parser",
2476        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
2477        "//tensorflow/compiler/xla/service/gpu/tests:gpu_codegen_test",
2478        "//tensorflow/compiler/xla/tests:filecheck",
2479        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
2480    ],
2481)
2482
# C++ library built from reduction_degenerate_dim_remover.{cc,h}.
# Uses the sibling :ir_emission_utils target plus the HLO, HLO-pass, and
# pattern-matcher service targets. Its test target is not declared here; the
# test suite in this file references one under gpu/tests.
2483cc_library(
2484    name = "reduction_degenerate_dim_remover",
2485    srcs = ["reduction_degenerate_dim_remover.cc"],
2486    hdrs = ["reduction_degenerate_dim_remover.h"],
2487    deps = [
2488        ":ir_emission_utils",
2489        "//tensorflow/compiler/xla:shape_util",
2490        "//tensorflow/compiler/xla:status_macros",
2491        "//tensorflow/compiler/xla:statusor",
2492        "//tensorflow/compiler/xla/service:hlo",
2493        "//tensorflow/compiler/xla/service:hlo_pass",
2494        "//tensorflow/compiler/xla/service:pattern_matcher",
2495        "//tensorflow/core:lib",
2496        "//tensorflow/stream_executor/lib",
2497        "@com_google_absl//absl/algorithm:container",
2498        "@com_google_absl//absl/strings",
2499    ],
2500)
2501
# C++ library built from reduction_dimension_grouper.{cc,h}.
# Depends only on XLA shape/status utilities and the HLO / HLO-pass service
# targets. Its test target is not declared here; the test suite in this file
# references one under gpu/tests.
2502cc_library(
2503    name = "reduction_dimension_grouper",
2504    srcs = ["reduction_dimension_grouper.cc"],
2505    hdrs = ["reduction_dimension_grouper.h"],
2506    deps = [
2507        "//tensorflow/compiler/xla:shape_util",
2508        "//tensorflow/compiler/xla:statusor",
2509        "//tensorflow/compiler/xla/service:hlo",
2510        "//tensorflow/compiler/xla/service:hlo_pass",
2511        "@com_google_absl//absl/algorithm:container",
2512    ],
2513)
2514
# C++ library built from reduction_splitter.{cc,h}.
# Uses the sibling :ir_emission_utils target plus the HLO / HLO-pass service
# targets.
2515cc_library(
2516    name = "reduction_splitter",
2517    srcs = ["reduction_splitter.cc"],
2518    hdrs = ["reduction_splitter.h"],
2519    deps = [
2520        ":ir_emission_utils",
2521        "//tensorflow/compiler/xla:shape_util",
2522        "//tensorflow/compiler/xla/service:hlo",
2523        "//tensorflow/compiler/xla/service:hlo_pass",
2524    ],
2525)
2526
# Test for :reduction_splitter, built on the XLA HLO test base.
# NOTE(review): unlike the other tf_cc_test targets nearby, this one sets no
# `tags` attribute (neither "no_pip" nor tf_cuda_tests_tags()) -- confirm that
# is intentional.
2527tf_cc_test(
2528    name = "reduction_splitter_test",
2529    srcs = ["reduction_splitter_test.cc"],
2530    deps = [
2531        ":reduction_splitter",
2532        "//tensorflow/compiler/xla:shape_util",
2533        "//tensorflow/compiler/xla:test",
2534        "//tensorflow/compiler/xla:test_helpers",
2535        "//tensorflow/compiler/xla/service:hlo_matchers",
2536        "//tensorflow/compiler/xla/service:hlo_parser",
2537        "//tensorflow/compiler/xla/tests:hlo_test_base",
2538        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
2539    ],
2540)
2541
# C++ library built from reduction_layout_normalizer.{cc,h}.
# Its dependency list is identical to that of :reduction_degenerate_dim_remover.
# Its test target is not declared here; the test suite in this file references
# one under gpu/tests.
2542cc_library(
2543    name = "reduction_layout_normalizer",
2544    srcs = ["reduction_layout_normalizer.cc"],
2545    hdrs = ["reduction_layout_normalizer.h"],
2546    deps = [
2547        ":ir_emission_utils",
2548        "//tensorflow/compiler/xla:shape_util",
2549        "//tensorflow/compiler/xla:status_macros",
2550        "//tensorflow/compiler/xla:statusor",
2551        "//tensorflow/compiler/xla/service:hlo",
2552        "//tensorflow/compiler/xla/service:hlo_pass",
2553        "//tensorflow/compiler/xla/service:pattern_matcher",
2554        "//tensorflow/core:lib",
2555        "//tensorflow/stream_executor/lib",
2556        "@com_google_absl//absl/algorithm:container",
2557        "@com_google_absl//absl/strings",
2558    ],
2559)
2560
# C++ library built from tree_reduction_rewriter.{cc,h}.
# Uses the sibling :ir_emission_utils target, the collective-ops utilities,
# and the HLO / HLO-pass service targets. Its test target is not declared
# here; the test suite in this file references one under gpu/tests.
2561cc_library(
2562    name = "tree_reduction_rewriter",
2563    srcs = ["tree_reduction_rewriter.cc"],
2564    hdrs = ["tree_reduction_rewriter.h"],
2565    deps = [
2566        ":ir_emission_utils",
2567        "//tensorflow/compiler/xla:shape_util",
2568        "//tensorflow/compiler/xla:statusor",
2569        "//tensorflow/compiler/xla:util",
2570        "//tensorflow/compiler/xla:xla_data_proto_cc",
2571        "//tensorflow/compiler/xla/service:collective_ops_utils",
2572        "//tensorflow/compiler/xla/service:hlo",
2573        "//tensorflow/compiler/xla/service:hlo_pass",
2574        "@com_google_absl//absl/algorithm:container",
2575        "@com_google_absl//absl/strings",
2576    ],
2577)
2578
# C++ library built from gemm_broadcast_folding_rewriter.{cc,h}.
# Uses the sibling :backend_configs_cc and :cublas_cudnn targets plus the HLO,
# HLO-pass, and pattern-matcher service targets. The test suite in this file
# references a gemm_broadcast_folding_rewrite_test under gpu/tests.
2579cc_library(
2580    name = "gemm_broadcast_folding_rewriter",
2581    srcs = ["gemm_broadcast_folding_rewriter.cc"],
2582    hdrs = ["gemm_broadcast_folding_rewriter.h"],
2583    deps = [
2584        ":backend_configs_cc",
2585        ":cublas_cudnn",
2586        "//tensorflow/compiler/xla:status_macros",
2587        "//tensorflow/compiler/xla:statusor",
2588        "//tensorflow/compiler/xla/service:hlo",
2589        "//tensorflow/compiler/xla/service:hlo_pass",
2590        "//tensorflow/compiler/xla/service:pattern_matcher",
2591        "//tensorflow/core:lib_proto_parsing",
2592        "//tensorflow/stream_executor/lib",
2593        "@com_google_absl//absl/algorithm:container",
2594    ],
2595)
2596
2597# These tests are intended to be run with --test_env=XLA_FLAGS=--xla_gpu_jitrt_executable
2598# See tap/tensorflow.xla_gpu_jitrt.
#
# The suite only groups existing test targets; it defines no tests itself.
# Entries come from four places: //tensorflow/compiler/tests,
# //tensorflow/compiler/xla/python, this package and its tests/ subpackage,
# and the "_gpu" variants under //tensorflow/compiler/xla/tests.
# Entries prefixed with "# copybara:uncomment" are comments in this file and
# therefore not part of the suite as written.
# NOTE(review): they are presumably enabled by Copybara in the internal
# build -- confirm.
# The final entry is wrapped in if_google(...) (loaded from tensorflow.bzl);
# its inline comment says it currently fails in OSS.
# NOTE(review): if_google presumably yields the list only for Google-internal
# builds -- confirm.
2599test_suite(
2600    name = "jitrt_executable_tests",
2601    tests = [
2602        # copybara:uncomment "//platforms/xla/tests/internal:xfeed_test_gpu",
2603        "//tensorflow/compiler/tests:fft_test_gpu",
2604        "//tensorflow/compiler/xla/python:xla_client_test_gpu",
2605        "//tensorflow/compiler/xla/service/gpu:cudnn_fused_conv_rewriter_test",
2606        "//tensorflow/compiler/xla/service/gpu:custom_call_test",
2607        # copybara:uncomment "//tensorflow/compiler/xla/service/gpu:gpu_aot_compilation_test",
2608        "//tensorflow/compiler/xla/service/gpu/tests:add_preds.hlo.test",
2609        "//tensorflow/compiler/xla/service/gpu/tests:all_reduce.hlo.test",
2610        "//tensorflow/compiler/xla/service/gpu/tests:concat.hlo.test",
2611        "//tensorflow/compiler/xla/service/gpu/tests:constant.hlo.test",
2612        "//tensorflow/compiler/xla/service/gpu/tests:copy.hlo.test",
2613        "//tensorflow/compiler/xla/service/gpu/tests:copy_nested.hlo.test",
2614        "//tensorflow/compiler/xla/service/gpu/tests:dynamic_update_slice_inplace.hlo.test",
2615        "//tensorflow/compiler/xla/service/gpu/tests:element_wise_row_vectorization.hlo.test",
2616        "//tensorflow/compiler/xla/service/gpu/tests:element_wise_row_vectorization_test",
2617        "//tensorflow/compiler/xla/service/gpu/tests:fused_scatter.hlo.test",
2618        "//tensorflow/compiler/xla/service/gpu/tests:fused_slice.hlo.test",
2619        "//tensorflow/compiler/xla/service/gpu/tests:fused_slice_different_operands.hlo.test",
2620        "//tensorflow/compiler/xla/service/gpu/tests:fusion.hlo.test",
2621        "//tensorflow/compiler/xla/service/gpu/tests:fusion_logical_index_test",
2622        "//tensorflow/compiler/xla/service/gpu/tests:gemm_broadcast_folding_rewrite_test",
2623        "//tensorflow/compiler/xla/service/gpu/tests:gemm_rewrite_test",
2624        "//tensorflow/compiler/xla/service/gpu/tests:gpu_alignment_test",
2625        "//tensorflow/compiler/xla/service/gpu/tests:gpu_atomic_test",
2626        "//tensorflow/compiler/xla/service/gpu/tests:gpu_compilation_parallelism_test",
2627        "//tensorflow/compiler/xla/service/gpu/tests:gpu_convolution_regression_test",
2628        "//tensorflow/compiler/xla/service/gpu/tests:gpu_copy_alone_test",
2629        "//tensorflow/compiler/xla/service/gpu/tests:gpu_copy_test",
2630        "//tensorflow/compiler/xla/service/gpu/tests:gpu_dyn_shape_test",
2631        "//tensorflow/compiler/xla/service/gpu/tests:gpu_ftz_test",
2632        "//tensorflow/compiler/xla/service/gpu/tests:gpu_fusion_test",
2633        "//tensorflow/compiler/xla/service/gpu/tests:gpu_index_test",
2634        "//tensorflow/compiler/xla/service/gpu/tests:gpu_infeed_test",
2635        "//tensorflow/compiler/xla/service/gpu/tests:gpu_input_fusible_slice_test",
2636        "//tensorflow/compiler/xla/service/gpu/tests:gpu_kernel_tiling_test",
2637        "//tensorflow/compiler/xla/service/gpu/tests:gpu_ldg_test",
2638        "//tensorflow/compiler/xla/service/gpu/tests:gpu_noalias_test",
2639        "//tensorflow/compiler/xla/service/gpu/tests:gpu_reduce_scatter_creator_test",
2640        "//tensorflow/compiler/xla/service/gpu/tests:gpu_spmd_e2e_compile_test",
2641        "//tensorflow/compiler/xla/service/gpu/tests:gpu_too_many_blocks_test",
2642        "//tensorflow/compiler/xla/service/gpu/tests:gpu_unrolling_test",
2643        "//tensorflow/compiler/xla/service/gpu/tests:in_place_op_test",
2644        "//tensorflow/compiler/xla/service/gpu/tests:kernel_launch_test",
2645        "//tensorflow/compiler/xla/service/gpu/tests:launch_dimensions.hlo.test",
2646        "//tensorflow/compiler/xla/service/gpu/tests:mlir_fft_test",
2647        "//tensorflow/compiler/xla/service/gpu/tests:mlir_gemm_test",
2648        "//tensorflow/compiler/xla/service/gpu/tests:mlir_gpu_compile_test",
2649        "//tensorflow/compiler/xla/service/gpu/tests:mlir_sorting_test",
2650        "//tensorflow/compiler/xla/service/gpu/tests:pad_to_static.hlo.test",
2651        "//tensorflow/compiler/xla/service/gpu/tests:parallel_reduction_test",
2652        "//tensorflow/compiler/xla/service/gpu/tests:pred_arithmetic_test",
2653        "//tensorflow/compiler/xla/service/gpu/tests:reduce_unnested.hlo.test",
2654        "//tensorflow/compiler/xla/service/gpu/tests:reduction_degenerate_dim_remover_test",
2655        "//tensorflow/compiler/xla/service/gpu/tests:reduction_dimension_grouper_test",
2656        "//tensorflow/compiler/xla/service/gpu/tests:reduction_layout_normalizer_test",
2657        "//tensorflow/compiler/xla/service/gpu/tests:reduction_vectorization_sm_all.hlo.test",
2658        "//tensorflow/compiler/xla/service/gpu/tests:reduction_vectorization_test",
2659        "//tensorflow/compiler/xla/service/gpu/tests:rng_get_and_update_state.hlo.test",
2660        "//tensorflow/compiler/xla/service/gpu/tests:scatter.hlo.test",
2661        "//tensorflow/compiler/xla/service/gpu/tests:select_and_scatter.hlo.test",
2662        "//tensorflow/compiler/xla/service/gpu/tests:select_and_scatter_test",
2663        "//tensorflow/compiler/xla/service/gpu/tests:slice_to_dynamic.hlo.test",
2664        "//tensorflow/compiler/xla/service/gpu/tests:sorting.hlo.test",
2665        "//tensorflow/compiler/xla/service/gpu/tests:sorting_test",
2666        "//tensorflow/compiler/xla/service/gpu/tests:swap_conv_operands_test",
2667        "//tensorflow/compiler/xla/service/gpu/tests:tree_reduction_rewriter_test",
2668        "//tensorflow/compiler/xla/tests:all_reduce_test_gpu",
2669        "//tensorflow/compiler/xla/tests:array_elementwise_ops_test_gpu",
2670        "//tensorflow/compiler/xla/tests:axpy_simple_test_gpu",
2671        "//tensorflow/compiler/xla/tests:bad_rng_shape_validation_test_gpu",
2672        "//tensorflow/compiler/xla/tests:batch_normalization_test_gpu",
2673        "//tensorflow/compiler/xla/tests:bfloat16_test_gpu",
2674        "//tensorflow/compiler/xla/tests:binop_scaling_test_gpu",
2675        "//tensorflow/compiler/xla/tests:bitcast_convert_test_gpu",
2676        "//tensorflow/compiler/xla/tests:broadcast_simple_test_gpu",
2677        "//tensorflow/compiler/xla/tests:broadcast_test_gpu",
2678        "//tensorflow/compiler/xla/tests:buffer_donation_test_gpu",
2679        "//tensorflow/compiler/xla/tests:call_test_gpu",
2680        "//tensorflow/compiler/xla/tests:check_execution_arity_test_gpu",
2681        "//tensorflow/compiler/xla/tests:cholesky_test_gpu",
2682        "//tensorflow/compiler/xla/tests:client_test_gpu",
2683        "//tensorflow/compiler/xla/tests:compilation_cache_test_gpu",
2684        "//tensorflow/compiler/xla/tests:compute_constant_test_gpu",
2685        "//tensorflow/compiler/xla/tests:concat_test_gpu",
2686        "//tensorflow/compiler/xla/tests:conditional_test_gpu",
2687        "//tensorflow/compiler/xla/tests:constant_reduction_function_test_gpu",
2688        "//tensorflow/compiler/xla/tests:constants_test_gpu",
2689        "//tensorflow/compiler/xla/tests:conv_depthwise_backprop_filter_test_gpu",
2690        "//tensorflow/compiler/xla/tests:conv_depthwise_test_gpu",
2691        "//tensorflow/compiler/xla/tests:convert_test_gpu",
2692        "//tensorflow/compiler/xla/tests:convolution_dimension_numbers_test_gpu",
2693        "//tensorflow/compiler/xla/tests:convolution_test_1d_autotune_disabled_gpu",
2694        "//tensorflow/compiler/xla/tests:convolution_test_1d_gpu_alternative_layout_gpu",
2695        "//tensorflow/compiler/xla/tests:convolution_test_1d_no_vmodule_gpu",
2696        "//tensorflow/compiler/xla/tests:convolution_test_autotune_disabled_gpu",
2697        "//tensorflow/compiler/xla/tests:convolution_test_cudnn_frontend_disabled_gpu",
2698        "//tensorflow/compiler/xla/tests:convolution_test_gpu",
2699        "//tensorflow/compiler/xla/tests:convolution_test_gpu_alternative_layout_gpu",
2700        "//tensorflow/compiler/xla/tests:convolution_variants_test_gpu",
2701        "//tensorflow/compiler/xla/tests:copy_test_gpu",
2702        "//tensorflow/compiler/xla/tests:cpu_gpu_fusion_test_gpu",
2703        "//tensorflow/compiler/xla/tests:deallocation_test_gpu",
2704        "//tensorflow/compiler/xla/tests:deconstruct_tuple_test_gpu",
2705        "//tensorflow/compiler/xla/tests:deep_graph_test_gpu",
2706        "//tensorflow/compiler/xla/tests:dot_operation_single_threaded_runtime_test_gpu",
2707        "//tensorflow/compiler/xla/tests:dot_operation_test_autotune_disabled_gpu",
2708        "//tensorflow/compiler/xla/tests:dot_operation_test_gpu",
2709        "//tensorflow/compiler/xla/tests:dynamic_ops_test_gpu",
2710        "//tensorflow/compiler/xla/tests:execution_profile_test_gpu",
2711        "//tensorflow/compiler/xla/tests:execution_profile_test_with_xla_hlo_profile_gpu",
2712        "//tensorflow/compiler/xla/tests:exhaustive_binary_16_bit_test_gpu",
2713        "//tensorflow/compiler/xla/tests:exhaustive_binary_test_f32_f64_gpu",
2714        "//tensorflow/compiler/xla/tests:exhaustive_unary_test_complex_gpu",
2715        "//tensorflow/compiler/xla/tests:exhaustive_unary_test_f32_or_smaller_gpu",
2716        "//tensorflow/compiler/xla/tests:exhaustive_unary_test_f64_gpu",
2717        "//tensorflow/compiler/xla/tests:floor_ceil_test_gpu",
2718        "//tensorflow/compiler/xla/tests:fmax_fmin_test_gpu",
2719        "//tensorflow/compiler/xla/tests:gather_operation_test_gpu",
2720        "//tensorflow/compiler/xla/tests:get_dimension_size_test_gpu",
2721        "//tensorflow/compiler/xla/tests:grouped_convolution_test_gpu",
2722        "//tensorflow/compiler/xla/tests:half_test_gpu",
2723        "//tensorflow/compiler/xla/tests:iota_test_gpu",
2724        "//tensorflow/compiler/xla/tests:local_client_allocation_test_gpu",
2725        "//tensorflow/compiler/xla/tests:local_client_execute_test_gpu",
2726        "//tensorflow/compiler/xla/tests:log_test_gpu",
2727        "//tensorflow/compiler/xla/tests:map_test_gpu",
2728        "//tensorflow/compiler/xla/tests:matmul_test_gpu",
2729        "//tensorflow/compiler/xla/tests:matrix_ops_simple_test_gpu",
2730        "//tensorflow/compiler/xla/tests:multidimensional_slice_test_gpu",
2731        "//tensorflow/compiler/xla/tests:multioutput_fusion_test_gpu",
2732        "//tensorflow/compiler/xla/tests:outfeed_in_nested_computation_test_gpu",
2733        "//tensorflow/compiler/xla/tests:pad_test_gpu",
2734        "//tensorflow/compiler/xla/tests:params_test_gpu",
2735        "//tensorflow/compiler/xla/tests:pred_test_gpu",
2736        "//tensorflow/compiler/xla/tests:prng_test_gpu",
2737        "//tensorflow/compiler/xla/tests:ptxas_bug_120501638_gpu",
2738        "//tensorflow/compiler/xla/tests:query_inferred_shape_test_gpu",
2739        "//tensorflow/compiler/xla/tests:reduce_hlo_test_gpu",
2740        "//tensorflow/compiler/xla/tests:reduce_precision_test_gpu",
2741        "//tensorflow/compiler/xla/tests:reduce_test_gpu",
2742        "//tensorflow/compiler/xla/tests:reduce_window_test_gpu",
2743        "//tensorflow/compiler/xla/tests:replay_test_gpu",
2744        "//tensorflow/compiler/xla/tests:reshape_motion_test_gpu",
2745        "//tensorflow/compiler/xla/tests:reshape_test_gpu",
2746        "//tensorflow/compiler/xla/tests:reverse_test_gpu",
2747        "//tensorflow/compiler/xla/tests:round_trip_packed_literal_test_gpu",
2748        "//tensorflow/compiler/xla/tests:round_trip_transfer_test_gpu",
2749        "//tensorflow/compiler/xla/tests:sample_text_test_gpu",
2750        "//tensorflow/compiler/xla/tests:scalar_computations_test_gpu",
2751        "//tensorflow/compiler/xla/tests:scatter_test_gpu",
2752        "//tensorflow/compiler/xla/tests:select_and_scatter_test_gpu",
2753        "//tensorflow/compiler/xla/tests:select_test_gpu",
2754        "//tensorflow/compiler/xla/tests:slice_test_gpu",
2755        "//tensorflow/compiler/xla/tests:token_hlo_test_gpu",
2756        "//tensorflow/compiler/xla/tests:transfer_manager_test_gpu",
2757        "//tensorflow/compiler/xla/tests:transpose_test_gpu",
2758        "//tensorflow/compiler/xla/tests:triangular_solve_test_gpu",
2759        "//tensorflow/compiler/xla/tests:tuple_test_gpu",
2760        "//tensorflow/compiler/xla/tests:unary_op_test_gpu",
2761        "//tensorflow/compiler/xla/tests:value_inference_test_gpu",
2762        "//tensorflow/compiler/xla/tests:vector_ops_reduce_test_gpu",
2763        "//tensorflow/compiler/xla/tests:vector_ops_simple_test_gpu",
2764        "//tensorflow/compiler/xla/tests:while_test_gpu",
2765        "//tensorflow/compiler/xla/tests:xla_hlo_profile_test_gpu",
2766    ] + if_google([
2767        # Currently fails in OSS.
2768        "//tensorflow/python/kernel_tests/signal:fft_ops_test_xla_gpu",
2769    ]),
2770)
2771
# C++ library built from metrics.{cc,h}. Its only dependency is the
# TensorFlow monitoring sampler target.
2772cc_library(
2773    name = "metrics",
2774    srcs = ["metrics.cc"],
2775    hdrs = ["metrics.h"],
2776    deps = [
2777        "//tensorflow/core/lib/monitoring:sampler",
2778    ],
2779)
2780
# C++ library built from precompiled_kernels.{cc,h}.
# srcs, hdrs and the main deps list are each wrapped in
# if_gpu_is_configured(...) (loaded from stream_executor/build_defs.bzl).
# NOTE(review): the target is therefore presumably empty when no GPU backend
# is configured -- confirm the macro's behavior.
# The gpu_stream_header dependency is appended only through
# if_rocm_is_configured(...).
2781cc_library(
2782    name = "precompiled_kernels",
2783    srcs = if_gpu_is_configured(["precompiled_kernels.cc"]),
2784    hdrs = if_gpu_is_configured(["precompiled_kernels.h"]),
2785    deps = if_gpu_is_configured([
2786        "@com_google_absl//absl/base",
2787        "@com_google_absl//absl/base:core_headers",
2788        "@com_google_absl//absl/container:flat_hash_map",
2789        "//tensorflow/compiler/xla:status",
2790        "//tensorflow/compiler/xla:statusor",
2791        "//tensorflow/compiler/xla:types",
2792        "//tensorflow/compiler/xla:util",
2793        "//tensorflow/stream_executor:device_memory",
2794        "//tensorflow/stream_executor:stream_header",
2795        "//tensorflow/stream_executor/gpu:asm_compiler",
2796        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
2797    ]) + if_rocm_is_configured([
2798        "//tensorflow/stream_executor/gpu:gpu_stream_header",
2799    ]),
2800)
2801
# C++ library built from triangular_solve_rewriter.{cc,h}.
# Uses the sibling :cublas_cudnn target plus the HLO, HLO creation utilities,
# and HLO-pass service targets.
2802cc_library(
2803    name = "triangular_solve_rewriter",
2804    srcs = ["triangular_solve_rewriter.cc"],
2805    hdrs = ["triangular_solve_rewriter.h"],
2806    deps = [
2807        ":cublas_cudnn",
2808        "//tensorflow/compiler/xla:statusor",
2809        "//tensorflow/compiler/xla/service:hlo",
2810        "//tensorflow/compiler/xla/service:hlo_creation_utils",
2811        "//tensorflow/compiler/xla/service:hlo_pass",
2812        "@com_google_absl//absl/strings",
2813    ],
2814)
2815
# Library built from runtime_intrinsics.{cc,h}, declared with the
# tf_cuda_library macro (loaded from tensorflow.bzl) rather than cc_library.
# alwayslink = 1 keeps the object files in the final binary even when no
# symbol from them is referenced directly.
# NOTE(review): this is presumably needed because the sources register
# custom-call targets through static initializers (see the
# custom_call_target_registry dep) -- confirm in runtime_intrinsics.cc.
2816tf_cuda_library(
2817    name = "runtime_intrinsics",
2818    srcs = ["runtime_intrinsics.cc"],
2819    hdrs = ["runtime_intrinsics.h"],
2820    deps = [
2821        "//tensorflow/compiler/xla:shape_util",
2822        "//tensorflow/compiler/xla:status",
2823        "//tensorflow/compiler/xla:statusor",
2824        "//tensorflow/compiler/xla:util",
2825        "//tensorflow/compiler/xla/service:custom_call_status",
2826        "//tensorflow/compiler/xla/service:custom_call_target_registry",
2827        "//tensorflow/stream_executor",
2828        "@com_google_absl//absl/cleanup",
2829    ],
2830    alwayslink = 1,
2831)
2832
# C++ library built from hlo_fusion_stats.{cc,h}.
# Depends on the HLO / HLO-pass service targets and on both the XLA and the
# TensorFlow platform status/statusor targets.
2833cc_library(
2834    name = "hlo_fusion_stats",
2835    srcs = ["hlo_fusion_stats.cc"],
2836    hdrs = ["hlo_fusion_stats.h"],
2837    deps = [
2838        "//tensorflow/compiler/xla:status",
2839        "//tensorflow/compiler/xla:statusor",
2840        "//tensorflow/compiler/xla/service:hlo",
2841        "//tensorflow/compiler/xla/service:hlo_pass",
2842        "//tensorflow/core/platform:errors",
2843        "//tensorflow/core/platform:statusor",
2844        "@com_google_absl//absl/strings",
2845    ],
2846)
2847
# Test for :hlo_fusion_stats, built on the XLA HLO test base and the HLO
# parser.
# NOTE(review): the "no_pip" tag presumably excludes this test from
# pip-package test runs -- confirm.
2848tf_cc_test(
2849    name = "hlo_fusion_stats_test",
2850    srcs = ["hlo_fusion_stats_test.cc"],
2851    tags = ["no_pip"],
2852    deps = [
2853        ":hlo_fusion_stats",
2854        "//tensorflow/compiler/xla:status_macros",
2855        "//tensorflow/compiler/xla:test_helpers",
2856        "//tensorflow/compiler/xla/service:hlo",
2857        "//tensorflow/compiler/xla/service:hlo_parser",
2858        "//tensorflow/compiler/xla/tests:hlo_test_base",
2859        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
2860        "//tensorflow/core:test",
2861    ],
2862)
2863