# Description:
#   GPU-specific components in XLA service implementation.

load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
load(
    "//tensorflow/core/platform:build_config.bzl",
    "tf_proto_library",
)
load(
    "//tensorflow/core/platform:build_config_root.bzl",
    "tf_cuda_tests_tags",
)
load(
    "//tensorflow:tensorflow.bzl",
    "check_deps",
    "if_google",
    "tf_cc_test",
    "tf_copts",
    "tf_cuda_library",
)
load(
    "@local_config_rocm//rocm:build_defs.bzl",
    "if_rocm_is_configured",
)
load(
    "//tensorflow/stream_executor:build_defs.bzl",
    "if_gpu_is_configured",
)
load(
    "//tensorflow/tsl/platform/default:cuda_build_defs.bzl",
    "if_cuda_is_configured",
)

# buildifier: disable=same-origin-load
load("//tensorflow:tensorflow.bzl", "filegroup")

# buildifier: disable=same-origin-load
load("//tensorflow:tensorflow.bzl", "get_compatible_with_cloud")

# buildifier: disable=same-origin-load
load("//tensorflow:tensorflow.bzl", "if_nccl")

package(
    default_visibility = [":friends"],
    licenses = ["notice"],
)

# Default visibility group for this package; it pulls in the XLA-wide friends
# list.
package_group(
    name = "friends",
    includes = [
        "//tensorflow/compiler/xla:friends",
    ],
)

# This target checks that we are not accidentally adding TFRT dependencies.
# It captures the current state of dependencies and might need to get updated
# from time to time.
# Note: private targets may depend on TFRT if they are tagged 'manual'.
check_deps(
    name = "tfrt_deps_check",
    disallowed_deps = select({
        ":is_xlir_enabled": [],
        "//conditions:default": [
            # copybara:uncomment ":jitrt_custom_calls",
        ],
    }),
    deps = [
        # Targets that are included in CPU builds should not depend on TFRT.
        ":gpu_device_info",
        ":gpu_executable_run_options",
        ":ir_emission_utils",
        ":launch_dimensions",
        ":parallel_loop_emitter",
        ":target_util",
        # XLIR targets should only depend on TFRT if JitRt is enabled.
        ":gpu_compiler",
        ":gpu_executable",
        ":nccl_utils",
    ],
)

# Filegroup used to collect source files for dependency checking.
filegroup(
    name = "c_srcs",
    data = glob([
        "**/*.cc",
        "**/*.h",
    ]),
)

tf_proto_library(
    name = "backend_configs",
    srcs = ["backend_configs.proto"],
    cc_api_version = 2,
    protodeps = [
        "//tensorflow/compiler/xla:xla_data_proto",
        "//tensorflow/compiler/xla/stream_executor:dnn_proto",
    ],
)

# Publicly visible, unlike the package default of :friends.
cc_library(
    name = "gpu_executable_run_options",
    srcs = ["gpu_executable_run_options.cc"],
    hdrs = ["gpu_executable_run_options.h"],
    compatible_with = get_compatible_with_cloud(),
    visibility = ["//visibility:public"],
    deps = [
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla/service:executable",
        "//tensorflow/compiler/xla/service:global_device_id",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "@com_google_absl//absl/algorithm:container",
    ],
)

cc_library(
    name = "gpu_constants",
    srcs = ["gpu_constants.cc"],
    hdrs = ["gpu_constants.h"],
    deps = [
        "//tensorflow/compiler/xla:types",
        "//tensorflow/core:framework",
    ],
)

# Header-only library (no srcs).
cc_library(
    name = "gpu_types",
    hdrs = ["gpu_types.h"],
    deps = [
        "//tensorflow/compiler/xla:types",
        "//tensorflow/stream_executor:device_description",
        "@com_google_absl//absl/types:variant",
    ],
)

cc_library(
    name = "launch_dimensions",
    srcs = [
        "launch_dimensions.cc",
    ],
    hdrs = [
        "launch_dimensions.h",
    ],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        ":gpu_device_info",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/core:lib",
    ],
)

cc_library(
    name = "custom_call_thunk",
    srcs = ["custom_call_thunk.cc"],
    hdrs = ["custom_call_thunk.h"],
    local_defines = if_cuda_is_configured([
        "GOOGLE_CUDA=1",
    ]),
    deps = [
        ":buffer_allocations",
        ":thunk",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:custom_call_status_internal",
        "//tensorflow/core/platform:errors",
        "//tensorflow/stream_executor/gpu:gpu_stream_header",
        "//tensorflow/stream_executor/gpu:gpu_types_header",
        "@com_google_absl//absl/strings:str_format",
    ],
)

tf_cc_test(
    name = "custom_call_test",
    srcs = if_gpu_is_configured(["custom_call_test.cc"]),
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    tags = tf_cuda_tests_tags(),
    deps = [
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/client:xla_builder",
        "//tensorflow/compiler/xla/client/lib:constants",
        "//tensorflow/compiler/xla/service:custom_call_status",
        "//tensorflow/compiler/xla/service:custom_call_target_registry",
        "//tensorflow/compiler/xla/service:gpu_plugin",
        "//tensorflow/compiler/xla/tests:client_library_test_base",
        "//tensorflow/core:test",
        "//tensorflow/stream_executor/gpu:gpu_types_header",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

cc_library(
    name = "hlo_to_ir_bindings",
    srcs = ["hlo_to_ir_bindings.cc"],
    hdrs = ["hlo_to_ir_bindings.h"],
    deps = [
        ":buffer_allocations",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
    ],
)

cc_library(
    name = "target_util",
    srcs = ["target_util.cc"],
    hdrs = ["target_util.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_type_conversion_util",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
    ],
)

cc_library(
    name = "gpu_device_info",
    srcs = ["gpu_device_info.cc"],
    hdrs = ["gpu_device_info.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = ["//tensorflow/stream_executor:stream_header"],
)

cc_library(
    name = "ir_emitter",
    srcs = [
        "ir_emitter.cc",
        "ir_emitter_nested.cc",
        "ir_emitter_unnested.cc",
    ],
    hdrs = [
        "ir_emitter.h",
        "ir_emitter_context.h",
        "ir_emitter_nested.h",
        "ir_emitter_unnested.h",
        "kernel_mapping_scheme.h",
    ],
    copts = if_cuda_is_configured(["-DGOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":buffer_allocations",
        ":elemental_ir_emitter",
        ":fft_thunk",
        ":gpu_asm_opts_util",
        ":gpu_constants",
        ":gpu_conv_runner",
        ":gpu_executable",
        ":hlo_to_ir_bindings",
        ":ir_emission_utils",
        ":launch_dimensions",
        ":matmul_utils",
        ":nccl_collective_thunks",
        ":parallel_loop_emitter",
        ":target_util",
        ":thunk",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Linker",
        "@llvm-project//llvm:Support",
        "@llvm-project//mlir:ArithmeticDialect",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:GPUDialect",
        "@llvm-project//mlir:LLVMDialect",
        "@llvm-project//mlir:LLVMToLLVMIRTranslation",
        "@llvm-project//mlir:ToLLVMIRTranslation",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:MemRefDialect",
        "@llvm-project//mlir:ROCDLToLLVMIRTranslation",
        "@llvm-project//mlir:Support",
        "@llvm-project//mlir:NVVMToLLVMIRTranslation",
        "//tensorflow/compiler/mlir:name_utils",
        "//tensorflow/compiler/xla/mlir_hlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo_gpu",
        "//tensorflow/compiler/xla/mlir_hlo:gpu_fusion_rewrite",
        "//tensorflow/compiler/mlir/xla:attribute_exporter",
        "//tensorflow/compiler/mlir/xla:hlo_module_importer",
        "//tensorflow/compiler/mlir/xla:hlo_utils",
        "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla",
        "//tensorflow/compiler/mlir/xla:mlir_hlo_to_hlo",
        "//tensorflow/compiler/mlir/xla:type_to_shape",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:permutation_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:union_find",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:collective_ops_utils",
        "//tensorflow/compiler/xla/service:custom_call_status",
        "//tensorflow/compiler/xla/service:custom_call_target_registry",
        "//tensorflow/compiler/xla/service:elemental_ir_emitter",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_execution_profile",
        "//tensorflow/compiler/xla/service:name_uniquer",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:shape_inference",
        "//tensorflow/compiler/xla/service:while_loop_analysis",
        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
        "//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util",
        "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_builder_mixin",
        "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_type_conversion_util",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
        "//tensorflow/compiler/xla/service/llvm_ir:sort_util",
        "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:human_readable_json",
    ] + if_gpu_is_configured([
        ":triangular_solve_thunk",
        ":cholesky_thunk",
    ]) + if_cuda_is_configured([
        ":cublas_lt_matmul_thunk",
    ]),
)

cc_library(
    name = "parallel_loop_emitter",
    srcs = ["parallel_loop_emitter.cc"],
    hdrs = ["parallel_loop_emitter.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        ":launch_dimensions",
        ":target_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
        "//tensorflow/compiler/xla/service/llvm_ir:kernel_support_library",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
        "//tensorflow/core:lib",
        "@llvm-project//llvm:Core",
    ],
)

cc_library(
    name = "elemental_ir_emitter",
    srcs = ["elemental_ir_emitter.cc"],
    hdrs = ["elemental_ir_emitter.h"],
    deps = [
        ":backend_configs_cc",
        ":target_util",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:elemental_ir_emitter",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter",
        "//tensorflow/compiler/xla/service/llvm_ir:math_ops",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
    ],
)

cc_library(
    name = "buffer_allocations",
    srcs = ["buffer_allocations.cc"],
    hdrs = ["buffer_allocations.h"],
    deps = [
        ":gpu_constants",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:device_memory_allocator",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
    ],
)

cc_library(
    name = "thunk",
    srcs = ["thunk.cc"],
    hdrs = ["thunk.h"],
    deps = [
        ":buffer_allocations",
        ":gpu_executable_run_options",
        "//tensorflow/compiler/xla:executable_run_options",
        "//tensorflow/compiler/xla/service:executable",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
    ],
)

tf_cuda_library(
    name = "nccl_collective_thunks",
    srcs = [
        "nccl_all_gather_thunk.cc",
        "nccl_all_reduce_thunk.cc",
        "nccl_all_to_all_thunk.cc",
        "nccl_collective_permute_thunk.cc",
        "nccl_collective_thunk.cc",
    ],
    hdrs = [
        "nccl_all_gather_thunk.h",
        "nccl_all_reduce_thunk.h",
        "nccl_all_to_all_thunk.h",
        "nccl_collective_permute_thunk.h",
        "nccl_collective_thunk.h",
    ],
    # Override tf_cuda_library()'s internal default value of ["//buildenv/target:gce"].
    compatible_with = [],
    deps = [
        ":buffer_allocations",
        ":ir_emission_utils",
        ":nccl_utils",
        ":thunk",
        "//tensorflow/compiler/mlir/xla:attribute_exporter",
        "//tensorflow/compiler/mlir/xla:hlo_utils",
        "//tensorflow/compiler/mlir/xla:type_to_shape",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo_gpu",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:collective_ops_utils",
        "//tensorflow/compiler/xla/service:global_device_id",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib",
        "//tensorflow/stream_executor/gpu:gpu_activation_header",
        "//tensorflow/stream_executor/gpu:gpu_stream",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@llvm-project//mlir:IR",
    ],
)

# Empty library to implement nested dependency conditions.
cc_library(name = "empty")

# If NCCL/RCCL is supported, this target '#defines XLA_ENABLE_XCCL' and
# provides a header which #includes NCCL/RCCL.
alias(
    name = "nccl_utils",
    actual = if_nccl(":_nccl_utils", ":empty"),
)

# Do not depend on this target, but rather depend on :nccl_utils.
tf_cuda_library(
    name = "_nccl_utils",
    srcs = if_gpu_is_configured(["nccl_utils.cc"]),
    hdrs = if_gpu_is_configured(["nccl_utils.h"]),
    # Override tf_cuda_library()'s internal default value of ["//buildenv/target:gce"].
    compatible_with = [],
    defines = if_gpu_is_configured(["XLA_ENABLE_XCCL"]),
    tags = ["manual"],  # Only builds with if_nccl().
    deps = if_gpu_is_configured([
        ":gpu_executable_run_options",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "//tensorflow/compiler/xla:debug_options_flags",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:collective_ops_utils",
        "//tensorflow/compiler/xla/service:global_device_id",
        "//tensorflow/compiler/xla/service:rendezvous",
        "//tensorflow/core:lib",
    ]) + if_cuda_is_configured([
        "@local_config_nccl//:nccl",
    ]) + if_rocm_is_configured([
        "@local_config_rocm//rocm:rccl",
    ]),
)

# Build setting read by :is_xlir_enabled; its default is if_google(True, False).
bool_flag(
    name = "enable_xlir",
    build_setting_default = if_google(True, False),
)

# Matches when :enable_xlir is "True"; used as a select() key in this file.
config_setting(
    name = "is_xlir_enabled",
    flag_values = {":enable_xlir": "True"},
)

# copybara:uncomment_begin
#
# cc_library(
#     name = "jitrt_custom_calls",
#     srcs = ["jitrt_custom_calls.cc"],
#     hdrs = ["jitrt_custom_calls.h"],
#     copts = if_cuda_is_configured(["-DGOOGLE_CUDA=1"]),
#     tags = ["manual"],
#     visibility = ["//visibility:private"],
#     deps = [
#         ":fft_thunk",
#         ":gpu_asm_opts_util",
#         ":io_feed_manager",
#         ":matmul_utils",
#         ":nccl_collective_thunks",
#         ":stream_executor_util",
#         "@llvm-project//llvm:OrcJIT",
#         "@llvm-project//mlir:Support",
#         "//tensorflow/compiler/xla:tfrt_utils",
#         "//tensorflow/compiler/xla/runtime:arguments",
#         "//tensorflow/compiler/xla/runtime:types",
#         "//tensorflow/compiler/xla/runtime:executable",
#         "//tensorflow/compiler/xla/runtime:jit_executable",
#         "//tensorflow/compiler/xla:shape_util",
#         "//tensorflow/compiler/xla/service:custom_call_status_internal",
#         "//tensorflow/compiler/xla/service:custom_call_target_registry",
#         "//tensorflow/compiler/xla/service:executable",
#         "//tensorflow/compiler/xla/service/gpu:gpu_conv_runner",
#         "//tensorflow/core/platform:human_readable_json",
#         "//tensorflow/stream_executor/gpu:gpu_stream",
#         "//tensorflow/stream_executor/gpu:gpu_types_header",
#         "@tf_runtime//:dtype",
#         "@tf_runtime//:support",
#         "//tensorflow/compiler/xla/runtime:custom_call",
#         "//tensorflow/compiler/xla/runtime:type_id",
#         "//tensorflow/compiler/xla/mlir/transforms/runtime:custom_call_encoding",
#     ] + if_gpu_is_configured([
#         ":cholesky_thunk",
#         ":triangular_solve_thunk",
#     ]),
# )
#
# copybara:uncomment_end

cc_library(
    name = "gpu_executable",
    srcs = [
        "conditional_thunk.cc",
        "convolution_thunk.cc",
        "copy_thunk.cc",
        "for_thunk.cc",
        "gpu_executable.cc",
        "infeed_thunk.cc",
        "kernel_thunk.cc",
        "memset_thunk.cc",
        "outfeed_thunk.cc",
        "replica_id_thunk.cc",
        "sequential_thunk.cc",
        "while_thunk.cc",
    ],
    hdrs = [
        "conditional_thunk.h",
        "convolution_thunk.h",
        "copy_thunk.h",
        "custom_call_thunk.h",
        "for_thunk.h",
        "gemm_thunk.h",
        "gpu_executable.h",
        "infeed_thunk.h",
        "kernel_thunk.h",
        "memset_thunk.h",
        "outfeed_thunk.h",
        "replica_id_thunk.h",
        "sequential_thunk.h",
        "while_thunk.h",
    ],
    # XLA_ENABLE_XLIR=1 is defined only when :is_xlir_enabled matches.
    local_defines = select({
        ":is_xlir_enabled": ["XLA_ENABLE_XLIR=1"],
        "//conditions:default": [],
    }),
    deps = [
        ":backend_configs_cc",
        ":buffer_allocations",
        ":cusolver_context",
        ":custom_call_thunk",
        ":gemm_thunk",
        ":gpu_asm_opts_util",
        ":gpu_constants",
        ":gpu_conv_runner",
        ":gpu_executable_run_options",
        ":gpu_types",
        ":fft_thunk",
        ":io_feed_manager",
        ":ir_emission_utils",
        ":matmul_utils",
        ":nccl_collective_thunks",
        ":launch_dimensions",
        ":stream_executor_util",
        ":thunk",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/synchronization",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo_gpu",
        "//tensorflow/compiler/xla/service:hlo_execution_profile",
        "//tensorflow/compiler/xla:array2d",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:refcounting_hash_map",
        "//tensorflow/compiler/xla:shape_tree",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:custom_call_status_internal",
        "//tensorflow/compiler/xla/service:executable",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:logical_buffer",
        "//tensorflow/compiler/xla/service:shaped_buffer",
        "//tensorflow/compiler/xla/service:transfer_manager",
        "//tensorflow/compiler/xla/service:xla_debug_info_manager",
        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/profiler/lib:traceme",
        "//tensorflow/core/profiler/lib:scoped_annotation",
        "//tensorflow/stream_executor",
        "//tensorflow/stream_executor/gpu:asm_compiler",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
        "//tensorflow/stream_executor/gpu:gpu_types_header",
        "//tensorflow/stream_executor:blas",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:device_memory_allocator",
        "//tensorflow/stream_executor:kernel",
        "//tensorflow/stream_executor/gpu:gpu_stream",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@com_google_absl//absl/types:variant",
        "//tensorflow/stream_executor:scratch_allocator",
    ] + if_gpu_is_configured([
        ":cholesky_thunk",
        ":precompiled_kernels",
        ":triangular_solve_thunk",
    ]) + if_cuda_is_configured([
        "//tensorflow/stream_executor/cuda:cuda_stream",
        "//tensorflow/tsl/platform/default/build_config:cublas_plugin",
        "//tensorflow/tsl/platform/default/build_config:cudnn_plugin",
        "//tensorflow/tsl/platform/default/build_config:cufft_plugin",
        "//tensorflow/tsl/platform/default/build_config:stream_executor_cuda",  # build_cleaner: keep
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        "//tensorflow/tsl/platform/default/build_config:stream_executor_rocm",
        "@local_config_rocm//rocm:rocm_headers",
    ]) + select({
        # Dependencies added only when :is_xlir_enabled matches.
        ":is_xlir_enabled": [
            ":jitrt_custom_calls",
            "//tensorflow/compiler/xla/runtime:jit_executable",
            "//tensorflow/compiler/xla/runtime:executable",
            "//tensorflow/compiler/xla/runtime:diagnostics",
            "//tensorflow/compiler/xla/mlir/transforms/runtime:compilation_pipeline",
            # copybara:uncomment "@tf_runtime//:init_tfrt_dialects",
        ],
        "//conditions:default": [],
    }),
)

cc_library(
    name = "ir_emission_utils",
    srcs = ["ir_emission_utils.cc"],
    hdrs = ["ir_emission_utils.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        ":target_util",
        "//tensorflow/compiler/mlir/xla:hlo_utils",
        "//tensorflow/compiler/mlir/xla:type_to_shape",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/mlir_hlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_type_conversion_util",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "@llvm-project//llvm:Core",
        "@llvm-project//mlir:ArithmeticDialect",
    ],
)

tf_cc_test(
    name = "ir_emission_utils_test",
    srcs = ["ir_emission_utils_test.cc"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//tensorflow/core:test",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
    ],
)

cc_library(
    name = "cublas_cudnn",
    srcs = ["cublas_cudnn.cc"],
    hdrs = ["cublas_cudnn.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core/platform:statusor",
    ],
)

# TODO(ezhulenev): Extract `RunCholesky` into a separate library.
# srcs, hdrs and deps are each wrapped in if_gpu_is_configured().
cc_library(
    name = "cholesky_thunk",
    srcs = if_gpu_is_configured(["cholesky_thunk.cc"]),
    hdrs = if_gpu_is_configured(["cholesky_thunk.h"]),
    deps = if_gpu_is_configured([
        ":buffer_allocations",
        ":cusolver_context",
        ":precompiled_kernels",
        ":thunk",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:optional",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib_proto_parsing",
        "//tensorflow/core:portable_gif_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
    ]),
)

# TODO(ezhulenev): Extract `RunTriangularSolve` into a separate library.
# srcs, hdrs and deps are each wrapped in if_gpu_is_configured().
cc_library(
    name = "triangular_solve_thunk",
    srcs = if_gpu_is_configured(["triangular_solve_thunk.cc"]),
    hdrs = if_gpu_is_configured(["triangular_solve_thunk.h"]),
    deps = if_gpu_is_configured([
        ":buffer_allocations",
        ":cusolver_context",
        ":precompiled_kernels",
        ":thunk",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:optional",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib_proto_parsing",
        "//tensorflow/core:portable_gif_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
    ]),
)

cc_library(
    name = "fft_thunk",
    srcs = ["fft_thunk.cc"],
    hdrs = ["fft_thunk.h"],
    deps = [
        ":buffer_allocations",
        ":cusolver_context",
        ":precompiled_kernels",
        ":thunk",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core:lib_proto_parsing",
        "//tensorflow/core:portable_gif_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:scratch_allocator",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:optional",
    ],
)

cc_library(
    name = "gemm_rewriter",
    srcs = ["gemm_rewriter.cc"],
    hdrs = ["gemm_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_evaluator",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/core:lib",
        "//tensorflow/stream_executor/lib",
        "@com_google_absl//absl/algorithm:container",
    ],
)

cc_library(
    name = "gemm_thunk",
    srcs = ["gemm_thunk.cc"],
    hdrs = ["gemm_thunk.h"],
    deps = [
        ":matmul_utils",
        ":thunk",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/core:tflite_portable_logging",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
    ],
)

# srcs, hdrs and deps are each wrapped in if_cuda_is_configured().
cc_library(
    name = "cublas_lt_matmul_thunk",
    srcs = if_cuda_is_configured(["cublas_lt_matmul_thunk.cc"]),
    hdrs = if_cuda_is_configured(["cublas_lt_matmul_thunk.h"]),
    deps = if_cuda_is_configured([
        ":matmul_utils",
        ":thunk",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/core:tflite_portable_logging",
        "//tensorflow/tsl/platform/default/build_config:cublas_plugin",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/cuda:cublas_lt_header",
    ]),
)

# srcs, hdrs and deps are each wrapped in if_cuda_is_configured().
cc_library(
    name = "gemm_algorithm_picker",
    srcs = if_cuda_is_configured(["gemm_algorithm_picker.cc"]),
    hdrs = if_cuda_is_configured(["gemm_algorithm_picker.h"]),
    deps = if_cuda_is_configured([
        ":backend_configs_cc",
        ":buffer_comparator",
        ":gemm_thunk",
        ":gpu_asm_opts_util",
        ":gpu_conv_runner",
        ":ir_emission_utils",
        ":matmul_utils",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/protobuf:autotuning_proto_cc",
        "//tensorflow/core/util/proto:proto_utils",
        "//tensorflow/stream_executor:blas",
        "//tensorflow/tsl/platform/default/build_config:cublas_plugin",
        "//tensorflow/stream_executor/cuda:cublas_lt_header",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:device_memory_allocator",
        "//tensorflow/stream_executor/gpu:redzone_allocator",
    ]),
)

cc_library(
    name = "matmul_utils",
    srcs = ["matmul_utils.cc"],
    hdrs = ["matmul_utils.h"],
    # NOTE(review): GOOGLE_CUDA=1 is passed via `defines` rather than
    # `local_defines`; Bazel documents `defines` as also applying to every
    # target that depends on this one - confirm that is intended.
    defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":ir_emission_utils",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/types:span",
        "//tensorflow/compiler/xla/mlir_hlo",
        "//tensorflow/compiler/xla/mlir_hlo:lhlo_gpu",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core/platform:statusor",
        "//tensorflow/stream_executor:stream_header",
    ] + if_cuda_is_configured([
        "//tensorflow/stream_executor/cuda:cublas_lt_header",
        "//tensorflow/tsl/platform/default/build_config:cublas_plugin",
        "//tensorflow/stream_executor:host_or_device_scalar",
        "//tensorflow/stream_executor:scratch_allocator",
    ]),
)

tf_cc_test(
    name = "matmul_utils_test",
    srcs = ["matmul_utils_test.cc"],
    deps = [
        ":matmul_utils",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core/platform:status_matchers",
        "@com_google_absl//absl/strings",
    ],
)

cc_library(
    name = "gpu_conv_algorithm_picker",
    srcs = ["gpu_conv_algorithm_picker.cc"],
    hdrs = ["gpu_conv_algorithm_picker.h"],
    copts = if_cuda_is_configured(["-DGOOGLE_CUDA=1"]),
    deps = [
        ":backend_configs_cc",
        ":gpu_asm_opts_util",
        ":gpu_autotuning_proto_cc",
        ":gpu_conv_runner",
        ":gpu_executable",
        ":hlo_algorithm_denylist",
        ":ir_emission_utils",
        ":stream_executor_util",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/time",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:compiler",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/core/protobuf:autotuning_proto_cc",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/util/proto:proto_utils",
        "//tensorflow/stream_executor:device_memory_allocator",
        "//tensorflow/compiler/xla/stream_executor:dnn_proto_cc",
    ] + if_cuda_is_configured([
        ":buffer_comparator",
        "@local_config_cuda//cuda:cudnn_header",
        "//tensorflow/stream_executor/gpu:redzone_allocator",
    ]),
)

# Library for gpu_conv_runner.{cc,h}. Depends on the package-local
# :backend_configs_cc, :cublas_cudnn and :stream_executor_util targets and on
# the stream_executor DNN / lazy_op_runner targets.
cc_library(
    name = "gpu_conv_runner",
    srcs = ["gpu_conv_runner.cc"],
    hdrs = ["gpu_conv_runner.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/stream_executor:lazy_op_runner",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:dnn",
        "@com_google_absl//absl/strings",
    ],
)

# Library for gpu_conv_rewriter.{cc,h}. Depends on the generic hlo_pass target
# (presumably this is an HLO pass; confirm in the header).
cc_library(
    name = "gpu_conv_rewriter",
    srcs = ["gpu_conv_rewriter.cc"],
    hdrs = ["gpu_conv_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:permutation_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
    ],
)

# Test for :gpu_conv_rewriter. Tags are produced by tf_cuda_tests_tags(). The
# test main is supplied by xla_internal_test_main; its trailing "keep"
# directive is a tooling annotation and must stay on that line.
tf_cc_test(
    name = "gpu_conv_rewriter_test",
    srcs = ["gpu_conv_rewriter_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":gpu_conv_rewriter",
        "//tensorflow/compiler/jit:xla_gpu_jit",
        "//tensorflow/compiler/xla:protobuf_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:shape_inference",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//tensorflow/core:test",
    ],
)

# Library for cusolver_context.{cc,h}. srcs and hdrs are wrapped in
# if_gpu_is_configured(), while the base deps are unconditional. CUDA-only and
# ROCm-only deps are appended via if_cuda_is_configured() and
# if_rocm_is_configured() respectively.
cc_library(
    name = "cusolver_context",
    srcs = if_gpu_is_configured(["cusolver_context.cc"]),
    hdrs = if_gpu_is_configured(["cusolver_context.h"]),
    deps = [
        "//tensorflow/compiler/xla:comparison_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:blas",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
        "//tensorflow/stream_executor/cuda:cusolver_lib",
    ]) + if_rocm_is_configured([
        "@local_config_rocm//rocm:rocm_headers",
        "//tensorflow/stream_executor/rocm:rocblas_wrapper",
        "//tensorflow/stream_executor/rocm:rocsolver_wrapper",
        "//tensorflow/stream_executor/rocm:hipsolver_wrapper",
    ]),
)

# Library for cusolver_rewriter.{cc,h}. All three attributes (srcs, hdrs, deps)
# are wrapped in if_gpu_is_configured().
cc_library(
    name = "cusolver_rewriter",
    srcs = if_gpu_is_configured(["cusolver_rewriter.cc"]),
    hdrs = if_gpu_is_configured(["cusolver_rewriter.h"]),
    deps = if_gpu_is_configured([
        ":cusolver_context",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:blas",
        "//tensorflow/stream_executor:device_memory_allocator",
        "@com_google_absl//absl/algorithm:container",
    ]),
)

# Library for instruction_fusion.{cc,h} in this package. Builds on the
# //tensorflow/compiler/xla/service:instruction_fusion target and on
# :gpu_fusible.
cc_library(
    name = "instruction_fusion",
    srcs = ["instruction_fusion.cc"],
    hdrs = ["instruction_fusion.h"],
    deps = [
        ":gpu_fusible",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:fusion_node_indexing_evaluation",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_query",
        "//tensorflow/compiler/xla/service:instruction_fusion",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
    ],
)

# Test for :instruction_fusion. Tagged "no_pip".
tf_cc_test(
    name = "instruction_fusion_test",
    srcs = ["instruction_fusion_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":gpu_fusible",
        ":instruction_fusion",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)

# Library for multi_output_fusion.{cc,h}. Depends on :instruction_fusion and
# :gpu_fusible from this package.
cc_library(
    name = "multi_output_fusion",
    srcs = ["multi_output_fusion.cc"],
    hdrs = ["multi_output_fusion.h"],
    deps = [
        ":gpu_fusible",
        ":instruction_fusion",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:debug_options_flags",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_graph_dumper",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_reachability",
        "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
    ],
)

# Test for :multi_output_fusion. Tagged "no_pip".
tf_cc_test(
    name = "multi_output_fusion_test",
    srcs = ["multi_output_fusion_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":gpu_fusible",
        ":instruction_fusion",
        ":multi_output_fusion",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/strings",
    ],
)

# Library for gpu_sanitize_constant_names.{cc,h}. Depends on hlo_pass and on
# the llvm_ir buffer_assignment_util target.
cc_library(
    name = "gpu_sanitize_constant_names",
    srcs = ["gpu_sanitize_constant_names.cc"],
    hdrs = ["gpu_sanitize_constant_names.h"],
    deps = [
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
        "//tensorflow/core:lib",
    ],
)

# Test for :gpu_sanitize_constant_names. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "gpu_sanitize_constant_names_test",
    srcs = ["gpu_sanitize_constant_names_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":gpu_sanitize_constant_names",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_layout",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:computation_layout",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:test",
        "@com_google_absl//absl/strings",
    ],
)

# Library for fusion_bitcast_lift.{cc,h}. Has no package-local deps; uses the
# generic hlo, hlo_dce, hlo_pass and hlo_verifier service targets.
cc_library(
    name = "fusion_bitcast_lift",
    srcs = ["fusion_bitcast_lift.cc"],
    hdrs = ["fusion_bitcast_lift.h"],
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_dce",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/core/platform:errors",
        "@com_google_absl//absl/types:span",
    ],
)

# Test for :fusion_bitcast_lift. Tagged "no_pip"; links the filecheck test
# helper target.
tf_cc_test(
    name = "fusion_bitcast_lift_test",
    srcs = ["fusion_bitcast_lift_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":fusion_bitcast_lift",
        "//tensorflow/compiler/xla/service:hlo_dce",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:filecheck",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/types:span",
    ],
)

# Library for fusion_merger.{cc,h}. Depends on :gpu_fusible,
# :instruction_fusion and the hlo_cost_analysis service target.
cc_library(
    name = "fusion_merger",
    srcs = ["fusion_merger.cc"],
    hdrs = ["fusion_merger.h"],
    deps = [
        ":gpu_fusible",
        ":instruction_fusion",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_cost_analysis",
        "//tensorflow/compiler/xla/service:hlo_graph_dumper",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings",
    ],
)

# Test for :fusion_merger. Tagged "no_pip".
tf_cc_test(
    name = "fusion_merger_test",
    srcs = ["fusion_merger_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":fusion_merger",
        ":gpu_fusible",
        ":instruction_fusion",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/types:span",
    ],
)

# Library for gpu_conv_padding_legalization.{cc,h}. Depends on :cublas_cudnn
# and the hlo_pass / shape_inference service targets.
cc_library(
    name = "gpu_conv_padding_legalization",
    srcs = ["gpu_conv_padding_legalization.cc"],
    hdrs = ["gpu_conv_padding_legalization.h"],
    deps = [
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:shape_inference",
        "@com_google_absl//absl/memory",
    ],
)

# Test for :gpu_conv_padding_legalization. Tags come from tf_cuda_tests_tags().
# The "keep" directive on the test-main dep is a tooling annotation.
tf_cc_test(
    name = "gpu_conv_padding_legalization_test",
    srcs = ["gpu_conv_padding_legalization_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":gpu_conv_padding_legalization",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//tensorflow/core:test",
    ],
)

# Library for cudnn_support_utils.{cc,h}. Shared by the cudnn_* targets below
# (:cudnn_pad_for_convolutions and :cudnn_vectorize_convolutions depend on it).
cc_library(
    name = "cudnn_support_utils",
    srcs = ["cudnn_support_utils.cc"],
    hdrs = ["cudnn_support_utils.h"],
    deps = [
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:comparison_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/core/platform:status",
        "//tensorflow/stream_executor:stream_header",
    ],
)

# Test for :cudnn_support_utils. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "cudnn_support_utils_test",
    srcs = ["cudnn_support_utils_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cudnn_support_utils",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:verified_hlo_module",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core/platform:errors",
        "//tensorflow/core/platform:status",
        "//tensorflow/core/platform:status_matchers",
        "//tensorflow/stream_executor:device_description",
        "//tensorflow/stream_executor:stream_header",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
    ],
)

# Library for cudnn_pad_for_convolutions.{cc,h}. Depends on
# :cudnn_support_utils and the hlo_pass service target.
cc_library(
    name = "cudnn_pad_for_convolutions",
    srcs = ["cudnn_pad_for_convolutions.cc"],
    hdrs = ["cudnn_pad_for_convolutions.h"],
    deps = [
        ":cudnn_support_utils",
        ":ir_emission_utils",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core/platform:status",
        "//tensorflow/stream_executor:stream_header",
        "@com_google_absl//absl/functional:bind_front",
    ],
)

# Test for :cudnn_pad_for_convolutions. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "cudnn_pad_for_convolutions_test",
    srcs = ["cudnn_pad_for_convolutions_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":cudnn_pad_for_convolutions",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core:test",
    ],
)

# Library for cudnn_vectorize_convolutions.{cc,h}. Besides hlo_pass it links
# the XLA client builder and the call_inliner service target.
cc_library(
    name = "cudnn_vectorize_convolutions",
    srcs = ["cudnn_vectorize_convolutions.cc"],
    hdrs = ["cudnn_vectorize_convolutions.h"],
    deps = [
        ":cudnn_support_utils",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/client:xla_builder",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
    ],
)

# Test for :cudnn_vectorize_convolutions. Tags come from tf_cuda_tests_tags().
tf_cc_test(
    name = "cudnn_vectorize_convolutions_test",
    srcs = ["cudnn_vectorize_convolutions_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":cudnn_vectorize_convolutions",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core/platform:statusor",
    ],
)

# Library for cudnn_simplify_padding.{cc,h}. Depends on :cublas_cudnn and the
# hlo_pass / pattern_matcher service targets.
cc_library(
    name = "cudnn_simplify_padding",
    srcs = ["cudnn_simplify_padding.cc"],
    hdrs = ["cudnn_simplify_padding.h"],
    deps = [
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
    ],
)

# Test for :cudnn_simplify_padding. Also links :cudnn_pad_for_convolutions and
# :cudnn_vectorize_convolutions, so the test presumably exercises those
# together with the target under test (confirm in the test source).
tf_cc_test(
    name = "cudnn_simplify_padding_test",
    srcs = ["cudnn_simplify_padding_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":cudnn_pad_for_convolutions",
        ":cudnn_simplify_padding",
        ":cudnn_vectorize_convolutions",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core:test",
        "//tensorflow/core/platform:statusor",
    ],
)

# Library for cublas_pad_for_gemms.{cc,h}. Depends on :ir_emission_utils and
# the hlo_pass service target.
cc_library(
    name = "cublas_pad_for_gemms",
    srcs = ["cublas_pad_for_gemms.cc"],
    hdrs = ["cublas_pad_for_gemms.h"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
    ],
)

# Test for :cublas_pad_for_gemms. Tagged "no_pip".
tf_cc_test(
    name = "cublas_pad_for_gemms_test",
    srcs = ["cublas_pad_for_gemms_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":cublas_pad_for_gemms",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
    ],
)

# Header-only target exposing target_constants.h; it has no srcs and no deps.
cc_library(
    name = "target_constants",
    hdrs = ["target_constants.h"],
)

# Library for gpu_transfer_manager.{cc,h}. Marked alwayslink because it
# contains registration code (see the inline comment on that attribute).
cc_library(
    name = "gpu_transfer_manager",
    srcs = ["gpu_transfer_manager.cc"],
    hdrs = ["gpu_transfer_manager.h"],
    deps = [
        ":io_feed_manager",
        ":target_constants",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:shape_tree",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:compiler",
        "//tensorflow/compiler/xla/service:generic_transfer_manager",
        "//tensorflow/compiler/xla/service:transfer_manager",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:stream_header",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/memory",
        "@llvm-project//llvm:Core",
    ],
    alwayslink = True,  # Contains per-platform transfer manager registration
)

# Library for gpu_reduce_scatter_creator.{cc,h}. Depends only on generic XLA
# service targets (hlo, hlo_pass, hlo_query, reduce_scatter_utils).
cc_library(
    name = "gpu_reduce_scatter_creator",
    srcs = ["gpu_reduce_scatter_creator.cc"],
    hdrs = ["gpu_reduce_scatter_creator.h"],
    deps = [
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_query",
        "//tensorflow/compiler/xla/service:reduce_scatter_utils",
    ],
)

# Library for gpu_compiler.{cc,h}.
#
# When the :is_xlir_enabled condition matches, XLA_ENABLE_XLIR=1 is added as a
# local define and the JitRt / runtime deps in the select() at the end of
# `deps` are linked in; otherwise neither is added.
#
# This target is listed in :tfrt_deps_check at the top of this file, so new
# deps added here are subject to that check.
#
# NOTE(review): the deps list is not in sorted order; it is left as-is here
# because reordering deps can change link order.
cc_library(
    name = "gpu_compiler",
    srcs = [
        "gpu_compiler.cc",
    ],
    hdrs = [
        "gpu_compiler.h",
    ],
    local_defines = select({
        ":is_xlir_enabled": ["XLA_ENABLE_XLIR=1"],
        "//conditions:default": [],
    }),
    deps = [
        ":alias_passthrough_params",
        ":all_reduce_blueconnect",
        ":fusion_bitcast_lift",
        ":fusion_merger",
        ":gemm_broadcast_folding_rewriter",
        ":gemm_rewriter",
        ":gpu_constants",
        ":gpu_conv_algorithm_picker",
        ":gpu_conv_rewriter",
        ":gpu_device_info",
        ":gpu_executable",
        ":gpu_hlo_schedule",
        ":gpu_layout_assignment",
        ":gpu_reduce_scatter_creator",
        ":gpu_sanitize_constant_names",
        ":gpu_scatter_expander",
        ":gpu_shape_verifier",
        ":matmul_utils",
        "@llvm-project//mlir:FuncDialect",
        "//tensorflow/compiler/xla/service/spmd:stateful_rng_spmd_partitioner",
        ":gpu_hlo_cost_analysis",
        ":horizontal_input_fusion",
        ":horizontal_loop_fusion",
        ":instruction_fusion",
        ":ir_emission_utils",
        ":ir_emitter",
        ":launch_dimensions",
        ":metrics",
        ":multi_output_fusion",
        ":nccl_collective_thunks",
        ":reduction_degenerate_dim_remover",
        ":reduction_dimension_grouper",
        ":reduction_layout_normalizer",
        ":reduction_splitter",
        ":hlo_fusion_stats",
        ":stream_executor_util",
        ":target_constants",
        ":tree_reduction_rewriter",
        ":variadic_op_splitter",
        "//tensorflow/compiler/xla/service:gather_simplifier",
        "//tensorflow/compiler/xla/service:layout_normalization",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:variant",
        "@llvm-project//llvm:AsmParser",
        "@llvm-project//llvm:BitReader",
        "@llvm-project//llvm:BitWriter",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:TransformUtils",
        "@llvm-project//mlir:AllPassesAndDialects",
        "@llvm-project//mlir:ArithmeticDialect",
        "@llvm-project//mlir:GPUTransforms",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Pass",
        "@llvm-project//mlir:Transforms",
        "//tensorflow/compiler/mlir:name_utils",
        "//tensorflow/compiler/xla/mlir_hlo:gpu_fusion_rewrite",
        "//tensorflow/compiler/mlir/xla:hlo_utils",
        "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla",
        "//tensorflow/compiler/mlir/xla:type_to_shape",
        "//tensorflow/compiler/xla/service:bitcast_dtypes_expander",
        "//tensorflow/compiler/xla/service:scatter_simplifier",
        "//tensorflow/compiler/xla/service:simplify_fp_conversions",
        "//tensorflow/compiler/xla:protobuf_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:broadcast_canonicalizer",
        "//tensorflow/compiler/xla/service:reduce_decomposer",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:all_gather_broadcast_reorder",
        "//tensorflow/compiler/xla/service:dynamic_dimension_simplifier",
        "//tensorflow/compiler/xla/service:all_gather_combiner",
        "//tensorflow/compiler/xla/service:all_gather_decomposer",
        "//tensorflow/compiler/xla/service:all_reduce_combiner",
        "//tensorflow/compiler/xla/service:all_reduce_contiguous",
        "//tensorflow/compiler/xla/service:all_reduce_folder",
        "//tensorflow/compiler/xla/service:all_reduce_reassociate",
        "//tensorflow/compiler/xla/service:all_to_all_decomposer",
        "//tensorflow/compiler/xla/service:async_collective_creator",
        "//tensorflow/compiler/xla/service:batchnorm_expander",
        "//tensorflow/compiler/xla/service:bfloat16_normalization",
        "//tensorflow/compiler/xla/service:bitcast_decomposer",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:collectives_schedule_linearizer",
        "//tensorflow/compiler/xla/service:comparison_expander",
        "//tensorflow/compiler/xla/service:conditional_canonicalizer",
        "//tensorflow/compiler/xla/service:convert_mover",
        "//tensorflow/compiler/xla/service:conditional_simplifier",
        "//tensorflow/compiler/xla/service:convolution_4d_expander",
        "//tensorflow/compiler/xla/service:convolution_pred_expander",
        "//tensorflow/compiler/xla/service:copy_insertion",
        "//tensorflow/compiler/xla/service:dot_decomposer",
        "//tensorflow/compiler/xla/service:dot_merger",
        "//tensorflow/compiler/xla/service:dump",
        "//tensorflow/compiler/xla/service:dynamic_index_splitter",
        "//tensorflow/compiler/xla/service:dynamic_padder",
        "//tensorflow/compiler/xla/service:eigh_expander",
        "//tensorflow/compiler/xla/service:executable",
        "//tensorflow/compiler/xla/service:flatten_call_graph",
        "//tensorflow/compiler/xla/service:gather_expander",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_constant_folding",
        "//tensorflow/compiler/xla/service:hlo_cse",
        "//tensorflow/compiler/xla/service:hlo_dataflow_analysis",
        "//tensorflow/compiler/xla/service:hlo_dce",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:hlo_proto_util",
        "//tensorflow/compiler/xla/service:hlo_subcomputation_unification",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/compiler/xla/service:llvm_compiler",
        "//tensorflow/compiler/xla/service:logistic_expander",
        "//tensorflow/compiler/xla/service:loop_schedule_linearizer",
        "//tensorflow/compiler/xla/service:optimization_barrier_expander",
        "//tensorflow/compiler/xla/service:operand_upcaster",
        "//tensorflow/compiler/xla/service:qr_expander",
        "//tensorflow/compiler/xla/service:real_imag_expander",
        "//tensorflow/compiler/xla/service:reduce_scatter_combiner",
        "//tensorflow/compiler/xla/service:reshape_decomposer",
        "//tensorflow/compiler/xla/service:reshape_mover",
        "//tensorflow/compiler/xla/service:result_caster",
        "//tensorflow/compiler/xla/service:rng_bit_generator_expander",
        "//tensorflow/compiler/xla/service:rng_expander",
        "//tensorflow/compiler/xla/service:scatter_expander",
        "//tensorflow/compiler/xla/service:sharding_propagation",
        "//tensorflow/compiler/xla/service:sharding_remover",
        "//tensorflow/compiler/xla/service:slice_sinker",
        "//tensorflow/compiler/xla/service:slow_operation_alarm",
        "//tensorflow/compiler/xla/service:sort_simplifier",
        "//tensorflow/compiler/xla/service:stable_sort_expander",
        "//tensorflow/compiler/xla/service:transpose_folding",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla/service:while_loop_constant_sinking",
        "//tensorflow/compiler/xla/service:while_loop_simplifier",
        "//tensorflow/compiler/xla/service:while_loop_trip_count_annotator",
        "//tensorflow/compiler/xla/service:zero_sized_hlo_elimination",
        "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:regexp",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/profiler/lib:traceme",
        "//tensorflow/stream_executor:stream_executor_headers",
        ":runtime_intrinsics",
    ] + select({
        ":is_xlir_enabled": [
            ":jitrt_custom_calls",
            "//tensorflow/compiler/xla/runtime:jit_executable",
            "//tensorflow/compiler/mlir/tfrt/transforms/lmhlo_to_gpu:pass_utils",
            "//tensorflow/compiler/xla/mlir/transforms/runtime:compilation_pipeline",
        ],
        "//conditions:default": [],
    }),
)

# Registration target for the NVPTX compiler: compiles
# nvptx_compiler_registration.cc and depends on :nvptx_compiler_impl, both only
# inside if_cuda_is_configured(). alwayslink is set because the target contains
# compiler registration (see the inline comment).
cc_library(
    name = "nvptx_compiler",
    srcs = if_cuda_is_configured([
        "nvptx_compiler_registration.cc",
    ]),
    deps = if_cuda_is_configured([
        ":nvptx_compiler_impl",
    ]),
    alwayslink = True,  # Contains compiler registration
)

# Library for nvptx_helper.{cc,h}. Unlike the nvptx_compiler* targets, its
# attributes are not wrapped in if_cuda_is_configured().
cc_library(
    name = "nvptx_helper",
    srcs = ["nvptx_helper.cc"],
    hdrs = ["nvptx_helper.h"],
    deps = [
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:cuda_libdevice_path",
        "@com_google_absl//absl/strings",
    ],
)

# Implementation target for nvptx_compiler.{cc,h}. srcs, hdrs and deps are all
# wrapped in if_cuda_is_configured(). :nvptx_compiler above wraps this target
# together with the registration source.
cc_library(
    name = "nvptx_compiler_impl",
    srcs = if_cuda_is_configured([
        "nvptx_compiler.cc",
    ]),
    hdrs = if_cuda_is_configured([
        "nvptx_compiler.h",
    ]),
    deps = if_cuda_is_configured([
        ":cublas_cudnn",
        ":cublas_pad_for_gemms",
        ":cudnn_fused_conv_rewriter",
        ":cudnn_pad_for_convolutions",
        ":cudnn_simplify_padding",
        ":cudnn_vectorize_convolutions",
        ":cusolver_rewriter",
        ":gemm_algorithm_picker",
        ":gpu_asm_opts_util",
        ":gpu_compiler",
        ":gpu_conv_padding_legalization",
        ":gpu_conv_rewriter",
        ":gpu_executable",
        ":gpu_layout_assignment",
        ":ir_emission_utils",
        ":metrics",
        ":nvptx_helper",
        ":target_constants",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/container:node_hash_map",
        "@llvm-project//llvm:IRReader",
        "@llvm-project//llvm:Support",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:dump",
        "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_constant_folding",
        "//tensorflow/compiler/xla/service:hlo_cse",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:hlo_proto_cc",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/compiler/xla/service:llvm_compiler",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:cuda_libdevice_path",
        "//tensorflow/core/profiler/lib:traceme",
        "//tensorflow/stream_executor/cuda:cuda_diagnostics",
        "//tensorflow/stream_executor/gpu:asm_compiler",
        "//tensorflow/stream_executor/gpu:gpu_driver_header",
        "//tensorflow/stream_executor:stream_executor_headers",
        ":triangular_solve_rewriter",
    ]),
)

# Test for :nvptx_compiler_impl. Tagged "gpu", "no_rocm" and "nomsan".
#
# NOTE(review): srcs is gated by if_gpu_is_configured(), whereas
# :nvptx_compiler_impl gates its srcs/hdrs with if_cuda_is_configured(). If
# if_gpu_is_configured() also yields the source list for ROCm-only
# configurations, this test would compile without nvptx_compiler.h available.
# The "no_rocm" tag may be what keeps that from mattering in practice; confirm
# before changing either gate.
tf_cc_test(
    name = "nvptx_compiler_test",
    srcs = if_gpu_is_configured([
        "nvptx_compiler_test.cc",
    ]),
    tags = [
        "gpu",
        "no_rocm",
        "nomsan",  # Pulls in precompiled NVIDIA libraries which cause false
        # positives in msan.
    ],
    deps = [
        ":nvptx_compiler_impl",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:buffer_assignment",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
    ],
)

# TODO(ezhulenev): This test breaks MacOS build, try to re-enable it later.
# copybara:uncomment_begin
# tf_cc_test(
#     name = "gpu_aot_compilation_test",
#     srcs = [
#         "gpu_aot_compilation_test.cc",
#     ],
#     env = {
#         "XLA_FLAGS": "--xla_gpu_jitrt_executable",
#     },
#     tags = [
#         "gpu",
#         "no_oss",
#         "no_rocm",
#         "nomsan",  # Pulls in precompiled NVIDIA libraries which cause false positives in msan.
#         "requires-gpu-nvidia",
#     ],
#     deps = [
#         ":nvptx_compiler_impl",
#         "//tensorflow/compiler/xla/tests:hlo_test_base",
#         "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
#     ],
# )
# copybara:uncomment_end

# Registration wrapper around :amdgpu_compiler_impl. Both srcs and deps are
# wrapped in if_rocm_is_configured(), so the target carries nothing unless ROCm
# is configured.
cc_library(
    name = "amdgpu_compiler",
    srcs = if_rocm_is_configured([
        "amdgpu_compiler_registration.cc",
    ]),
    deps = if_rocm_is_configured([
        ":amdgpu_compiler_impl",
    ]),
    alwayslink = True,  # Contains compiler registration
)

# amdgpu_compiler.{cc,h}. Sources, headers and deps are all gated on ROCm being
# configured.
cc_library(
    name = "amdgpu_compiler_impl",
    srcs = if_rocm_is_configured([
        "amdgpu_compiler.cc",
    ]),
    hdrs = if_rocm_is_configured([
        "amdgpu_compiler.h",
    ]),
    deps = if_rocm_is_configured([
        ":cusolver_rewriter",
        ":gemm_rewriter",
        ":gpu_compiler",
        ":gpu_conv_algorithm_picker",
        ":gpu_conv_padding_legalization",
        ":gpu_conv_rewriter",
        ":gpu_layout_assignment",
        ":reduction_degenerate_dim_remover",
        ":reduction_dimension_grouper",
        ":reduction_layout_normalizer",
        ":target_constants",
        ":tree_reduction_rewriter",
        ":triangular_solve_rewriter",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:call_inliner",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_constant_folding",
        "//tensorflow/compiler/xla/service:hlo_cse",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
        "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
        "//tensorflow/core/platform:rocm_rocdl_path",
    ]),
)

# all_reduce_blueconnect.{cc,h}.
cc_library(
    name = "all_reduce_blueconnect",
    srcs = ["all_reduce_blueconnect.cc"],
    hdrs = ["all_reduce_blueconnect.h"],
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_query",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:btree",
        "@com_google_absl//absl/types:span",
    ],
)

# Test for :all_reduce_blueconnect.
tf_cc_test(
    name = "all_reduce_blueconnect_test",
    srcs = ["all_reduce_blueconnect_test.cc"],
    deps = [
        ":all_reduce_blueconnect",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/core:test_main",
        "//tensorflow/core/platform:status_matchers",
    ],
)

# Header-only target (no srcs) exposing xfeed_queue.h.
cc_library(
    name = "xfeed_queue",
    hdrs = ["xfeed_queue.h"],
    deps = [
        "//tensorflow/core:lib",
        "@com_google_absl//absl/base:core_headers",
    ],
)

# infeed_manager / outfeed_manager. xla_executor_state.h is listed under srcs,
# not hdrs, so it is a private header of this target.
cc_library(
    name = "io_feed_manager",
    srcs = [
        "infeed_manager.cc",
        "outfeed_manager.cc",
        "xla_executor_state.h",
    ],
    hdrs = [
        "infeed_manager.h",
        "outfeed_manager.h",
    ],
    # GOOGLE_CUDA=1 is defined only when CUDA is configured. local_defines keeps
    # the define scoped to this target's own compile actions.
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        ":xfeed_queue",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:shape_tree",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor/gpu:gpu_executor_header",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/memory",
    ],
)

# gpu_shape_verifier.{cc,h}.
cc_library(
    name = "gpu_shape_verifier",
    srcs = ["gpu_shape_verifier.cc"],
    hdrs = ["gpu_shape_verifier.h"],
    deps = [
        "//tensorflow/compiler/xla/service:hlo_verifier",
    ],
)

# gpu_layout_assignment.{cc,h}.
cc_library(
    name = "gpu_layout_assignment",
    srcs = ["gpu_layout_assignment.cc"],
    hdrs = ["gpu_layout_assignment.h"],
    deps = [
        ":backend_configs_cc",
        ":ir_emission_utils",
        ":matmul_utils",
        ":stream_executor_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:window_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:computation_layout",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:layout_assignment",
        "//tensorflow/core:lib",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/types:span",
    ],
)

# Test for :gpu_layout_assignment; tags are supplied by tf_cuda_tests_tags().
tf_cc_test(
    name = "gpu_layout_assignment_test",
    srcs = ["gpu_layout_assignment_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":cublas_cudnn",
        ":gemm_rewriter",
        ":gpu_layout_assignment",
        "//tensorflow/compiler/xla:shape_layout",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:computation_layout",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//tensorflow/core/platform:status_matchers",
        "//tensorflow/stream_executor/lib",
        "@com_google_absl//absl/strings",
    ],
)

# gpu_hlo_schedule.{cc,h}.
cc_library(
    name = "gpu_hlo_schedule",
    srcs = ["gpu_hlo_schedule.cc"],
    hdrs = ["gpu_hlo_schedule.h"],
    deps = [
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:buffer_value",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_memory_scheduler",
        "//tensorflow/compiler/xla/service:hlo_ordering",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/memory",
    ],
)

# Test for :gpu_hlo_schedule.
tf_cc_test(
    name = "gpu_hlo_schedule_test",
    srcs = [
        "gpu_hlo_schedule_test.cc",
    ],
    tags = ["no_pip"],
    deps = [
        ":gpu_hlo_schedule",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings:str_format",
    ],
)

# while_transformer_test.cc; links against :instruction_fusion.
tf_cc_test(
    name = "while_transformer_test",
    srcs = ["while_transformer_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":instruction_fusion",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:copy_insertion",
        "//tensorflow/compiler/xla/service:hlo_verifier",
        "//tensorflow/compiler/xla/service:while_loop_analysis",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:test",
    ],
)

# stream_executor_util.{cc,h}; compiled with tf_copts().
cc_library(
    name = "stream_executor_util",
    srcs = ["stream_executor_util.cc"],
    hdrs = ["stream_executor_util.h"],
    copts = tf_copts(),
    deps = [
        ":cublas_cudnn",
        ":launch_dimensions",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core/platform:cuda_libdevice_path",
        "//tensorflow/core/platform:regexp",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/profiler/lib:traceme",
        "//tensorflow/core/protobuf:autotuning_proto_cc",
        "//tensorflow/core/util:determinism_for_kernels",
        "//tensorflow/core/util/proto:proto_utils",
        "//tensorflow/stream_executor:kernel_spec",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
)

# gpu_asm_opts_util.{cc,h}; compiled with tf_copts().
cc_library(
    name = "gpu_asm_opts_util",
    srcs = ["gpu_asm_opts_util.cc"],
    hdrs = ["gpu_asm_opts_util.h"],
    copts = tf_copts(),
    deps = [
        "//tensorflow/compiler/xla:xla_proto_cc",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
        "@com_google_absl//absl/strings",
    ],
)

# gpu_hlo_cost_analysis.{cc,h}; compatible_with comes from
# get_compatible_with_cloud().
cc_library(
    name = "gpu_hlo_cost_analysis",
    srcs = ["gpu_hlo_cost_analysis.cc"],
    hdrs = ["gpu_hlo_cost_analysis.h"],
    compatible_with = get_compatible_with_cloud(),
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//tensorflow/compiler/xla/service:hlo_cost_analysis",
    ],
)

# Test for :gpu_hlo_cost_analysis.
tf_cc_test(
    name = "gpu_hlo_cost_analysis_test",
    srcs = ["gpu_hlo_cost_analysis_test.cc"],
    deps = [
        ":gpu_hlo_cost_analysis",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)

# buffer_comparator.{cc,h}. srcs, hdrs and deps are all wrapped in
# if_cuda_is_configured(), so the target carries nothing unless CUDA is
# configured.
cc_library(
    name = "buffer_comparator",
    srcs = if_cuda_is_configured(["buffer_comparator.cc"]),
    hdrs = if_cuda_is_configured(["buffer_comparator.h"]),
    deps = if_cuda_is_configured([
        ":gpu_asm_opts_util",
        ":launch_dimensions",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:hlo_module_config",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/stream_executor:stream_executor_headers",
        "//tensorflow/stream_executor/gpu:asm_compiler",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/strings",
    ]),
)

# Test for :buffer_comparator. The test source and the CUDA-specific deps are
# added only when CUDA is configured; the first deps list is unconditional.
tf_cc_test(
    name = "buffer_comparator_test",
    srcs = if_cuda_is_configured(["buffer_comparator_test.cc"]),
    tags = tf_cuda_tests_tags(),
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
    ] + if_cuda_is_configured([
        ":buffer_comparator",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/tsl/platform/default/build_config:stream_executor_cuda",  # build_cleaner: keep
    ]),
)

# gpu_fusible.{cc,h}.
cc_library(
    name = "gpu_fusible",
    srcs = ["gpu_fusible.cc"],
    hdrs = ["gpu_fusible.h"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:instruction_fusion",
    ],
)

# Test for :gpu_fusible.
tf_cc_test(
    name = "gpu_fusible_test",
    srcs = ["gpu_fusible_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":gpu_fusible",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
    ],
)

# cudnn_fused_conv_rewriter.{cc,h}.
cc_library(
    name = "cudnn_fused_conv_rewriter",
    srcs = ["cudnn_fused_conv_rewriter.cc"],
    hdrs = ["cudnn_fused_conv_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:comparison_util",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/stream_executor:dnn_proto_cc",
        "//tensorflow/core/platform:errors",
        "//tensorflow/core/platform:statusor",
        "//tensorflow/core/platform:stream_executor_no_cuda",
    ],
)

# Test for :cudnn_fused_conv_rewriter. It is also listed by label in the
# jitrt_executable_tests suite in this file.
tf_cc_test(
    name = "cudnn_fused_conv_rewriter_test",
    srcs = ["cudnn_fused_conv_rewriter_test.cc"],
    tags = [
        "gpu",
        "no_oss",
        "noasan",
        "nomsan",
        "requires-gpu-sm70",
    ],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":cudnn_fused_conv_rewriter",
        ":gpu_conv_rewriter",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:algebraic_simplifier",
        "//tensorflow/compiler/xla/service:convert_mover",
        "//tensorflow/compiler/xla/service:hlo_constant_folding",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/service:pattern_matcher_gmock",
        "//tensorflow/compiler/xla/service:reshape_mover",
        "//tensorflow/compiler/xla/service/gpu/tests:gpu_codegen_test",
        "//tensorflow/compiler/xla/tests:filecheck",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "@com_google_absl//absl/strings",
    ],
)

# variadic_op_splitter.{cc,h}.
cc_library(
    name = "variadic_op_splitter",
    srcs = ["variadic_op_splitter.cc"],
    hdrs = ["variadic_op_splitter.h"],
    deps = [
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
)

# gpu_scatter_expander.{cc,h}; depends on the generic
# //tensorflow/compiler/xla/service:scatter_expander.
cc_library(
    name = "gpu_scatter_expander",
    srcs = ["gpu_scatter_expander.cc"],
    hdrs = ["gpu_scatter_expander.h"],
    deps = [
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:scatter_expander",
        "@com_google_absl//absl/algorithm:container",
    ],
)

# Test for :variadic_op_splitter.
tf_cc_test(
    name = "variadic_op_splitter_test",
    srcs = ["variadic_op_splitter_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":ir_emission_utils",
        ":variadic_op_splitter",
        "//tensorflow/compiler/xla:literal_util",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)

# Proto library for gpu_autotuning.proto. :hlo_algorithm_denylist below depends
# on it through the :gpu_autotuning_proto_cc label.
tf_proto_library(
    name = "gpu_autotuning_proto",
    srcs = ["gpu_autotuning.proto"],
    cc_api_version = 2,
    protodeps = [
        "//tensorflow/compiler/xla:xla_data_proto",
        "//tensorflow/compiler/xla/service:hlo_proto",
        "//tensorflow/core/protobuf:autotuning_proto",
    ],
)

# hlo_algorithm_denylist.{cc,h}.
cc_library(
    name = "hlo_algorithm_denylist",
    srcs = ["hlo_algorithm_denylist.cc"],
    hdrs = ["hlo_algorithm_denylist.h"],
    deps = [
        ":gpu_autotuning_proto_cc",
        "//tensorflow/compiler/xla:debug_options_flags",
        "//tensorflow/core/platform:stream_executor_no_cuda",
        "//tensorflow/core/protobuf:autotuning_proto_cc",
        "@com_google_absl//absl/container:flat_hash_map",
    ],
)

# Test for :hlo_algorithm_denylist; data/hlo_algorithm_denylist.pbtxt is made
# available at run time through `data`.
tf_cc_test(
    name = "hlo_algorithm_denylist_test",
    srcs = ["hlo_algorithm_denylist_test.cc"],
    data = ["data/hlo_algorithm_denylist.pbtxt"],
    tags = ["no_pip"],
    deps = [
        ":hlo_algorithm_denylist",
        "//tensorflow/core:lib",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core/platform:resource_loader",
        "//tensorflow/stream_executor:dnn",
    ],
)

# alias_passthrough_params.{cc,h}.
cc_library(
    name = "alias_passthrough_params",
    srcs = ["alias_passthrough_params.cc"],
    hdrs = ["alias_passthrough_params.h"],
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
    ],
)

# Test for :alias_passthrough_params.
tf_cc_test(
    name = "alias_passthrough_params_test",
    srcs = ["alias_passthrough_params_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":alias_passthrough_params",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:test_utils",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:lib",
        "//tensorflow/core:test",
    ],
)
# horizontal_loop_fusion.{cc,h}.
cc_library(
    name = "horizontal_loop_fusion",
    srcs = ["horizontal_loop_fusion.cc"],
    hdrs = ["horizontal_loop_fusion.h"],
    deps = [
        ":gpu_fusible",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/types:span",
    ],
)

# Test for :horizontal_loop_fusion; tags are supplied by tf_cuda_tests_tags().
# Besides the library under test it links :fusion_merger, :instruction_fusion
# and :multi_output_fusion.
tf_cc_test(
    name = "horizontal_loop_fusion_test",
    srcs = ["horizontal_loop_fusion_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":fusion_merger",
        ":horizontal_loop_fusion",
        ":instruction_fusion",
        ":multi_output_fusion",
        "//tensorflow/compiler/jit:xla_gpu_jit",
        "//tensorflow/compiler/xla:literal",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:hlo_dce",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service:tuple_simplifier",
        "//tensorflow/compiler/xla/tests:filecheck",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)

# horizontal_input_fusion.{cc,h}.
cc_library(
    name = "horizontal_input_fusion",
    srcs = ["horizontal_input_fusion.cc"],
    hdrs = ["horizontal_input_fusion.h"],
    deps = [
        ":gpu_fusible",
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
)

# Test for :horizontal_input_fusion; tags are supplied by tf_cuda_tests_tags().
tf_cc_test(
    name = "horizontal_input_fusion_test",
    srcs = ["horizontal_input_fusion_test.cc"],
    tags = tf_cuda_tests_tags(),
    deps = [
        ":horizontal_input_fusion",
        ":multi_output_fusion",
        "//tensorflow/compiler/jit:xla_gpu_jit",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/service:hlo_pass_pipeline",
        "//tensorflow/compiler/xla/service/gpu/tests:gpu_codegen_test",
        "//tensorflow/compiler/xla/tests:filecheck",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)

# reduction_degenerate_dim_remover.{cc,h}.
cc_library(
    name = "reduction_degenerate_dim_remover",
    srcs = ["reduction_degenerate_dim_remover.cc"],
    hdrs = ["reduction_degenerate_dim_remover.h"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/core:lib",
        "//tensorflow/stream_executor/lib",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings",
    ],
)

# reduction_dimension_grouper.{cc,h}.
cc_library(
    name = "reduction_dimension_grouper",
    srcs = ["reduction_dimension_grouper.cc"],
    hdrs = ["reduction_dimension_grouper.h"],
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
    ],
)

# reduction_splitter.{cc,h}.
cc_library(
    name = "reduction_splitter",
    srcs = ["reduction_splitter.cc"],
    hdrs = ["reduction_splitter.h"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
    ],
)

# Test for :reduction_splitter.
tf_cc_test(
    name = "reduction_splitter_test",
    srcs = ["reduction_splitter_test.cc"],
    deps = [
        ":reduction_splitter",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:test",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:hlo_matchers",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
    ],
)

# reduction_layout_normalizer.{cc,h}.
cc_library(
    name = "reduction_layout_normalizer",
    srcs = ["reduction_layout_normalizer.cc"],
    hdrs = ["reduction_layout_normalizer.h"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/core:lib",
        "//tensorflow/stream_executor/lib",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings",
    ],
)

# tree_reduction_rewriter.{cc,h}.
cc_library(
    name = "tree_reduction_rewriter",
    srcs = ["tree_reduction_rewriter.cc"],
    hdrs = ["tree_reduction_rewriter.h"],
    deps = [
        ":ir_emission_utils",
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla:xla_data_proto_cc",
        "//tensorflow/compiler/xla/service:collective_ops_utils",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/strings",
    ],
)

# gemm_broadcast_folding_rewriter.{cc,h}.
cc_library(
    name = "gemm_broadcast_folding_rewriter",
    srcs = ["gemm_broadcast_folding_rewriter.cc"],
    hdrs = ["gemm_broadcast_folding_rewriter.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/compiler/xla/service:pattern_matcher",
        "//tensorflow/core:lib_proto_parsing",
        "//tensorflow/stream_executor/lib",
        "@com_google_absl//absl/algorithm:container",
    ],
)

# These tests are intended to be run with --test_env=XLA_FLAGS=--xla_gpu_jitrt_executable
# See tap/tensorflow.xla_gpu_jitrt.
2599test_suite( 2600 name = "jitrt_executable_tests", 2601 tests = [ 2602 # copybara:uncomment "//platforms/xla/tests/internal:xfeed_test_gpu", 2603 "//tensorflow/compiler/tests:fft_test_gpu", 2604 "//tensorflow/compiler/xla/python:xla_client_test_gpu", 2605 "//tensorflow/compiler/xla/service/gpu:cudnn_fused_conv_rewriter_test", 2606 "//tensorflow/compiler/xla/service/gpu:custom_call_test", 2607 # copybara:uncomment "//tensorflow/compiler/xla/service/gpu:gpu_aot_compilation_test", 2608 "//tensorflow/compiler/xla/service/gpu/tests:add_preds.hlo.test", 2609 "//tensorflow/compiler/xla/service/gpu/tests:all_reduce.hlo.test", 2610 "//tensorflow/compiler/xla/service/gpu/tests:concat.hlo.test", 2611 "//tensorflow/compiler/xla/service/gpu/tests:constant.hlo.test", 2612 "//tensorflow/compiler/xla/service/gpu/tests:copy.hlo.test", 2613 "//tensorflow/compiler/xla/service/gpu/tests:copy_nested.hlo.test", 2614 "//tensorflow/compiler/xla/service/gpu/tests:dynamic_update_slice_inplace.hlo.test", 2615 "//tensorflow/compiler/xla/service/gpu/tests:element_wise_row_vectorization.hlo.test", 2616 "//tensorflow/compiler/xla/service/gpu/tests:element_wise_row_vectorization_test", 2617 "//tensorflow/compiler/xla/service/gpu/tests:fused_scatter.hlo.test", 2618 "//tensorflow/compiler/xla/service/gpu/tests:fused_slice.hlo.test", 2619 "//tensorflow/compiler/xla/service/gpu/tests:fused_slice_different_operands.hlo.test", 2620 "//tensorflow/compiler/xla/service/gpu/tests:fusion.hlo.test", 2621 "//tensorflow/compiler/xla/service/gpu/tests:fusion_logical_index_test", 2622 "//tensorflow/compiler/xla/service/gpu/tests:gemm_broadcast_folding_rewrite_test", 2623 "//tensorflow/compiler/xla/service/gpu/tests:gemm_rewrite_test", 2624 "//tensorflow/compiler/xla/service/gpu/tests:gpu_alignment_test", 2625 "//tensorflow/compiler/xla/service/gpu/tests:gpu_atomic_test", 2626 "//tensorflow/compiler/xla/service/gpu/tests:gpu_compilation_parallelism_test", 2627 
"//tensorflow/compiler/xla/service/gpu/tests:gpu_convolution_regression_test", 2628 "//tensorflow/compiler/xla/service/gpu/tests:gpu_copy_alone_test", 2629 "//tensorflow/compiler/xla/service/gpu/tests:gpu_copy_test", 2630 "//tensorflow/compiler/xla/service/gpu/tests:gpu_dyn_shape_test", 2631 "//tensorflow/compiler/xla/service/gpu/tests:gpu_ftz_test", 2632 "//tensorflow/compiler/xla/service/gpu/tests:gpu_fusion_test", 2633 "//tensorflow/compiler/xla/service/gpu/tests:gpu_index_test", 2634 "//tensorflow/compiler/xla/service/gpu/tests:gpu_infeed_test", 2635 "//tensorflow/compiler/xla/service/gpu/tests:gpu_input_fusible_slice_test", 2636 "//tensorflow/compiler/xla/service/gpu/tests:gpu_kernel_tiling_test", 2637 "//tensorflow/compiler/xla/service/gpu/tests:gpu_ldg_test", 2638 "//tensorflow/compiler/xla/service/gpu/tests:gpu_noalias_test", 2639 "//tensorflow/compiler/xla/service/gpu/tests:gpu_reduce_scatter_creator_test", 2640 "//tensorflow/compiler/xla/service/gpu/tests:gpu_spmd_e2e_compile_test", 2641 "//tensorflow/compiler/xla/service/gpu/tests:gpu_too_many_blocks_test", 2642 "//tensorflow/compiler/xla/service/gpu/tests:gpu_unrolling_test", 2643 "//tensorflow/compiler/xla/service/gpu/tests:in_place_op_test", 2644 "//tensorflow/compiler/xla/service/gpu/tests:kernel_launch_test", 2645 "//tensorflow/compiler/xla/service/gpu/tests:launch_dimensions.hlo.test", 2646 "//tensorflow/compiler/xla/service/gpu/tests:mlir_fft_test", 2647 "//tensorflow/compiler/xla/service/gpu/tests:mlir_gemm_test", 2648 "//tensorflow/compiler/xla/service/gpu/tests:mlir_gpu_compile_test", 2649 "//tensorflow/compiler/xla/service/gpu/tests:mlir_sorting_test", 2650 "//tensorflow/compiler/xla/service/gpu/tests:pad_to_static.hlo.test", 2651 "//tensorflow/compiler/xla/service/gpu/tests:parallel_reduction_test", 2652 "//tensorflow/compiler/xla/service/gpu/tests:pred_arithmetic_test", 2653 "//tensorflow/compiler/xla/service/gpu/tests:reduce_unnested.hlo.test", 2654 
"//tensorflow/compiler/xla/service/gpu/tests:reduction_degenerate_dim_remover_test", 2655 "//tensorflow/compiler/xla/service/gpu/tests:reduction_dimension_grouper_test", 2656 "//tensorflow/compiler/xla/service/gpu/tests:reduction_layout_normalizer_test", 2657 "//tensorflow/compiler/xla/service/gpu/tests:reduction_vectorization_sm_all.hlo.test", 2658 "//tensorflow/compiler/xla/service/gpu/tests:reduction_vectorization_test", 2659 "//tensorflow/compiler/xla/service/gpu/tests:rng_get_and_update_state.hlo.test", 2660 "//tensorflow/compiler/xla/service/gpu/tests:scatter.hlo.test", 2661 "//tensorflow/compiler/xla/service/gpu/tests:select_and_scatter.hlo.test", 2662 "//tensorflow/compiler/xla/service/gpu/tests:select_and_scatter_test", 2663 "//tensorflow/compiler/xla/service/gpu/tests:slice_to_dynamic.hlo.test", 2664 "//tensorflow/compiler/xla/service/gpu/tests:sorting.hlo.test", 2665 "//tensorflow/compiler/xla/service/gpu/tests:sorting_test", 2666 "//tensorflow/compiler/xla/service/gpu/tests:swap_conv_operands_test", 2667 "//tensorflow/compiler/xla/service/gpu/tests:tree_reduction_rewriter_test", 2668 "//tensorflow/compiler/xla/tests:all_reduce_test_gpu", 2669 "//tensorflow/compiler/xla/tests:array_elementwise_ops_test_gpu", 2670 "//tensorflow/compiler/xla/tests:axpy_simple_test_gpu", 2671 "//tensorflow/compiler/xla/tests:bad_rng_shape_validation_test_gpu", 2672 "//tensorflow/compiler/xla/tests:batch_normalization_test_gpu", 2673 "//tensorflow/compiler/xla/tests:bfloat16_test_gpu", 2674 "//tensorflow/compiler/xla/tests:binop_scaling_test_gpu", 2675 "//tensorflow/compiler/xla/tests:bitcast_convert_test_gpu", 2676 "//tensorflow/compiler/xla/tests:broadcast_simple_test_gpu", 2677 "//tensorflow/compiler/xla/tests:broadcast_test_gpu", 2678 "//tensorflow/compiler/xla/tests:buffer_donation_test_gpu", 2679 "//tensorflow/compiler/xla/tests:call_test_gpu", 2680 "//tensorflow/compiler/xla/tests:check_execution_arity_test_gpu", 2681 
"//tensorflow/compiler/xla/tests:cholesky_test_gpu", 2682 "//tensorflow/compiler/xla/tests:client_test_gpu", 2683 "//tensorflow/compiler/xla/tests:compilation_cache_test_gpu", 2684 "//tensorflow/compiler/xla/tests:compute_constant_test_gpu", 2685 "//tensorflow/compiler/xla/tests:concat_test_gpu", 2686 "//tensorflow/compiler/xla/tests:conditional_test_gpu", 2687 "//tensorflow/compiler/xla/tests:constant_reduction_function_test_gpu", 2688 "//tensorflow/compiler/xla/tests:constants_test_gpu", 2689 "//tensorflow/compiler/xla/tests:conv_depthwise_backprop_filter_test_gpu", 2690 "//tensorflow/compiler/xla/tests:conv_depthwise_test_gpu", 2691 "//tensorflow/compiler/xla/tests:convert_test_gpu", 2692 "//tensorflow/compiler/xla/tests:convolution_dimension_numbers_test_gpu", 2693 "//tensorflow/compiler/xla/tests:convolution_test_1d_autotune_disabled_gpu", 2694 "//tensorflow/compiler/xla/tests:convolution_test_1d_gpu_alternative_layout_gpu", 2695 "//tensorflow/compiler/xla/tests:convolution_test_1d_no_vmodule_gpu", 2696 "//tensorflow/compiler/xla/tests:convolution_test_autotune_disabled_gpu", 2697 "//tensorflow/compiler/xla/tests:convolution_test_cudnn_frontend_disabled_gpu", 2698 "//tensorflow/compiler/xla/tests:convolution_test_gpu", 2699 "//tensorflow/compiler/xla/tests:convolution_test_gpu_alternative_layout_gpu", 2700 "//tensorflow/compiler/xla/tests:convolution_variants_test_gpu", 2701 "//tensorflow/compiler/xla/tests:copy_test_gpu", 2702 "//tensorflow/compiler/xla/tests:cpu_gpu_fusion_test_gpu", 2703 "//tensorflow/compiler/xla/tests:deallocation_test_gpu", 2704 "//tensorflow/compiler/xla/tests:deconstruct_tuple_test_gpu", 2705 "//tensorflow/compiler/xla/tests:deep_graph_test_gpu", 2706 "//tensorflow/compiler/xla/tests:dot_operation_single_threaded_runtime_test_gpu", 2707 "//tensorflow/compiler/xla/tests:dot_operation_test_autotune_disabled_gpu", 2708 "//tensorflow/compiler/xla/tests:dot_operation_test_gpu", 2709 "//tensorflow/compiler/xla/tests:dynamic_ops_test_gpu", 
2710 "//tensorflow/compiler/xla/tests:execution_profile_test_gpu", 2711 "//tensorflow/compiler/xla/tests:execution_profile_test_with_xla_hlo_profile_gpu", 2712 "//tensorflow/compiler/xla/tests:exhaustive_binary_16_bit_test_gpu", 2713 "//tensorflow/compiler/xla/tests:exhaustive_binary_test_f32_f64_gpu", 2714 "//tensorflow/compiler/xla/tests:exhaustive_unary_test_complex_gpu", 2715 "//tensorflow/compiler/xla/tests:exhaustive_unary_test_f32_or_smaller_gpu", 2716 "//tensorflow/compiler/xla/tests:exhaustive_unary_test_f64_gpu", 2717 "//tensorflow/compiler/xla/tests:floor_ceil_test_gpu", 2718 "//tensorflow/compiler/xla/tests:fmax_fmin_test_gpu", 2719 "//tensorflow/compiler/xla/tests:gather_operation_test_gpu", 2720 "//tensorflow/compiler/xla/tests:get_dimension_size_test_gpu", 2721 "//tensorflow/compiler/xla/tests:grouped_convolution_test_gpu", 2722 "//tensorflow/compiler/xla/tests:half_test_gpu", 2723 "//tensorflow/compiler/xla/tests:iota_test_gpu", 2724 "//tensorflow/compiler/xla/tests:local_client_allocation_test_gpu", 2725 "//tensorflow/compiler/xla/tests:local_client_execute_test_gpu", 2726 "//tensorflow/compiler/xla/tests:log_test_gpu", 2727 "//tensorflow/compiler/xla/tests:map_test_gpu", 2728 "//tensorflow/compiler/xla/tests:matmul_test_gpu", 2729 "//tensorflow/compiler/xla/tests:matrix_ops_simple_test_gpu", 2730 "//tensorflow/compiler/xla/tests:multidimensional_slice_test_gpu", 2731 "//tensorflow/compiler/xla/tests:multioutput_fusion_test_gpu", 2732 "//tensorflow/compiler/xla/tests:outfeed_in_nested_computation_test_gpu", 2733 "//tensorflow/compiler/xla/tests:pad_test_gpu", 2734 "//tensorflow/compiler/xla/tests:params_test_gpu", 2735 "//tensorflow/compiler/xla/tests:pred_test_gpu", 2736 "//tensorflow/compiler/xla/tests:prng_test_gpu", 2737 "//tensorflow/compiler/xla/tests:ptxas_bug_120501638_gpu", 2738 "//tensorflow/compiler/xla/tests:query_inferred_shape_test_gpu", 2739 "//tensorflow/compiler/xla/tests:reduce_hlo_test_gpu", 2740 
"//tensorflow/compiler/xla/tests:reduce_precision_test_gpu", 2741 "//tensorflow/compiler/xla/tests:reduce_test_gpu", 2742 "//tensorflow/compiler/xla/tests:reduce_window_test_gpu", 2743 "//tensorflow/compiler/xla/tests:replay_test_gpu", 2744 "//tensorflow/compiler/xla/tests:reshape_motion_test_gpu", 2745 "//tensorflow/compiler/xla/tests:reshape_test_gpu", 2746 "//tensorflow/compiler/xla/tests:reverse_test_gpu", 2747 "//tensorflow/compiler/xla/tests:round_trip_packed_literal_test_gpu", 2748 "//tensorflow/compiler/xla/tests:round_trip_transfer_test_gpu", 2749 "//tensorflow/compiler/xla/tests:sample_text_test_gpu", 2750 "//tensorflow/compiler/xla/tests:scalar_computations_test_gpu", 2751 "//tensorflow/compiler/xla/tests:scatter_test_gpu", 2752 "//tensorflow/compiler/xla/tests:select_and_scatter_test_gpu", 2753 "//tensorflow/compiler/xla/tests:select_test_gpu", 2754 "//tensorflow/compiler/xla/tests:slice_test_gpu", 2755 "//tensorflow/compiler/xla/tests:token_hlo_test_gpu", 2756 "//tensorflow/compiler/xla/tests:transfer_manager_test_gpu", 2757 "//tensorflow/compiler/xla/tests:transpose_test_gpu", 2758 "//tensorflow/compiler/xla/tests:triangular_solve_test_gpu", 2759 "//tensorflow/compiler/xla/tests:tuple_test_gpu", 2760 "//tensorflow/compiler/xla/tests:unary_op_test_gpu", 2761 "//tensorflow/compiler/xla/tests:value_inference_test_gpu", 2762 "//tensorflow/compiler/xla/tests:vector_ops_reduce_test_gpu", 2763 "//tensorflow/compiler/xla/tests:vector_ops_simple_test_gpu", 2764 "//tensorflow/compiler/xla/tests:while_test_gpu", 2765 "//tensorflow/compiler/xla/tests:xla_hlo_profile_test_gpu", 2766 ] + if_google([ 2767 # Currently fails in OSS. 
"//tensorflow/python/kernel_tests/signal:fft_ops_test_xla_gpu",
    ]),
)

# Library built from metrics.cc / metrics.h. Its only declared dependency is
# the TensorFlow monitoring sampler target.
cc_library(
    name = "metrics",
    srcs = ["metrics.cc"],
    hdrs = ["metrics.h"],
    deps = [
        "//tensorflow/core/lib/monitoring:sampler",
    ],
)

# Every source, header and primary dependency of this target is wrapped in
# if_gpu_is_configured(...); one extra dependency is appended through
# if_rocm_is_configured(...).
# NOTE(review): presumably both helpers yield an empty list when the
# corresponding toolchain is not configured, which would leave this target
# empty in CPU-only builds -- confirm against the loaded build_defs.bzl files.
cc_library(
    name = "precompiled_kernels",
    srcs = if_gpu_is_configured(["precompiled_kernels.cc"]),
    hdrs = if_gpu_is_configured(["precompiled_kernels.h"]),
    deps = if_gpu_is_configured([
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:types",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/stream_executor:device_memory",
        "//tensorflow/stream_executor:stream_header",
        "//tensorflow/stream_executor/gpu:asm_compiler",
        "//tensorflow/stream_executor/gpu:gpu_asm_opts",
    ]) + if_rocm_is_configured([
        "//tensorflow/stream_executor/gpu:gpu_stream_header",
    ]),
)

# Library built from triangular_solve_rewriter.cc / .h. It depends on the
# package-local :cublas_cudnn target and on the XLA HLO, HLO-creation-utils
# and HLO-pass targets.
cc_library(
    name = "triangular_solve_rewriter",
    srcs = ["triangular_solve_rewriter.cc"],
    hdrs = ["triangular_solve_rewriter.h"],
    deps = [
        ":cublas_cudnn",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_creation_utils",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "@com_google_absl//absl/strings",
    ],
)

# Declared through the tf_cuda_library macro with alwayslink = 1.
# NOTE(review): alwayslink is presumably required because the sources register
# custom-call targets via static initializers (the target depends on
# custom_call_target_registry) -- confirm in runtime_intrinsics.cc.
tf_cuda_library(
    name = "runtime_intrinsics",
    srcs = ["runtime_intrinsics.cc"],
    hdrs = ["runtime_intrinsics.h"],
    deps = [
        "//tensorflow/compiler/xla:shape_util",
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla:util",
        "//tensorflow/compiler/xla/service:custom_call_status",
        "//tensorflow/compiler/xla/service:custom_call_target_registry",
        "//tensorflow/stream_executor",
        "@com_google_absl//absl/cleanup",
    ],
    alwayslink = 1,
)

# Library built from hlo_fusion_stats.cc / .h; depends on the XLA HLO and
# HLO-pass targets plus status/error helpers.
cc_library(
    name = "hlo_fusion_stats",
    srcs = ["hlo_fusion_stats.cc"],
    hdrs = ["hlo_fusion_stats.h"],
    deps = [
        "//tensorflow/compiler/xla:status",
        "//tensorflow/compiler/xla:statusor",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_pass",
        "//tensorflow/core/platform:errors",
        "//tensorflow/core/platform:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Test target for :hlo_fusion_stats, tagged "no_pip". It links the HLO parser,
# the HLO test base and the XLA internal test main.
tf_cc_test(
    name = "hlo_fusion_stats_test",
    srcs = ["hlo_fusion_stats_test.cc"],
    tags = ["no_pip"],
    deps = [
        ":hlo_fusion_stats",
        "//tensorflow/compiler/xla:status_macros",
        "//tensorflow/compiler/xla:test_helpers",
        "//tensorflow/compiler/xla/service:hlo",
        "//tensorflow/compiler/xla/service:hlo_parser",
        "//tensorflow/compiler/xla/tests:hlo_test_base",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:test",
    ],
)