/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/ |
D | reduction_vectorization_test.cc | 56 int cc_major = 0, cc_minor = 0; in TEST_F() local 57 executor->GetDeviceDescription().cuda_compute_capability(&cc_major, in TEST_F() 60 if (cc_major >= 6) { in TEST_F() 104 int cc_major = 0, cc_minor = 0; in TEST_F() local 105 executor->GetDeviceDescription().cuda_compute_capability(&cc_major, in TEST_F() 108 if (cc_major >= 6) { in TEST_F() 152 int cc_major = 0, cc_minor = 0; in TEST_F() local 153 executor->GetDeviceDescription().cuda_compute_capability(&cc_major, in TEST_F() 156 if (cc_major >= 7) { in TEST_F() 236 int cc_major = 0, cc_minor = 0; in TEST_F() local [all …]
|
D | gpu_atomic_test.cc | 114 int cc_major = 0, cc_minor = 0; in TEST_F() local 115 device_description.cuda_compute_capability(&cc_major, &cc_minor); in TEST_F() 118 if (cc_major < 6) { in TEST_F()
|
D | mlir_gpu_test_base.cc | 60 &cuda_compute_capability.cc_major, &cuda_compute_capability.cc_minor); in CompileMlirModule() 61 if (cuda_compute_capability.cc_major == -1) { in CompileMlirModule()
|
D | hlo_to_llvm_ir.cc | 54 cuda_compute_capability.cc_major = 7; in CompileAndPrintLlvmIr()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | nvptx_compiler.h | 78 se::StreamExecutor* stream_exec, const string& ptx, int cc_major, 94 CompilationCacheKey(std::string ptx, int cc_major, int cc_minor, in CompilationCacheKey() 97 cc_major(cc_major), in CompilationCacheKey() 101 int cc_major; member 110 key.cc_major), in operator() 118 return a.cc_major == b.cc_major && a.cc_minor == b.cc_minor && in operator()
|
D | nvptx_compiler.cc | 288 int cc_major, cc_minor; in GetGpuVersion() local 289 if (!stream_exec->GetDeviceDescription().cuda_compute_capability(&cc_major, in GetGpuVersion() 293 cc_major = 2; in GetGpuVersion() 297 return std::make_pair(cc_major, cc_minor); in GetGpuVersion() 342 se::StreamExecutor* stream_exec, const string& ptx, int cc_major, in CompileGpuAsmOrGetCachedResult() argument 358 std::forward_as_tuple(ptx, cc_major, cc_minor, relocatable), in CompileGpuAsmOrGetCachedResult()
|
D | gpu_device_info.h | 27 int cc_major; member
|
D | ir_emission_utils.cc | 147 int cc_major = 0; in GetReductionTiling() local 149 cc_major = cuda_compute_capability->cc_major; in GetReductionTiling() 152 if (cc_major >= 6 && smallest_input_dtype_bits == 16) { in GetReductionTiling() 154 } else if (cc_major >= 6 && smallest_input_dtype_bits == 8) { in GetReductionTiling()
|
D | gpu_conv_algorithm_picker.cc | 181 int cc_major, cc_minor; in GetComputeCapability() local 182 stream_executor->GetDeviceDescription().cuda_compute_capability(&cc_major, in GetComputeCapability() 184 cc.set_major(cc_major); in GetComputeCapability()
|
/external/tensorflow/tensorflow/core/grappler/ |
D | devices.cc | 52 int cc_major = 0; in GetNumAvailableGPUs() local 54 desc->cuda_compute_capability(&cc_major, &cc_minor); in GetNumAvailableGPUs() 55 std::pair<int, int> cuda_compute_capability(cc_major, cc_minor); in GetNumAvailableGPUs()
|
/external/tensorflow/tensorflow/stream_executor/gpu/ |
D | asm_compiler.cc | 146 int cc_major; in CompileGpuAsm() local 149 gpu::GpuDriver::GetComputeCapability(&cc_major, &cc_minor, handle)); in CompileGpuAsm() 150 return CompileGpuAsm(cc_major, cc_minor, ptx_contents, options); in CompileGpuAsm() 180 static void LogPtxasTooOld(const std::string& ptxas_path, int cc_major, in LogPtxasTooOld() argument 190 if (already_logged->insert({ptxas_path, cc_major, cc_minor}).second) { in LogPtxasTooOld() 193 << cc_major << "." << cc_minor; in LogPtxasTooOld() 198 port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor, in CompileGpuAsm() argument 233 absl::StrCat("-arch=sm_", cc_major, cc_minor)}; in CompileGpuAsm() 263 LogPtxasTooOld(ptxas_path, cc_major, cc_minor); in CompileGpuAsm()
|
D | asm_compiler.h | 47 port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
|
D | gpu_driver.h | 464 static port::Status GetComputeCapability(int* cc_major, int* cc_minor,
|
/external/tensorflow/tensorflow/core/common_runtime/gpu/ |
D | gpu_device_test.cc | 43 Status GetComputeCapability(PlatformGpuId gpu_id, int* cc_major, in GetComputeCapability() argument 48 if (!se->GetDeviceDescription().cuda_compute_capability(cc_major, cc_minor)) { in GetComputeCapability() 49 *cc_major = 0; in GetComputeCapability() 352 int cc_major, cc_minor; in TEST_F() local 353 TF_ASSERT_OK(GetComputeCapability(PlatformGpuId(0), &cc_major, &cc_minor)); in TEST_F() 355 if (cc_major >= 6) { in TEST_F() 376 int cc_major, cc_minor; in TEST_F() local 377 TF_ASSERT_OK(GetComputeCapability(kPlatformGpuId, &cc_major, &cc_minor)); in TEST_F() 379 if (cc_major < 6) { in TEST_F()
|
D | gpu_device.cc | 910 int64 MinSystemMemory(int64 available_memory, int cc_major) { in MinSystemMemory() argument 922 if (cc_major <= 6) { in MinSystemMemory() 924 } else if (cc_major <= 7) { in MinSystemMemory() 971 int cc_major = 0, cc_minor = 0; in SingleVirtualDeviceMemoryLimit() local 972 if (!se->GetDeviceDescription().cuda_compute_capability(&cc_major, in SingleVirtualDeviceMemoryLimit() 978 if (cc_major < 6) { in SingleVirtualDeviceMemoryLimit() 987 const int64 min_system_memory = MinSystemMemory(available_memory, cc_major); in SingleVirtualDeviceMemoryLimit() 1135 int cc_major, cc_minor; in GetDeviceDetails() local 1136 if (desc->cuda_compute_capability(&cc_major, &cc_minor)) { in GetDeviceDetails() 1137 (*details)["compute_capability"] = strings::StrCat(cc_major, ".", cc_minor); in GetDeviceDetails() [all …]
|
/external/tensorflow/tensorflow/compiler/mlir/tools/kernel_gen/transforms/ |
D | gpu_kernel_to_blob_pass.cc | 184 uint32_t cc_major = arch / 10; in GetGpuBinaryBlob() local 191 std::make_pair(cc_major, cc_minor), in GetGpuBinaryBlob() 203 cc_major, cc_minor, ptx.c_str(), gpu_asm_opts)); in GetGpuBinaryBlob()
|
/external/tensorflow/tensorflow/stream_executor/ |
D | stream_executor_pimpl.cc | 271 int cc_major, cc_minor; in GetConvolveAlgorithms() local 272 GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor); in GetConvolveAlgorithms() 273 return dnn_support->GetConvolveAlgorithms(with_winograd_nonfused, cc_major, in GetConvolveAlgorithms() 312 int cc_major, cc_minor; in GetConvolveBackwardDataAlgorithms() local 313 GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor); in GetConvolveBackwardDataAlgorithms() 315 with_winograd_nonfused, cc_major, cc_minor, out_algorithms); in GetConvolveBackwardDataAlgorithms() 325 int cc_major, cc_minor; in GetConvolveBackwardFilterAlgorithms() local 326 GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor); in GetConvolveBackwardFilterAlgorithms() 328 with_winograd_nonfused, cc_major, cc_minor, out_algorithms); in GetConvolveBackwardFilterAlgorithms()
|
D | dnn.cc | 45 bool with_winograd_nonfused, int cc_major, int cc_minor, in GetConvolveAlgorithms() argument 69 bool with_winograd_nonfused, int cc_major, int cc_minor, in GetConvolveBackwardDataAlgorithms() argument 75 bool with_winograd_nonfused, int cc_major, int cc_minor, in GetConvolveBackwardFilterAlgorithms() argument
|
/external/tensorflow/tensorflow/core/kernels/ |
D | gpu_utils.cc | 116 int cc_major, cc_minor; in GetComputeCapability() local 117 stream_executor->GetDeviceDescription().cuda_compute_capability(&cc_major, in GetComputeCapability() 119 cc.set_major(cc_major); in GetComputeCapability()
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/utils/ |
D | device_util_test.cc | 93 ASSERT_EQ(device_meta_1.cc_major().getInt(), 7); in TEST() 201 ASSERT_EQ(meta_0->cc_major().getInt(), 1); in TEST()
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | cuda_dnn.cc | 672 int cc_major, cc_minor; in GetCcMajorMinor() local 673 stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major, in GetCcMajorMinor() 675 return std::make_tuple(cc_major, cc_minor); in GetCcMajorMinor() 755 static bool TensorOpMathAvailable(int cc_major) { return cc_major >= 7; } in TensorOpMathAvailable() argument 758 int cc_major, cc_minor; in IsTensorMathEnabled() local 759 std::tie(cc_major, cc_minor) = GetCcMajorMinor(stream); in IsTensorMathEnabled() 760 if (!TensorOpMathAvailable(cc_major)) { in IsTensorMathEnabled() 3295 bool with_winograd_nonfused, int cc_major, int cc_minor, in GetConvolveAlgorithms() argument 3302 bool tensor_op_math_available = TensorOpMathAvailable(cc_major); in GetConvolveAlgorithms() 3365 bool with_winograd_nonfused, int cc_major, int cc_minor, in GetConvolveBackwardDataAlgorithms() argument [all …]
|
D | cuda_gpu_executor.cc | 969 int cc_major; in CreateDeviceDescription() local 971 status = GpuDriver::GetComputeCapability(&cc_major, &cc_minor, device); in CreateDeviceDescription() 1060 absl::StrCat("Compute Capability ", cc_major, ".", cc_minor)); in CreateDeviceDescription() 1067 builder.set_cuda_compute_capability(cc_major, cc_minor); in CreateDeviceDescription()
|
D | cuda_blas.cc | 1832 int cc_major, cc_minor; in DoBlasInternalImpl() local 1834 &cc_major, &cc_minor) && in DoBlasInternalImpl() 1835 cc_major >= 8) { in DoBlasInternalImpl() 2075 int cc_major, cc_minor; in DoBlasInternalImpl() local 2077 &cc_major, &cc_minor) && in DoBlasInternalImpl() 2078 cc_major < 5) { in DoBlasInternalImpl() 2079 VLOG(2) << "DoBlasGemmWithAlgorithm returning false because sm" << cc_major in DoBlasInternalImpl() 2087 if (cc_major < 7) { in DoBlasInternalImpl() 2091 << cc_major << "X devices."; in DoBlasInternalImpl() 2101 if (cc_major < 8) { in DoBlasInternalImpl() [all …]
|
D | cuda_dnn.h | 204 bool with_winograd_nonfused, int cc_major, int cc_minor, 211 bool with_winograd_nonfused, int cc_major, int cc_minor, 215 bool with_winograd_nonfused, int cc_major, int cc_minor,
|
/external/tensorflow/tensorflow/compiler/mlir/tensorflow/tests/ |
D | layout_optimization_layout_assignment_gpu_cc_60.mlir | 4 tf.devices = {"/device:GPU:0" = {cc_major = 6 : i32, cc_minor = 0 : i32}}
|