/external/tensorflow/tensorflow/core/common_runtime/gpu/ |
D | gpu_debug_allocator_test.cc | 49 auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id); in TEST() local 51 stream_exec, platform_gpu_id, false /*use_unified_memory*/, {}, {}); in TEST() 61 ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_array_ptr, &cpu_array[0], in TEST() 76 auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id); in TEST() 78 new DeviceMemAllocator(stream_exec, platform_gpu_id, in TEST() 90 ASSERT_TRUE(stream_exec->SynchronousMemcpy( in TEST() 98 stream_exec->SynchronousMemcpy(&gpu_hdr_ptr, &pi, sizeof(float))); in TEST() 112 auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id); in TEST() 114 new DeviceMemAllocator(stream_exec, platform_gpu_id, in TEST() 126 ASSERT_TRUE(stream_exec->SynchronousMemcpy( in TEST() [all …]
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | gpu_compiler.h | 57 std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec, 67 se::StreamExecutor* stream_exec, 71 HloModule* hlo_module, se::StreamExecutor* stream_exec, 75 HloModule* hlo_module, se::StreamExecutor* stream_exec, 84 virtual GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) = 0; 91 se::StreamExecutor* stream_exec, bool relocatable, 97 std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec, 107 se::StreamExecutor* stream_exec, const CompileOptions& options, 130 se::StreamExecutor* stream_exec, in LinkModules() argument 149 GpuDeviceInfo GetGpuDeviceInfo(se::StreamExecutor* stream_exec); [all …]
|
D | nvptx_compiler.cc | 108 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloConvolutionCanonicalization() argument 120 pipeline.AddPass<CudnnPadForConvolutions>(IsVoltaOrLater(*stream_exec)); in OptimizeHloConvolutionCanonicalization() 161 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloPostLayoutAssignment() argument 167 if (IsVoltaOrLater(*stream_exec)) { in OptimizeHloPostLayoutAssignment() 173 hlo_module, stream_exec, device_allocator)); in OptimizeHloPostLayoutAssignment() 178 post_pipeline.AddPass<GemmAlgorithmPicker>(stream_exec, device_allocator); in OptimizeHloPostLayoutAssignment() 287 GpuVersion NVPTXCompiler::GetGpuVersion(se::StreamExecutor* stream_exec) { in GetGpuVersion() argument 289 if (!stream_exec->GetDeviceDescription().cuda_compute_capability(&cc_major, in GetGpuVersion() 304 se::StreamExecutor* stream_exec, in CompileTargetBinary() argument 334 stream_exec, ptx, compute_capability.first, compute_capability.second, in CompileTargetBinary() [all …]
|
D | amdgpu_compiler.cc | 74 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloConvolutionCanonicalization() argument 95 GpuVersion AMDGPUCompiler::GetGpuVersion(se::StreamExecutor* stream_exec) { in GetGpuVersion() argument 97 if (!stream_exec->GetDeviceDescription().rocm_amdgpu_isa_version( in GetGpuVersion() 104 stream_exec->GetDeviceDescription().rocm_amdgpu_gcn_arch_name(); in GetGpuVersion() 105 if (gcn_arch_name == stream_exec->GetDeviceDescription().kUndefinedString) { in GetGpuVersion() 117 se::StreamExecutor* stream_exec, in CompileTargetBinary() argument
|
D | gpu_compiler.cc | 150 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloModule() argument 286 hlo_module, stream_exec, device_allocator)); in OptimizeHloModule() 301 LayoutAssignment::InstructionCanChangeLayout, stream_exec); in OptimizeHloModule() 306 TF_RETURN_IF_ERROR(OptimizeHloPostLayoutAssignment(hlo_module, stream_exec, in OptimizeHloModule() 403 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloPostLayoutAssignment() argument 478 pipeline.AddPass<GpuConvAlgorithmPicker>(stream_exec, device_allocator); in OptimizeHloPostLayoutAssignment() 490 std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec, in RunHloPasses() argument 498 OptimizeHloModule(module.get(), stream_exec, options.device_allocator)); in RunHloPasses() 678 se::StreamExecutor* stream_exec, in CompileToTargetBinary() argument 684 [this, stream_exec, &module_config, debug_module]( in CompileToTargetBinary() [all …]
|
D | nvptx_compiler.h | 42 HloModule* hlo_module, se::StreamExecutor* stream_exec, 46 HloModule* hlo_module, se::StreamExecutor* stream_exec, 51 GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) override; 55 GpuVersion gpu_version, se::StreamExecutor* stream_exec, bool relocatable, 60 se::StreamExecutor* stream_exec, 78 se::StreamExecutor* stream_exec, const string& ptx, int cc_major,
|
D | amdgpu_compiler.h | 37 HloModule* hlo_module, se::StreamExecutor* stream_exec, 40 GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) override; 44 GpuVersion gpu_version, se::StreamExecutor* stream_exec, bool relocatable,
|
D | stream_executor_util.h | 38 bool IsVoltaOrLater(const se::StreamExecutor& stream_exec); 61 tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec); 71 absl::Span<const uint8> cubin_data, se::StreamExecutor* stream_exec);
|
D | gemm_algorithm_picker.h | 32 GemmAlgorithmPicker(se::StreamExecutor* stream_exec, in GemmAlgorithmPicker() argument 34 : stream_exec_(stream_exec), allocator_(allocator) {} in GemmAlgorithmPicker()
|
D | gpu_conv_algorithm_picker.h | 40 GpuConvAlgorithmPicker(se::StreamExecutor* stream_exec, in GpuConvAlgorithmPicker() argument 42 : stream_exec_(stream_exec), allocator_(allocator) {} in GpuConvAlgorithmPicker()
|
D | stream_executor_util.cc | 177 tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) { in LockGpu() argument 187 std::make_tuple(stream_exec->platform(), in LockGpu() 188 stream_exec->device_ordinal()), in LockGpu() 196 absl::Span<const uint8> cubin_data, se::StreamExecutor* stream_exec) { in CreateKernel() argument 205 auto kernel_base = absl::make_unique<se::KernelBase>(stream_exec); in CreateKernel() 206 TF_RETURN_IF_ERROR(stream_exec->GetKernel(loader_spec, kernel_base.get())); in CreateKernel()
|
/external/tensorflow/tensorflow/compiler/xla/service/interpreter/ |
D | compiler.cc | 105 std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec, in RunBackend() argument 107 TF_RET_CHECK(stream_exec != nullptr); in RunBackend() 130 std::vector<std::vector<se::StreamExecutor*>> stream_exec, in Compile() argument 139 if (stream_exec.size() != 1 || stream_exec[0].size() != 1) { in Compile() 145 stream_exec[0][0], options)); in Compile() 147 stream_exec[0][0], options)); in Compile()
|
D | compiler.h | 47 std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec, 50 std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec, 54 std::vector<std::vector<se::StreamExecutor*>> stream_exec,
|
/external/tensorflow/tensorflow/stream_executor/cuda/ |
D | redzone_allocator_test.cc | 55 StreamExecutor* stream_exec = platform->ExecutorForDevice(0).ValueOrDie(); in TEST() local 57 StreamExecutorMemoryAllocator se_allocator(platform, {stream_exec}); in TEST() 59 Stream stream(stream_exec); in TEST() 130 StreamExecutor* stream_exec = platform->ExecutorForDevice(0).ValueOrDie(); in TEST() local 132 StreamExecutorMemoryAllocator se_allocator(platform, {stream_exec}); in TEST() 133 Stream stream(stream_exec); in TEST()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | gpu_utils.cc | 135 se::StreamExecutor* stream_exec, in LogConvAutotuneResults() argument 152 *log.mutable_cudnn_version() = GetCudnnVersion(stream_exec); in LogConvAutotuneResults() 153 *log.mutable_compute_capability() = GetComputeCapability(stream_exec); in LogConvAutotuneResults() 154 log.set_device_pci_bus_id(stream_exec->GetDeviceDescription().pci_bus_id()); in LogConvAutotuneResults() 157 if (auto* blas = stream_exec->AsBlas()) { in LogConvAutotuneResults() 179 se::StreamExecutor* stream_exec, absl::Span<const AutotuneResult> results) { in LogFusedConvForwardAutotuneResults() argument 199 *log.mutable_cudnn_version() = GetCudnnVersion(stream_exec); in LogFusedConvForwardAutotuneResults() 200 *log.mutable_compute_capability() = GetComputeCapability(stream_exec); in LogFusedConvForwardAutotuneResults() 201 log.set_device_pci_bus_id(stream_exec->GetDeviceDescription().pci_bus_id()); in LogFusedConvForwardAutotuneResults() 204 if (auto* blas = stream_exec->AsBlas()) { in LogFusedConvForwardAutotuneResults()
|
D | conv_ops_gpu.h | 34 inline bool IsVoltaOrLater(const se::StreamExecutor& stream_exec) { in IsVoltaOrLater() argument 36 CHECK(stream_exec // Crash OK in IsVoltaOrLater() 164 se::StreamExecutor* stream_exec) const { in ShouldIncludeWinogradNonfusedAlgo() argument 165 auto* dnn_support = stream_exec->AsDnn(); in ShouldIncludeWinogradNonfusedAlgo()
|
/external/tensorflow/tensorflow/compiler/xla/ |
D | device_util.h | 32 string DeviceIdentifier(se::StreamExecutor* stream_exec) { in DeviceIdentifier() argument 33 return absl::StrCat(stream_exec->platform()->Name(), ":", in DeviceIdentifier() 34 stream_exec->device_ordinal()); in DeviceIdentifier()
|
/external/tensorflow/tensorflow/stream_executor/gpu/ |
D | gpu_activation.cc | 26 GpuExecutor* ExtractGpuExecutor(StreamExecutor* stream_exec); 34 StreamExecutor* stream_exec) in ScopedActivateExecutorContext() argument 35 : ScopedActivateExecutorContext(ExtractGpuExecutor(stream_exec)) {} in ScopedActivateExecutorContext()
|
/external/tensorflow/tensorflow/core/tpu/ |
D | tpu_on_demand_compiler.cc | 277 std::vector<std::vector<stream_executor::StreamExecutor*>> stream_exec, in Compile() argument 296 std::vector<SE_StreamExecutorList> se_lists(stream_exec.size()); in Compile() 298 for (int i = 0; i < stream_exec.size(); ++i) { in Compile() 299 se_lists[i].count = stream_exec[i].size(); in Compile() 300 se_lists_storage.emplace_back(stream_exec[i].size()); in Compile() 302 for (int j = 0; j < stream_exec[i].size(); ++j) { in Compile() 304 stream_exec[i][j]->implementation()) in Compile() 317 compiler_, &se_module_group, se_lists.data(), stream_exec.size(), in Compile()
|
/external/tensorflow/tensorflow/stream_executor/ |
D | event.cc | 24 Event::Event(StreamExecutor* stream_exec) in Event() argument 25 : stream_exec_(stream_exec), in Event()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/ |
D | mlir_gpu_test_base.cc | 53 se::StreamExecutor* stream_exec = stream->parent(); in CompileMlirModule() local 54 GpuDeviceInfo gpu_device_info = GetGpuDeviceInfo(stream_exec); in CompileMlirModule() 59 stream_exec->GetDeviceDescription().cuda_compute_capability( in CompileMlirModule() 77 module_config, Compiler::CompileOptions(), "main", stream_exec, in CompileMlirModule()
|
/external/tensorflow/tensorflow/core/common_runtime/device/ |
D | device_host_allocator.h | 29 explicit DeviceHostAllocator(se::StreamExecutor* stream_exec, int numa_node, in DeviceHostAllocator() argument 33 stream_exec_(stream_exec), in DeviceHostAllocator()
|
D | device_mem_allocator.h | 31 explicit DeviceMemAllocator(se::StreamExecutor* stream_exec, in DeviceMemAllocator() argument 37 stream_exec_(stream_exec), in DeviceMemAllocator()
|
D | device_event_mgr_test.cc | 112 auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); in TEST() local 113 TEST_EventMgr em(stream_exec, GPUOptions()); in TEST() 121 auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); in TEST() local 122 TEST_EventMgr em(stream_exec, GPUOptions()); in TEST() 124 std::unique_ptr<se::Stream> stream(new se::Stream(stream_exec)); in TEST() 443 auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); in BM_no_ops() local 444 std::unique_ptr<se::Stream> stream(new se::Stream(stream_exec)); in BM_no_ops() 447 TEST_EventMgr em(stream_exec, GPUOptions()); in BM_no_ops()
|
/external/tensorflow/tensorflow/compiler/xla/tests/ |
D | llvm_compiler_test.cc | 45 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloConvolutionCanonicalization() argument 51 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloPostLayoutAssignment() argument 62 GpuVersion gpu_version, se::StreamExecutor* stream_exec, bool relocatable, in CompileTargetBinary() argument
|