Home
last modified time | relevance | path

Searched refs:stream_exec (Results 1 – 25 of 36) sorted by relevance

12

/external/tensorflow/tensorflow/core/common_runtime/gpu/
Dgpu_debug_allocator_test.cc49 auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id); in TEST() local
51 stream_exec, platform_gpu_id, false /*use_unified_memory*/, {}, {}); in TEST()
61 ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_array_ptr, &cpu_array[0], in TEST()
76 auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id); in TEST()
78 new DeviceMemAllocator(stream_exec, platform_gpu_id, in TEST()
90 ASSERT_TRUE(stream_exec->SynchronousMemcpy( in TEST()
98 stream_exec->SynchronousMemcpy(&gpu_hdr_ptr, &pi, sizeof(float))); in TEST()
112 auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id); in TEST()
114 new DeviceMemAllocator(stream_exec, platform_gpu_id, in TEST()
126 ASSERT_TRUE(stream_exec->SynchronousMemcpy( in TEST()
[all …]
/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dgpu_compiler.h57 std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
67 se::StreamExecutor* stream_exec,
71 HloModule* hlo_module, se::StreamExecutor* stream_exec,
75 HloModule* hlo_module, se::StreamExecutor* stream_exec,
84 virtual GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) = 0;
91 se::StreamExecutor* stream_exec, bool relocatable,
97 std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
107 se::StreamExecutor* stream_exec, const CompileOptions& options,
130 se::StreamExecutor* stream_exec, in LinkModules() argument
149 GpuDeviceInfo GetGpuDeviceInfo(se::StreamExecutor* stream_exec);
[all …]
Dnvptx_compiler.cc108 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloConvolutionCanonicalization() argument
120 pipeline.AddPass<CudnnPadForConvolutions>(IsVoltaOrLater(*stream_exec)); in OptimizeHloConvolutionCanonicalization()
161 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloPostLayoutAssignment() argument
167 if (IsVoltaOrLater(*stream_exec)) { in OptimizeHloPostLayoutAssignment()
173 hlo_module, stream_exec, device_allocator)); in OptimizeHloPostLayoutAssignment()
178 post_pipeline.AddPass<GemmAlgorithmPicker>(stream_exec, device_allocator); in OptimizeHloPostLayoutAssignment()
287 GpuVersion NVPTXCompiler::GetGpuVersion(se::StreamExecutor* stream_exec) { in GetGpuVersion() argument
289 if (!stream_exec->GetDeviceDescription().cuda_compute_capability(&cc_major, in GetGpuVersion()
304 se::StreamExecutor* stream_exec, in CompileTargetBinary() argument
334 stream_exec, ptx, compute_capability.first, compute_capability.second, in CompileTargetBinary()
[all …]
Damdgpu_compiler.cc74 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloConvolutionCanonicalization() argument
95 GpuVersion AMDGPUCompiler::GetGpuVersion(se::StreamExecutor* stream_exec) { in GetGpuVersion() argument
97 if (!stream_exec->GetDeviceDescription().rocm_amdgpu_isa_version( in GetGpuVersion()
104 stream_exec->GetDeviceDescription().rocm_amdgpu_gcn_arch_name(); in GetGpuVersion()
105 if (gcn_arch_name == stream_exec->GetDeviceDescription().kUndefinedString) { in GetGpuVersion()
117 se::StreamExecutor* stream_exec, in CompileTargetBinary() argument
Dgpu_compiler.cc150 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloModule() argument
286 hlo_module, stream_exec, device_allocator)); in OptimizeHloModule()
301 LayoutAssignment::InstructionCanChangeLayout, stream_exec); in OptimizeHloModule()
306 TF_RETURN_IF_ERROR(OptimizeHloPostLayoutAssignment(hlo_module, stream_exec, in OptimizeHloModule()
403 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloPostLayoutAssignment() argument
478 pipeline.AddPass<GpuConvAlgorithmPicker>(stream_exec, device_allocator); in OptimizeHloPostLayoutAssignment()
490 std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec, in RunHloPasses() argument
498 OptimizeHloModule(module.get(), stream_exec, options.device_allocator)); in RunHloPasses()
678 se::StreamExecutor* stream_exec, in CompileToTargetBinary() argument
684 [this, stream_exec, &module_config, debug_module]( in CompileToTargetBinary()
[all …]
Dnvptx_compiler.h42 HloModule* hlo_module, se::StreamExecutor* stream_exec,
46 HloModule* hlo_module, se::StreamExecutor* stream_exec,
51 GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) override;
55 GpuVersion gpu_version, se::StreamExecutor* stream_exec, bool relocatable,
60 se::StreamExecutor* stream_exec,
78 se::StreamExecutor* stream_exec, const string& ptx, int cc_major,
Damdgpu_compiler.h37 HloModule* hlo_module, se::StreamExecutor* stream_exec,
40 GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) override;
44 GpuVersion gpu_version, se::StreamExecutor* stream_exec, bool relocatable,
Dstream_executor_util.h38 bool IsVoltaOrLater(const se::StreamExecutor& stream_exec);
61 tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec);
71 absl::Span<const uint8> cubin_data, se::StreamExecutor* stream_exec);
Dgemm_algorithm_picker.h32 GemmAlgorithmPicker(se::StreamExecutor* stream_exec, in GemmAlgorithmPicker() argument
34 : stream_exec_(stream_exec), allocator_(allocator) {} in GemmAlgorithmPicker()
Dgpu_conv_algorithm_picker.h40 GpuConvAlgorithmPicker(se::StreamExecutor* stream_exec, in GpuConvAlgorithmPicker() argument
42 : stream_exec_(stream_exec), allocator_(allocator) {} in GpuConvAlgorithmPicker()
Dstream_executor_util.cc177 tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) { in LockGpu() argument
187 std::make_tuple(stream_exec->platform(), in LockGpu()
188 stream_exec->device_ordinal()), in LockGpu()
196 absl::Span<const uint8> cubin_data, se::StreamExecutor* stream_exec) { in CreateKernel() argument
205 auto kernel_base = absl::make_unique<se::KernelBase>(stream_exec); in CreateKernel()
206 TF_RETURN_IF_ERROR(stream_exec->GetKernel(loader_spec, kernel_base.get())); in CreateKernel()
/external/tensorflow/tensorflow/compiler/xla/service/interpreter/
Dcompiler.cc105 std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec, in RunBackend() argument
107 TF_RET_CHECK(stream_exec != nullptr); in RunBackend()
130 std::vector<std::vector<se::StreamExecutor*>> stream_exec, in Compile() argument
139 if (stream_exec.size() != 1 || stream_exec[0].size() != 1) { in Compile()
145 stream_exec[0][0], options)); in Compile()
147 stream_exec[0][0], options)); in Compile()
Dcompiler.h47 std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
50 std::unique_ptr<HloModule> hlo_module, se::StreamExecutor* stream_exec,
54 std::vector<std::vector<se::StreamExecutor*>> stream_exec,
/external/tensorflow/tensorflow/stream_executor/cuda/
Dredzone_allocator_test.cc55 StreamExecutor* stream_exec = platform->ExecutorForDevice(0).ValueOrDie(); in TEST() local
57 StreamExecutorMemoryAllocator se_allocator(platform, {stream_exec}); in TEST()
59 Stream stream(stream_exec); in TEST()
130 StreamExecutor* stream_exec = platform->ExecutorForDevice(0).ValueOrDie(); in TEST() local
132 StreamExecutorMemoryAllocator se_allocator(platform, {stream_exec}); in TEST()
133 Stream stream(stream_exec); in TEST()
/external/tensorflow/tensorflow/core/kernels/
Dgpu_utils.cc135 se::StreamExecutor* stream_exec, in LogConvAutotuneResults() argument
152 *log.mutable_cudnn_version() = GetCudnnVersion(stream_exec); in LogConvAutotuneResults()
153 *log.mutable_compute_capability() = GetComputeCapability(stream_exec); in LogConvAutotuneResults()
154 log.set_device_pci_bus_id(stream_exec->GetDeviceDescription().pci_bus_id()); in LogConvAutotuneResults()
157 if (auto* blas = stream_exec->AsBlas()) { in LogConvAutotuneResults()
179 se::StreamExecutor* stream_exec, absl::Span<const AutotuneResult> results) { in LogFusedConvForwardAutotuneResults() argument
199 *log.mutable_cudnn_version() = GetCudnnVersion(stream_exec); in LogFusedConvForwardAutotuneResults()
200 *log.mutable_compute_capability() = GetComputeCapability(stream_exec); in LogFusedConvForwardAutotuneResults()
201 log.set_device_pci_bus_id(stream_exec->GetDeviceDescription().pci_bus_id()); in LogFusedConvForwardAutotuneResults()
204 if (auto* blas = stream_exec->AsBlas()) { in LogFusedConvForwardAutotuneResults()
Dconv_ops_gpu.h34 inline bool IsVoltaOrLater(const se::StreamExecutor& stream_exec) { in IsVoltaOrLater() argument
36 CHECK(stream_exec // Crash OK in IsVoltaOrLater()
164 se::StreamExecutor* stream_exec) const { in ShouldIncludeWinogradNonfusedAlgo() argument
165 auto* dnn_support = stream_exec->AsDnn(); in ShouldIncludeWinogradNonfusedAlgo()
/external/tensorflow/tensorflow/compiler/xla/
Ddevice_util.h32 string DeviceIdentifier(se::StreamExecutor* stream_exec) { in DeviceIdentifier() argument
33 return absl::StrCat(stream_exec->platform()->Name(), ":", in DeviceIdentifier()
34 stream_exec->device_ordinal()); in DeviceIdentifier()
/external/tensorflow/tensorflow/stream_executor/gpu/
Dgpu_activation.cc26 GpuExecutor* ExtractGpuExecutor(StreamExecutor* stream_exec);
34 StreamExecutor* stream_exec) in ScopedActivateExecutorContext() argument
35 : ScopedActivateExecutorContext(ExtractGpuExecutor(stream_exec)) {} in ScopedActivateExecutorContext()
/external/tensorflow/tensorflow/core/tpu/
Dtpu_on_demand_compiler.cc277 std::vector<std::vector<stream_executor::StreamExecutor*>> stream_exec, in Compile() argument
296 std::vector<SE_StreamExecutorList> se_lists(stream_exec.size()); in Compile()
298 for (int i = 0; i < stream_exec.size(); ++i) { in Compile()
299 se_lists[i].count = stream_exec[i].size(); in Compile()
300 se_lists_storage.emplace_back(stream_exec[i].size()); in Compile()
302 for (int j = 0; j < stream_exec[i].size(); ++j) { in Compile()
304 stream_exec[i][j]->implementation()) in Compile()
317 compiler_, &se_module_group, se_lists.data(), stream_exec.size(), in Compile()
/external/tensorflow/tensorflow/stream_executor/
Devent.cc24 Event::Event(StreamExecutor* stream_exec) in Event() argument
25 : stream_exec_(stream_exec), in Event()
/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/
Dmlir_gpu_test_base.cc53 se::StreamExecutor* stream_exec = stream->parent(); in CompileMlirModule() local
54 GpuDeviceInfo gpu_device_info = GetGpuDeviceInfo(stream_exec); in CompileMlirModule()
59 stream_exec->GetDeviceDescription().cuda_compute_capability( in CompileMlirModule()
77 module_config, Compiler::CompileOptions(), "main", stream_exec, in CompileMlirModule()
/external/tensorflow/tensorflow/core/common_runtime/device/
Ddevice_host_allocator.h29 explicit DeviceHostAllocator(se::StreamExecutor* stream_exec, int numa_node, in DeviceHostAllocator() argument
33 stream_exec_(stream_exec), in DeviceHostAllocator()
Ddevice_mem_allocator.h31 explicit DeviceMemAllocator(se::StreamExecutor* stream_exec, in DeviceMemAllocator() argument
37 stream_exec_(stream_exec), in DeviceMemAllocator()
Ddevice_event_mgr_test.cc112 auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); in TEST() local
113 TEST_EventMgr em(stream_exec, GPUOptions()); in TEST()
121 auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); in TEST() local
122 TEST_EventMgr em(stream_exec, GPUOptions()); in TEST()
124 std::unique_ptr<se::Stream> stream(new se::Stream(stream_exec)); in TEST()
443 auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); in BM_no_ops() local
444 std::unique_ptr<se::Stream> stream(new se::Stream(stream_exec)); in BM_no_ops()
447 TEST_EventMgr em(stream_exec, GPUOptions()); in BM_no_ops()
/external/tensorflow/tensorflow/compiler/xla/tests/
Dllvm_compiler_test.cc45 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloConvolutionCanonicalization() argument
51 HloModule* hlo_module, se::StreamExecutor* stream_exec, in OptimizeHloPostLayoutAssignment() argument
62 GpuVersion gpu_version, se::StreamExecutor* stream_exec, bool relocatable, in CompileTargetBinary() argument

12