Home
last modified time | relevance | path

Searched refs:gpu_device (Results 1 – 25 of 33) sorted by relevance

12

/external/eigen/unsupported/test/
Dcxx11_tensor_of_float16_cuda.cu27 Eigen::GpuDevice gpu_device(&stream); in test_cuda_numext() local
30 float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); in test_cuda_numext()
31 bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); in test_cuda_numext()
32 bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); in test_cuda_numext()
41 gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); in test_cuda_numext()
42 gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>()); in test_cuda_numext()
43 …gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_… in test_cuda_numext()
47 gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool)); in test_cuda_numext()
48 gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool)); in test_cuda_numext()
49 gpu_device.synchronize(); in test_cuda_numext()
[all …]
Dcxx11_tensor_complex_cuda.cu41 Eigen::GpuDevice gpu_device(&stream); in test_cuda_nullary() local
50 gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f)); in test_cuda_nullary()
51 gpu_out2.device(gpu_device) = gpu_in2.abs(); in test_cuda_nullary()
57 gpu_device.stream()) == cudaSuccess); in test_cuda_nullary()
59 gpu_device.stream()) == cudaSuccess); in test_cuda_nullary()
61 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); in test_cuda_nullary()
77 Eigen::GpuDevice gpu_device(&stream); in test_cuda_sum_reductions() local
90 …std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes)); in test_cuda_sum_reductions()
91 …std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes… in test_cuda_sum_reductions()
92 gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); in test_cuda_sum_reductions()
[all …]
Dcxx11_tensor_cast_float16_cuda.cu26 Eigen::GpuDevice gpu_device(&stream); in test_cuda_conversion() local
32 float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); in test_cuda_conversion()
33 Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); in test_cuda_conversion()
34 float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float)); in test_cuda_conversion()
43 gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float)); in test_cuda_conversion()
45 gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>(); in test_cuda_conversion()
46 gpu_conv.device(gpu_device) = gpu_half.cast<float>(); in test_cuda_conversion()
50 gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float)); in test_cuda_conversion()
51 gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float)); in test_cuda_conversion()
52 gpu_device.synchronize(); in test_cuda_conversion()
[all …]
Dcxx11_tensor_argmax_cuda.cu47 Eigen::GpuDevice gpu_device(&stream); in test_cuda_simple_argmax() local
53 gpu_out_max.device(gpu_device) = gpu_in.argmax(); in test_cuda_simple_argmax()
54 gpu_out_min.device(gpu_device) = gpu_in.argmin(); in test_cuda_simple_argmax()
56 …assert(cudaMemcpyAsync(out_max.data(), d_out_max, out_bytes, cudaMemcpyDeviceToHost, gpu_device.st… in test_cuda_simple_argmax()
57 …assert(cudaMemcpyAsync(out_min.data(), d_out_min, out_bytes, cudaMemcpyDeviceToHost, gpu_device.st… in test_cuda_simple_argmax()
58 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); in test_cuda_simple_argmax()
109 Eigen::GpuDevice gpu_device(&stream); in test_cuda_argmax_dim() local
114 gpu_out.device(gpu_device) = gpu_in.argmax(dim); in test_cuda_argmax_dim()
116 …assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.str… in test_cuda_argmax_dim()
117 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); in test_cuda_argmax_dim()
[all …]
Dcxx11_tensor_cuda.cu39 Eigen::GpuDevice gpu_device(&stream); in test_cuda_nullary() local
46 gpu_in1.device(gpu_device) = gpu_in1.constant(3.14f); in test_cuda_nullary()
47 gpu_in2.device(gpu_device) = gpu_in2.random(); in test_cuda_nullary()
53 gpu_device.stream()) == cudaSuccess); in test_cuda_nullary()
55 gpu_device.stream()) == cudaSuccess); in test_cuda_nullary()
57 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); in test_cuda_nullary()
90 Eigen::GpuDevice gpu_device(&stream); in test_cuda_elementwise_small() local
99 gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; in test_cuda_elementwise_small()
102 gpu_device.stream()) == cudaSuccess); in test_cuda_elementwise_small()
103 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); in test_cuda_elementwise_small()
[all …]
Dcxx11_tensor_random_cuda.cu34 Eigen::GpuDevice gpu_device(&stream); in test_cuda_random_uniform() local
38 gpu_out.device(gpu_device) = gpu_out.random(); in test_cuda_random_uniform()
40 …assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) … in test_cuda_random_uniform()
41 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); in test_cuda_random_uniform()
59 Eigen::GpuDevice gpu_device(&stream); in test_cuda_random_normal() local
64 gpu_out.device(gpu_device) = gpu_out.random(gen); in test_cuda_random_normal()
66 …assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) … in test_cuda_random_normal()
67 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); in test_cuda_random_normal()
Dcxx11_tensor_complex_cwise_ops_cuda.cu35 Eigen::GpuDevice gpu_device(&stream); in test_cuda_complex_cwise_ops() local
47 gpu_in1.device(gpu_device) = gpu_in1.constant(a); in test_cuda_complex_cwise_ops()
48 gpu_in2.device(gpu_device) = gpu_in2.constant(b); in test_cuda_complex_cwise_ops()
62 gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; in test_cuda_complex_cwise_ops()
66 gpu_out.device(gpu_device) = gpu_in1 - gpu_in2; in test_cuda_complex_cwise_ops()
70 gpu_out.device(gpu_device) = gpu_in1 * gpu_in2; in test_cuda_complex_cwise_ops()
74 gpu_out.device(gpu_device) = gpu_in1 / gpu_in2; in test_cuda_complex_cwise_ops()
79 gpu_device.stream()) == cudaSuccess); in test_cuda_complex_cwise_ops()
80 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); in test_cuda_complex_cwise_ops()
Dcxx11_tensor_reduction_cuda.cu26 Eigen::GpuDevice gpu_device(&stream); in test_full_reductions() local
39 Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes)); in test_full_reductions()
40 Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes)); in test_full_reductions()
41 gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); in test_full_reductions()
46 out_gpu.device(gpu_device) = in_gpu.sum(); in test_full_reductions()
49 gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); in test_full_reductions()
50 gpu_device.synchronize(); in test_full_reductions()
55 gpu_device.deallocate(gpu_in_ptr); in test_full_reductions()
56 gpu_device.deallocate(gpu_out_ptr); in test_full_reductions()
Dcxx11_tensor_contract_cuda.cu58 Eigen::GpuDevice gpu_device(&stream); in test_cuda_contraction() local
68 gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); in test_cuda_contraction()
122 Eigen::GpuDevice gpu_device(&stream); in test_scalar() local
131 gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); in test_scalar()
Dcxx11_tensor_scan_cuda.cu47 Eigen::GpuDevice gpu_device(&stream); in test_cuda_cumsum() local
54 gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1); in test_cuda_cumsum()
/external/tensorflow/tensorflow/core/grappler/costs/
Dvirtual_placer_test.cc32 DeviceProperties gpu_device; in TEST() local
33 gpu_device.set_type("GPU"); in TEST()
34 devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device; in TEST()
62 DeviceProperties gpu_device; in TEST() local
63 gpu_device.set_type("GPU"); in TEST()
64 devices["/GPU:0"] = gpu_device; in TEST()
121 DeviceProperties gpu_device; in TEST() local
122 gpu_device.set_type("GPU"); in TEST()
124 "/replica:0/task:0/device:GPU:0")] = gpu_device; in TEST()
178 DeviceProperties gpu_device; in TEST() local
[all …]
Danalytical_cost_estimator_test.cc38 DeviceProperties gpu_device; in SetUp() local
39 gpu_device.set_type("GPU"); in SetUp()
40 gpu_device.set_num_cores(12); in SetUp()
41 gpu_device.set_frequency(1100); in SetUp()
42 gpu_device.set_bandwidth(180 * 1024 * 1024); in SetUp()
43 (*gpu_device.mutable_environment())["architecture"] = "6"; in SetUp()
44 devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device; in SetUp()
/external/tensorflow/tensorflow/core/common_runtime/gpu/
Dgpu_util.h47 static void CopyGPUTensorToCPU(Device* gpu_device,
55 static Status Sync(Device* gpu_device);
60 static Status SyncAll(Device* gpu_device);
80 static uint64 Checksum(Device* gpu_device,
90 Device* gpu_device, Tensor* gpu_tensor,
103 static void CopyGPUTensorToSameGPU(Device* gpu_device,
Dgpu_util.cc254 void GPUUtil::CopyGPUTensorToCPU(Device* gpu_device, in CopyGPUTensorToCPU() argument
261 Status s = PrepareCopy(gpu_device, device_context, *gpu_tensor, cpu_tensor, in CopyGPUTensorToCPU()
301 Device* gpu_device, Tensor* gpu_tensor, in CopyCPUTensorToGPU() argument
306 Status s = PrepareCopy(gpu_device, device_context, *cpu_tensor, gpu_tensor, in CopyCPUTensorToGPU()
346 Status GPUUtil::Sync(Device* gpu_device) { in Sync() argument
348 auto* dev_info = gpu_device->tensorflow_gpu_device_info(); in Sync()
355 Status GPUUtil::SyncAll(Device* gpu_device) { in SyncAll() argument
357 auto* dev_info = gpu_device->tensorflow_gpu_device_info(); in SyncAll()
393 uint64 GPUUtil::Checksum(Device* gpu_device, in Checksum() argument
399 CopyGPUTensorToCPU(gpu_device, device_context, &tensor, &copy, in Checksum()
[all …]
DBUILD110 "gpu_device.h",
129 "gpu_device.cc",
/external/tensorflow/tensorflow/core/kernels/
Dconcat_lib_gpu_impl.cu.cc117 const Eigen::GpuDevice& gpu_device, in ConcatGPUSlice() argument
127 To32Bit(*output).slice(offset, size).device(gpu_device) = in ConcatGPUSlice()
130 output->slice(offset, size).device(gpu_device) = *inputs_flat[i]; in ConcatGPUSlice()
138 void ConcatGPUImpl(const Eigen::GpuDevice& gpu_device, in ConcatGPUImpl() argument
144 gpu_device); in ConcatGPUImpl()
149 config.thread_per_block, 0, gpu_device.stream(), input_ptrs, split_size, in ConcatGPUImpl()
153 IntType smem_max = gpu_device.sharedMemPerBlock(); in ConcatGPUImpl()
163 config.thread_per_block, smem_usage, gpu_device.stream(), input_ptrs, in ConcatGPUImpl()
169 config.thread_per_block, 0, gpu_device.stream(), input_ptrs, in ConcatGPUImpl()
178 const Eigen::GpuDevice& gpu_device, \
[all …]
Ddynamic_stitch_op_gpu.cu.cc52 void DynamicStitchGPUImpl(const Eigen::GpuDevice& gpu_device, in DynamicStitchGPUImpl() argument
58 auto config = GetGpuLaunchConfig(output_size, gpu_device); in DynamicStitchGPUImpl()
61 config.thread_per_block, 0, gpu_device.stream(), in DynamicStitchGPUImpl()
68 const Eigen::GpuDevice& gpu_device, const int32 slice_size, \
Dsplit_lib_gpu.cu.cc210 const Eigen::GpuDevice& gpu_device, bool fixed_size, const T* input_ptr, in Run() argument
216 GetGpuLaunchConfig(total_rows * total_cols, gpu_device); in Run()
219 config.thread_per_block, 0, gpu_device.stream(), in Run()
223 auto config = GetGpu2DLaunchConfig(total_cols, total_rows, gpu_device); in Run()
224 IntType smem_max = gpu_device.sharedMemPerBlock(); in Run()
233 config.thread_per_block, smem_usage, gpu_device.stream(), input_ptr, in Run()
238 config.thread_per_block, 0, gpu_device.stream(), input_ptr, in Run()
Dconcat_lib_gpu.h34 const Eigen::GpuDevice& gpu_device,
49 const Eigen::GpuDevice& gpu_device, \
54 const Eigen::GpuDevice& gpu_device, \
Ddynamic_stitch_op.cc139 void DynamicStitchGPUImpl(const Eigen::GpuDevice& gpu_device,
146 const Eigen::GpuDevice& gpu_device, const int32 slice_size, \
/external/crosvm/seccomp/
DAndroid.bp802 name: "gpu_device.policy_inline_x86_64",
804 out: ["gpu_device.policy"],
805 srcs: ["x86_64/gpu_device.policy"],
809 name: "gpu_device.policy_x86_64",
810 filename: "gpu_device.policy",
812 src: ":gpu_device.policy_inline_x86_64",
816 name: "gpu_device.policy_inline_aarch64",
818 out: ["gpu_device.policy"],
819 srcs: ["aarch64/gpu_device.policy"],
823 name: "gpu_device.policy_aarch64",
[all …]
Dcrosvm_seccomp_policy_product_packages.mk24 gpu_device.policy \
48 system/etc/seccomp_policy/crosvm/gpu_device.policy \
/external/tensorflow/tensorflow/compiler/xla/pjrt/
DBUILD298 name = "gpu_device",
299 srcs = ["gpu_device.cc"],
300 hdrs = ["gpu_device.h"],
318 # We actually wish we could write if_cuda(if_nccl(...)) in :gpu_device,
337 ":gpu_device",
/external/tensorflow/tensorflow/core/grappler/optimizers/
Dmemory_optimizer_test.cc213 DeviceProperties gpu_device; in CreateVirtualCluster() local
214 gpu_device.set_type("GPU"); in CreateVirtualCluster()
215 gpu_device.set_frequency(1000); in CreateVirtualCluster()
216 gpu_device.set_num_cores(24); in CreateVirtualCluster()
217 gpu_device.set_bandwidth(128); in CreateVirtualCluster()
218 gpu_device.set_memory_size(1024 * 1024); in CreateVirtualCluster()
219 gpu_device.mutable_environment()->insert({"architecture", "6"}); in CreateVirtualCluster()
222 devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device; in CreateVirtualCluster()
Dgeneric_layout_optimizer_test.cc181 DeviceProperties gpu_device; in SetUp() local
182 gpu_device.set_type("GPU"); in SetUp()
183 gpu_device.mutable_environment()->insert({"architecture", "6"}); in SetUp()
187 gpu_device }})); in SetUp()

12