/external/eigen/unsupported/test/ |
D | cxx11_tensor_of_float16_cuda.cu |
    27   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_numext() local
    30   float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));  in test_cuda_numext()
    31   bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool));  in test_cuda_numext()
    32   bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool));  in test_cuda_numext()
    41   gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);  in test_cuda_numext()
    42   gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>());  in test_cuda_numext()
    43   …gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_…  in test_cuda_numext()
    47   gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool));  in test_cuda_numext()
    48   gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool));  in test_cuda_numext()
    49   gpu_device.synchronize();  in test_cuda_numext()
    [all …]
|
D | cxx11_tensor_complex_cuda.cu |
    41   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_nullary() local
    50   gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f));  in test_cuda_nullary()
    51   gpu_out2.device(gpu_device) = gpu_in2.abs();  in test_cuda_nullary()
    57   gpu_device.stream()) == cudaSuccess);  in test_cuda_nullary()
    59   gpu_device.stream()) == cudaSuccess);  in test_cuda_nullary()
    61   assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);  in test_cuda_nullary()
    77   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_sum_reductions() local
    90   …std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes));  in test_cuda_sum_reductions()
    91   …std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes…  in test_cuda_sum_reductions()
    92   gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);  in test_cuda_sum_reductions()
    [all …]
|
D | cxx11_tensor_cast_float16_cuda.cu |
    26   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_conversion() local
    32   float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));  in test_cuda_conversion()
    33   Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));  in test_cuda_conversion()
    34   float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));  in test_cuda_conversion()
    43   gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float));  in test_cuda_conversion()
    45   gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>();  in test_cuda_conversion()
    46   gpu_conv.device(gpu_device) = gpu_half.cast<float>();  in test_cuda_conversion()
    50   gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));  in test_cuda_conversion()
    51   gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));  in test_cuda_conversion()
    52   gpu_device.synchronize();  in test_cuda_conversion()
    [all …]
|
D | cxx11_tensor_argmax_cuda.cu |
    47   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_simple_argmax() local
    53   gpu_out_max.device(gpu_device) = gpu_in.argmax();  in test_cuda_simple_argmax()
    54   gpu_out_min.device(gpu_device) = gpu_in.argmin();  in test_cuda_simple_argmax()
    56   …assert(cudaMemcpyAsync(out_max.data(), d_out_max, out_bytes, cudaMemcpyDeviceToHost, gpu_device.st…  in test_cuda_simple_argmax()
    57   …assert(cudaMemcpyAsync(out_min.data(), d_out_min, out_bytes, cudaMemcpyDeviceToHost, gpu_device.st…  in test_cuda_simple_argmax()
    58   assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);  in test_cuda_simple_argmax()
    109  Eigen::GpuDevice gpu_device(&stream);  in test_cuda_argmax_dim() local
    114  gpu_out.device(gpu_device) = gpu_in.argmax(dim);  in test_cuda_argmax_dim()
    116  …assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.str…  in test_cuda_argmax_dim()
    117  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);  in test_cuda_argmax_dim()
    [all …]
|
D | cxx11_tensor_cuda.cu |
    39   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_nullary() local
    46   gpu_in1.device(gpu_device) = gpu_in1.constant(3.14f);  in test_cuda_nullary()
    47   gpu_in2.device(gpu_device) = gpu_in2.random();  in test_cuda_nullary()
    53   gpu_device.stream()) == cudaSuccess);  in test_cuda_nullary()
    55   gpu_device.stream()) == cudaSuccess);  in test_cuda_nullary()
    57   assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);  in test_cuda_nullary()
    90   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_elementwise_small() local
    99   gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;  in test_cuda_elementwise_small()
    102  gpu_device.stream()) == cudaSuccess);  in test_cuda_elementwise_small()
    103  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);  in test_cuda_elementwise_small()
    [all …]
|
D | cxx11_tensor_random_cuda.cu |
    34   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_random_uniform() local
    38   gpu_out.device(gpu_device) = gpu_out.random();  in test_cuda_random_uniform()
    40   …assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) …  in test_cuda_random_uniform()
    41   assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);  in test_cuda_random_uniform()
    59   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_random_normal() local
    64   gpu_out.device(gpu_device) = gpu_out.random(gen);  in test_cuda_random_normal()
    66   …assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) …  in test_cuda_random_normal()
    67   assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);  in test_cuda_random_normal()
|
D | cxx11_tensor_complex_cwise_ops_cuda.cu |
    35   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_complex_cwise_ops() local
    47   gpu_in1.device(gpu_device) = gpu_in1.constant(a);  in test_cuda_complex_cwise_ops()
    48   gpu_in2.device(gpu_device) = gpu_in2.constant(b);  in test_cuda_complex_cwise_ops()
    62   gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;  in test_cuda_complex_cwise_ops()
    66   gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;  in test_cuda_complex_cwise_ops()
    70   gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;  in test_cuda_complex_cwise_ops()
    74   gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;  in test_cuda_complex_cwise_ops()
    79   gpu_device.stream()) == cudaSuccess);  in test_cuda_complex_cwise_ops()
    80   assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);  in test_cuda_complex_cwise_ops()
|
D | cxx11_tensor_reduction_cuda.cu |
    26   Eigen::GpuDevice gpu_device(&stream);  in test_full_reductions() local
    39   Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes));  in test_full_reductions()
    40   Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes));  in test_full_reductions()
    41   gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);  in test_full_reductions()
    46   out_gpu.device(gpu_device) = in_gpu.sum();  in test_full_reductions()
    49   gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);  in test_full_reductions()
    50   gpu_device.synchronize();  in test_full_reductions()
    55   gpu_device.deallocate(gpu_in_ptr);  in test_full_reductions()
    56   gpu_device.deallocate(gpu_out_ptr);  in test_full_reductions()
|
D | cxx11_tensor_contract_cuda.cu |
    58   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_contraction() local
    68   gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);  in test_cuda_contraction()
    122  Eigen::GpuDevice gpu_device(&stream);  in test_scalar() local
    131  gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);  in test_scalar()
|
D | cxx11_tensor_scan_cuda.cu |
    47   Eigen::GpuDevice gpu_device(&stream);  in test_cuda_cumsum() local
    54   gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1);  in test_cuda_cumsum()
|
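All of the Eigen CUDA tests above follow the same evaluation pattern: wrap a CUDA stream in an Eigen::GpuDevice, allocate device buffers through it, view them as TensorMaps, assign a tensor expression via .device(gpu_device), then copy the result back and synchronize. A minimal sketch of that pattern, compiled with nvcc as a .cu file; the tensor size, variable names, and the particular expression are illustrative rather than taken from any one test:

    #define EIGEN_USE_GPU
    #include <cstddef>
    #include <unsupported/Eigen/CXX11/Tensor>

    void eval_on_gpu() {
      Eigen::CudaStreamDevice stream;        // wraps a CUDA stream
      Eigen::GpuDevice gpu_device(&stream);

      const int num_elem = 1024;             // hypothetical size
      const std::size_t bytes = num_elem * sizeof(float);

      // Host-side input and output tensors.
      Eigen::Tensor<float, 1> in(num_elem), out(num_elem);
      in.setRandom();

      // Device buffers come from the GpuDevice allocator.
      float* d_in  = static_cast<float*>(gpu_device.allocate(bytes));
      float* d_out = static_cast<float*>(gpu_device.allocate(bytes));
      gpu_device.memcpyHostToDevice(d_in, in.data(), bytes);

      // TensorMaps view the raw device pointers; .device() evaluates on the GPU stream.
      Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_in(d_in, num_elem);
      Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_out(d_out, num_elem);
      gpu_out.device(gpu_device) = gpu_in * gpu_in + gpu_in.constant(0.5f);

      // Copy back and wait for the stream before touching the host result.
      gpu_device.memcpyDeviceToHost(out.data(), d_out, bytes);
      gpu_device.synchronize();

      gpu_device.deallocate(d_in);
      gpu_device.deallocate(d_out);
    }

The contraction, reduction, and scan tests differ only in the expression on the right-hand side (contract(), sum(), cumsum()); the allocate/memcpy/synchronize scaffolding around gpu_device is identical.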
/external/tensorflow/tensorflow/core/grappler/costs/ |
D | virtual_placer_test.cc |
    32   DeviceProperties gpu_device;  in TEST() local
    33   gpu_device.set_type("GPU");  in TEST()
    34   devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;  in TEST()
    62   DeviceProperties gpu_device;  in TEST() local
    63   gpu_device.set_type("GPU");  in TEST()
    64   devices["/GPU:0"] = gpu_device;  in TEST()
    121  DeviceProperties gpu_device;  in TEST() local
    122  gpu_device.set_type("GPU");  in TEST()
    124  "/replica:0/task:0/device:GPU:0")] = gpu_device;  in TEST()
    178  DeviceProperties gpu_device;  in TEST() local
    [all …]
|
D | analytical_cost_estimator_test.cc |
    39   DeviceProperties gpu_device;  in SetUp() local
    40   gpu_device.set_type("GPU");  in SetUp()
    41   gpu_device.set_num_cores(12);  in SetUp()
    42   gpu_device.set_frequency(1100);  in SetUp()
    43   gpu_device.set_bandwidth(180 * 1024 * 1024);  in SetUp()
    44   (*gpu_device.mutable_environment())["architecture"] = "6";  in SetUp()
    45   devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;  in SetUp()
|
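The two grappler cost tests above describe a virtual GPU purely through the DeviceProperties proto. A sketch of that setup, assuming the proto generated from tensorflow/core/protobuf/device_properties.proto; the helper name and the numeric values are illustrative, and the unit comments reflect my reading of the cost-model convention rather than anything stated in the excerpts:

    #include <string>
    #include <unordered_map>

    #include "tensorflow/core/protobuf/device_properties.pb.h"

    // Describe a GPU for the grappler cost estimators, as SetUp() above does.
    std::unordered_map<std::string, tensorflow::DeviceProperties> MakeDeviceMap() {
      tensorflow::DeviceProperties gpu_device;
      gpu_device.set_type("GPU");
      gpu_device.set_num_cores(12);
      gpu_device.set_frequency(1100);               // clock, likely MHz
      gpu_device.set_bandwidth(180 * 1024 * 1024);  // memory bandwidth, likely KB/s
      (*gpu_device.mutable_environment())["architecture"] = "6";  // value used by the tests above

      std::unordered_map<std::string, tensorflow::DeviceProperties> devices;
      devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
      return devices;
    }

The map key is the fully qualified device name; virtual_placer_test.cc exercises both the full "/job:.../device:GPU:0" form and the short "/GPU:0" form.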
/external/tensorflow/tensorflow/core/kernels/ |
D | concat_lib_gpu_impl.cu.cc |
    116  const Eigen::GpuDevice& gpu_device,  in ConcatGPUSlice() argument
    126  To32Bit(*output).slice(offset, size).device(gpu_device) =  in ConcatGPUSlice()
    129  output->slice(offset, size).device(gpu_device) = *inputs_flat[i];  in ConcatGPUSlice()
    137  void ConcatGPUImpl(const Eigen::GpuDevice& gpu_device,  in ConcatGPUImpl() argument
    143  output->dimension(0), gpu_device);  in ConcatGPUImpl()
    148  gpu_device.stream()>>>(input_ptrs, split_size, output->dimension(0),  in ConcatGPUImpl()
    151  IntType smem_max = gpu_device.sharedMemPerBlock();  in ConcatGPUImpl()
    161  gpu_device.stream()>>>(input_ptrs, output_scan,  in ConcatGPUImpl()
    167  gpu_device.stream()>>>(input_ptrs, output_scan,  in ConcatGPUImpl()
    175  const Eigen::GpuDevice& gpu_device, \
    [all …]
|
D | dynamic_stitch_op_gpu.cu.cc |
    52   void DynamicStitchGPUImpl(const Eigen::GpuDevice& gpu_device,  in DynamicStitchGPUImpl() argument
    58   auto config = GetCudaLaunchConfig(output_size, gpu_device);  in DynamicStitchGPUImpl()
    61   <<<config.block_count, config.thread_per_block, 0, gpu_device.stream()>>>(  in DynamicStitchGPUImpl()
    67   const Eigen::GpuDevice& gpu_device, const int32 slice_size, \
|
D | split_lib_gpu.cu.cc |
    208  void Run(const Eigen::GpuDevice& gpu_device, bool fixed_size,  in Run()
    214  GetCudaLaunchConfig(total_rows * total_cols, gpu_device);  in Run()
    217  gpu_device.stream()>>>(  in Run()
    220  auto config = GetCuda2DLaunchConfig(total_cols, total_rows, gpu_device);  in Run()
    221  IntType smem_max = gpu_device.sharedMemPerBlock();  in Run()
    230  gpu_device.stream()>>>(input_ptr, output_scan, total_rows,  in Run()
    235  gpu_device.stream()>>>(input_ptr, output_scan, total_rows,  in Run()
|
D | concat_lib_gpu.cc | 35 const Eigen::GpuDevice& gpu_device,
|
D | dynamic_stitch_op.cc | 139 void DynamicStitchGPUImpl(const Eigen::GpuDevice& gpu_device,
|
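The kernel files above share one launch idiom: derive a launch configuration from the Eigen::GpuDevice for the given element count, then enqueue the kernel on gpu_device.stream() so it stays ordered with the rest of the op's work. A sketch of that idiom, assuming the cuda_kernel_helper.h helpers of TensorFlow of this vintage (GetCudaLaunchConfig, CudaLaunchConfig, CUDA_1D_KERNEL_LOOP); ScaleKernel, ScaleGPUImpl, and the scaling operation itself are made-up names for illustration:

    #define EIGEN_USE_GPU
    #include "tensorflow/core/util/cuda_kernel_helper.h"

    namespace tensorflow {
    namespace {

    // Hypothetical element-wise kernel; CUDA_1D_KERNEL_LOOP grid-strides over
    // the element count chosen by the launch config.
    __global__ void ScaleKernel(const int size, const float alpha,
                                const float* in, float* out) {
      CUDA_1D_KERNEL_LOOP(i, size) { out[i] = alpha * in[i]; }
    }

    }  // namespace

    // Mirrors the launch pattern above: size the grid from the device, then
    // enqueue on gpu_device.stream().
    void ScaleGPUImpl(const Eigen::GpuDevice& gpu_device, const int size,
                      const float alpha, const float* in, float* out) {
      CudaLaunchConfig config = GetCudaLaunchConfig(size, gpu_device);
      ScaleKernel<<<config.block_count, config.thread_per_block, 0,
                    gpu_device.stream()>>>(size, alpha, in, out);
    }

    }  // namespace tensorflow

The concat and split kernels additionally consult gpu_device.sharedMemPerBlock() to decide whether a shared-memory variant fits before choosing which kernel to launch.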
/external/tensorflow/tensorflow/core/common_runtime/gpu/ |
D | gpu_util.h |
    49   static void CopyGPUTensorToCPU(Device* gpu_device,
    57   static Status Sync(Device* gpu_device);
    62   static Status SyncAll(Device* gpu_device);
    83   static uint64 Checksum(Device* gpu_device,
    93   Device* gpu_device, Tensor* gpu_tensor,
    108  static void CopyGPUTensorToSameGPU(Device* gpu_device,
|
D | gpu_util.cc |
    259  void GPUUtil::CopyGPUTensorToCPU(Device* gpu_device,  in CopyGPUTensorToCPU() argument
    266  Status s = PrepareCopy(gpu_device, device_context, *gpu_tensor, cpu_tensor,  in CopyGPUTensorToCPU()
    306  Device* gpu_device, Tensor* gpu_tensor,  in CopyCPUTensorToGPU() argument
    311  Status s = PrepareCopy(gpu_device, device_context, *cpu_tensor, gpu_tensor,  in CopyCPUTensorToGPU()
    349  Status GPUUtil::Sync(Device* gpu_device) {  in Sync() argument
    351  auto* dev_info = gpu_device->tensorflow_gpu_device_info();  in Sync()
    358  Status GPUUtil::SyncAll(Device* gpu_device) {  in SyncAll() argument
    360  auto* dev_info = gpu_device->tensorflow_gpu_device_info();  in SyncAll()
    397  uint64 GPUUtil::Checksum(Device* gpu_device,  in Checksum() argument
    403  CopyGPUTensorToCPU(gpu_device, device_context, &tensor, &copy,  in Checksum()
    [all …]
|
D | gpu_device.cc |
    1025  BaseGPUDevice* gpu_device = CreateGPUDevice(  in CreateGPUDevice() local
    1031  TF_RETURN_IF_ERROR(gpu_device->Init(options));  in CreateGPUDevice()
    1032  devices->push_back(gpu_device);  in CreateGPUDevice()
|
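gpu_util.h exposes the copies as asynchronous, callback-style operations; the excerpt truncates the parameter lists, but the full CopyGPUTensorToCPU signature (as I recall it) takes a device context, source and destination tensors, and a done callback. A common way to drive it synchronously, and roughly what rdma.cc's StreamGPUOp further down appears to do, is to block on a Notification until the callback fires; the wrapper name and the exact callback signature are my assumptions here:

    #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
    #include "tensorflow/core/lib/core/notification.h"

    namespace tensorflow {

    // Blocking wrapper around the asynchronous GPU-to-host copy.
    Status CopyToHostBlocking(Device* gpu_device,
                              const DeviceContext* device_context,
                              const Tensor& gpu_tensor, Tensor* cpu_tensor) {
      Status copy_status;
      Notification done;
      GPUUtil::CopyGPUTensorToCPU(gpu_device, device_context, &gpu_tensor,
                                  cpu_tensor,
                                  [&copy_status, &done](const Status& s) {
                                    // Record the result and release the waiter.
                                    copy_status = s;
                                    done.Notify();
                                  });
      done.WaitForNotification();
      return copy_status;
    }

    }  // namespace tensorflow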
/external/tensorflow/tensorflow/core/grappler/optimizers/ |
D | memory_optimizer_test.cc |
    206  DeviceProperties gpu_device;  in CreateVirtualCluster() local
    207  gpu_device.set_type("GPU");  in CreateVirtualCluster()
    208  gpu_device.set_frequency(1000);  in CreateVirtualCluster()
    209  gpu_device.set_num_cores(24);  in CreateVirtualCluster()
    210  gpu_device.set_bandwidth(128);  in CreateVirtualCluster()
    211  gpu_device.set_memory_size(1024 * 1024);  in CreateVirtualCluster()
    212  gpu_device.mutable_environment()->insert({"architecture", "6"});  in CreateVirtualCluster()
    215  devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;  in CreateVirtualCluster()
|
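CreateVirtualCluster() above builds the same kind of DeviceProperties map and, judging by its name, hands it to a grappler VirtualCluster. A short sketch of that final step, assuming VirtualCluster's constructor accepts the name-to-properties map; the helper name and the single-entry device map are illustrative:

    #include <memory>
    #include <string>
    #include <unordered_map>

    #include "tensorflow/core/grappler/clusters/virtual_cluster.h"

    // Hand a simulated device map to a VirtualCluster for cost estimation.
    std::unique_ptr<tensorflow::grappler::Cluster> MakeCluster() {
      tensorflow::DeviceProperties gpu_device;
      gpu_device.set_type("GPU");
      gpu_device.set_memory_size(1024 * 1024);  // illustrative

      std::unordered_map<std::string, tensorflow::DeviceProperties> devices;
      devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
      return std::unique_ptr<tensorflow::grappler::Cluster>(
          new tensorflow::grappler::VirtualCluster(devices));
    }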
/external/tensorflow/tensorflow/contrib/gdr/ |
D | README.md |
    98   2017-08-05 19:10:38.601718: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating Tenso…
    99   2017-08-05 19:10:38.601728: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating Tenso…
    100  2017-08-05 19:10:38.601736: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating Tenso…
    101  2017-08-05 19:10:38.601742: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating Tenso…
|
/external/tensorflow/tensorflow/c/ |
D | c_api_test.cc |
    1025  const string gpu_device = GPUDeviceName();  in TEST() local
    1027  if (gpu_device.empty()) return;  in TEST()
    1029  RunMinTest(gpu_device, /*use_XLA=*/false);  in TEST()
    1033  const string gpu_device = GPUDeviceName();  in TEST() local
    1035  if (gpu_device.empty()) return;  in TEST()
    1037  RunMinTest(gpu_device, /*use_XLA=*/true);  in TEST()
|
/external/tensorflow/tensorflow/core/graph/ |
D | graph_partition_test.cc |
    60   const char gpu_device[] = "/job:a/replica:0/task:0/device:GPU:0";  variable
    67   return gpu_device;  in DeviceName()
|
/external/tensorflow/tensorflow/contrib/verbs/ |
D | rdma.cc |
    965  static void StreamGPUOp(Device* gpu_device, const DeviceContext* device_context,  in StreamGPUOp() argument
    968  GPUUtil::CopyGPUTensorToCPU(gpu_device, device_context, &dummy1, &dummy2,  in StreamGPUOp()
|