/external/pytorch/test/inductor/ |
D | s429861_repro.py | 10 arg0_1: "f32[][]cuda:0", 11 arg1_1: "f32[50][1]cuda:0", 12 arg2_1: "f32[23][1]cuda:0", 13 arg3_1: "f32[38][1]cuda:0", 14 arg4_1: "f32[5][1]cuda:0", 15 arg5_1: "f32[100][1]cuda:0", 16 arg6_1: "f32[50][1]cuda:0", 17 arg7_1: "f32[77][1]cuda:0", 18 arg8_1: "f32[100][1]cuda:0", 19 arg9_1: "f32[100][1]cuda:0", [all …]
|
D | test_torchinductor_codegen_dynamic_shapes.py | 95 ("cpu", "cuda", "xpu"), is_skip=True 98 ("cpu", "cuda", "xpu"), is_skip=True 100 "test_to_device_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True), 136 "test_complex_fallback_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")), 137 "test_adaptive_avg_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")), 138 "test_adaptive_max_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")), 139 "test_fractional_max_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")), 140 "test_argmax_to_float_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")), 141 "test_avg_pool2d7_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")), 142 "test_avg_pool2d_backward4_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")), [all …]
|
D | test_torchinductor_opinfo.py | 185 inductor_skips["cuda"] = { 200 inductor_skips["cuda"]["bfloat16"] = {b8, f16, f32, f64, i32, i64} 204 inductor_skips["cuda"]["logcumsumexp"] = {f32} 205 inductor_skips["cuda"]["special.modified_bessel_i1"] = {f64} 212 }, # half_to_float is only valid for the CUDA implementation 239 inductor_expected_failures_single_sample["cuda"] = { 277 inductor_expected_failures_single_sample["cuda"].update(intentionally_not_handled) 282 inductor_gradient_expected_failures_single_sample["cuda"] = {} 289 inductor_should_fail_with_exception["cuda"] = {} 335 ("cross", "cuda", f16): {"reference_in_float": True}, [all …]
|
D | test_foreach.py | 75 torch.rand(10, 10, device="cuda:0"), 76 torch.rand(20, 20, device="cuda:0"), 80 torch.rand(10, 10, device="cuda:0"), 81 torch.rand(20, 20, device="cuda:0"), 82 torch.rand(10, 10, device="cuda:0"), 83 torch.rand(20, 20, device="cuda:0"), 124 torch.rand(10, 10, device="cuda:0"), 125 torch.rand(20, 20, device="cuda:0"), 131 return op([a0, a1], torch.tensor(3.3, device="cuda:0")) 136 torch.rand(10, 10, device="cuda:0"), [all …]
|
D | test_combo_kernels.py | 55 torch.rand(10, 10, device="cuda"), 56 torch.rand(20, 20, device="cuda"), 57 torch.rand(10, 10, device="cuda"), 77 torch.rand(10, 10, device="cuda"), 78 torch.rand(20, 20, device="cuda"), 79 torch.rand(10, 10, device="cuda"), 80 torch.rand(30, 8, device="cuda"), 100 torch.rand(10, 10, device="cuda"), 101 torch.rand(20, 20, device="cuda"), 102 torch.rand(10, 10, device="cuda"), [all …]
|
/external/pytorch/docs/cpp/source/notes/ |
D | tensor_cuda_stream.rst | 1 Tensor CUDA Stream API 4 A `CUDA Stream`_ is a linear sequence of execution that belongs to a specific CUDA device. 5 The PyTorch C++ API supports CUDA streams with the CUDAStream class and useful helper functions to … 6 …hem in `CUDAStream.h`_. This note provides more details on how to use the PyTorch C++ CUDA stream APIs. 8 .. _CUDA Stream: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#streams 9 .. _CUDAStream.h: https://pytorch.org/cppdocs/api/file_c10_cuda_CUDAStream.h.html#file-c10-cuda-cud… 12 Acquiring CUDA streams 15 PyTorch's C++ API provides the following ways to acquire a CUDA stream: 17 1. Acquire a new stream from the CUDA stream pool; streams are preallocated from the pool and retur… 26 by setting device index (defaulting to the current CUDA stream's device index). [all …]
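The Python bindings expose the same stream model as the C++ note above; a minimal sketch of the acquire-and-use pattern via torch.cuda, assuming a CUDA-capable build (torch.cuda.Stream plays the role of the pool-acquired at::cuda::CUDAStream):

    import torch

    if torch.cuda.is_available():
        # Create a new stream on device 0 (the C++ note acquires one via
        # at::cuda::getStreamFromPool).
        s = torch.cuda.Stream(device="cuda:0")
        # Make it the current stream for the enclosed ops, then restore
        # the previous current stream on exit.
        with torch.cuda.stream(s):
            x = torch.ones(4, device="cuda:0") * 2
        # Block the host until all work queued on the stream finishes.
        s.synchronize()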
|
/external/pytorch/aten/src/ATen/test/ |
D | cuda_stream_test.cpp | 3 #include <ATen/cuda/CUDAContext.h> 4 #include <ATen/cuda/CUDAEvent.h> 7 #include <c10/cuda/CUDAGuard.h> 8 #include <c10/cuda/impl/CUDAGuardImpl.h> 35 if (!at::cuda::is_available()) return; in TEST() 40 at::cuda::CUDAStream copyStream = at::cuda::getStreamFromPool(); in TEST() 42 auto s = at::cuda::getStreamFromPool(); in TEST() 56 at::cuda::CUDAStream moveStream = at::cuda::getStreamFromPool(); in TEST() 58 auto s = at::cuda::getStreamFromPool(); in TEST() 74 if (!at::cuda::is_available()) return; in TEST() [all …]
|
/external/pytorch/benchmarks/dynamo/ |
D | expected_ci_perf_inductor_torchbench.csv | 2 cuda,BERT_pytorch,16,2.6028,22.2879,41.0046,1.1965 3 cuda,Background_Matting,4,1.1296,112.7632,27.8916,1.0396 4 cuda,LearningToPaint,96,1.0951,11.3205,13.0241,0.9960 5 cuda,Super_SloMo,6,1.2160,65.3294,27.1633,1.2396 6 cuda,alexnet,128,1.1919,8.2399,6.5561,1.0008 7 cuda,attention_is_all_you_need_pytorch,256,1.4975,36.6682,43.0610,1.1824 8 cuda,dcgan,32,0.9276,2.2476,5.7151,1.0064 9 cuda,demucs,4,1.0313,51.7716,12.8195,0.9971 10 cuda,densenet121,4,1.1976,46.0111,64.0118,0.9945 11 cuda,dlrm,1024,1.3421,3.2177,4.9493,1.0009 [all …]
|
/external/pytorch/test/jit/ |
D | test_cuda.py | 27 print("CUDA not available, skipping tests", file=sys.stderr) 32 # If GPU is available, then initialize the cuda context and check 35 torch.ones(1).cuda() # initialize cuda context 36 TEST_LARGE_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 5e9 48 A suite of tests for the CUDA API in TorchScript. 53 torch.cuda.empty_cache() 63 prev_current_device_index = torch.cuda.current_device() 64 torch.cuda.synchronize() 65 torch.cuda.synchronize("cuda") 66 torch.cuda.synchronize("cuda:0") [all …]
|
/external/pytorch/test/ |
D | test_cuda_multigpu.py | 1 # Owner(s): ["module: cuda"] 17 import torch.cuda.comm as comm 41 torch.cuda.get_allocator_backend() == "cudaMallocAsync" 45 print("CUDA not available, skipping tests", file=sys.stderr) 53 snapshot = torch.cuda.memory_snapshot() 107 stats = torch.cuda.memory_stats(device) 112 torch.cuda.synchronize() 113 torch.cuda.synchronize("cuda") 114 torch.cuda.synchronize("cuda:0") 115 torch.cuda.synchronize(0) [all …]
|
D | test_cuda.py | 1 # Owner(s): ["module: cuda"] 23 import torch.cuda 25 from torch.cuda._memory_viz import ( 96 torch.cuda.get_allocator_backend() == "cudaMallocAsync" 101 TEST_PYNVML = not torch.cuda._HAS_PYNVML 103 TEST_LARGE_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 12e9 104 TEST_MEDIUM_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 6e9 105 TEST_BF16 = torch.cuda.is_bf16_supported() 110 @unittest.skipIf(not TEST_CUDA, "CUDA not available, skipping tests") 128 torch.cuda.memory._set_allocator_settings( [all …]
|
D | test_numba_integration.py | 19 import numba.cuda 24 @unittest.skipIf(not TEST_CUDA, "No cuda") 26 """torch.Tensor exposes __cuda_array_interface__ for cuda tensors. 28 An object t is considered a cuda-tensor if: 31 A cuda-tensor provides a tensor description dict: 39 https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html 69 # Sparse CPU/CUDA tensors do not implement the interface 79 sparse_cuda_t = torch.sparse_coo_tensor(indices_t, cput).cuda() 86 # CUDA tensors have the attribute and v2 interface 87 cudat = tp(10).cuda() [all …]
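The docstring above describes a protocol rather than a function, so a short sketch may help; this assumes a CUDA build and touches only the dict keys defined by the CUDA Array Interface spec linked in the snippet:

    import torch

    if torch.cuda.is_available():
        t = torch.arange(10, dtype=torch.float32, device="cuda")
        iface = t.__cuda_array_interface__
        # Spec-defined keys: 'shape', 'typestr', 'data' (pointer plus a
        # read-only flag), 'version', and 'strides' (None = C-contiguous).
        print(iface["shape"], iface["typestr"], iface["version"])
        # CPU (and sparse) tensors do not expose the attribute at all.
        assert not hasattr(torch.arange(10), "__cuda_array_interface__")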
|
/external/clang/test/Driver/ |
D | cuda-detect.cu | 5 // # Check that we properly detect CUDA installation. 7 // RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA 9 // RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s 11 // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s 14 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_21 \ 15 // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ 18 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ 19 // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \ 22 // Verify that -nocudainc prevents adding include path to CUDA headers. 23 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \ [all …]
|
/external/tensorflow/tensorflow/compiler/xla/stream_executor/gpu/ |
D | gpu_driver.h | 16 // CUDA userspace driver library wrapper functionality. 50 // The order of parameters is generally kept symmetric with the underlying CUDA 54 // http://docs.nvidia.com/cuda/cuda-driver-api/ 62 …// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__INITIALIZE.html#group__CUDA__INITIALIZ… 67 …// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g4e84b109eb… 70 // Creates a new CUDA stream associated with the given context via 73 …// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__STREAM.html#group__CUDA__STREAM_1ga581… 77 // Destroys a CUDA stream associated with the given context. 80 …// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__STREAM.html#group__CUDA__STREAM_1g244c… 83 // CUDA events can explicitly disable event TSC retrieval for some presumed [all …]
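For orientation, the driver entry points this wrapper abstracts can be called directly; a minimal ctypes sketch, assuming a Linux host with libcuda.so.1 on the loader path (cuInit and cuDeviceGetCount are real driver-API symbols; the rest is illustrative):

    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")

    # cuInit must precede any other driver-API call; 0 is CUDA_SUCCESS.
    assert libcuda.cuInit(0) == 0

    count = ctypes.c_int()
    assert libcuda.cuDeviceGetCount(ctypes.byref(count)) == 0
    print("CUDA devices visible to the driver:", count.value)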
|
/external/pytorch/cmake/Modules/ |
D | FindCUDAToolkit.cmake | 13 This script locates the NVIDIA CUDA toolkit and the associated libraries, but 14 does not require the ``CUDA`` language be enabled for a given project. This 15 module does not search for the NVIDIA CUDA Samples. 23 The CUDA Toolkit search behavior uses the following order: 25 1. If the ``CUDA`` language has been enabled we will use the directory 44 the desired path in the event that multiple CUDA Toolkits are installed. 46 5. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is 51 candidate is found, this is used. The default CUDA Toolkit install locations 57 | macOS | ``/Developer/NVIDIA/CUDA-X.Y`` | 59 | Other Unix | ``/usr/local/cuda-X.Y`` | [all …]
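The documented search order can be approximated outside CMake; a hypothetical Python sketch of the same probing logic (find_cuda_root and the exact set of environment variables are my illustration, not part of the module):

    import os
    import shutil

    def find_cuda_root():
        # Explicit hints first, mirroring the module's preference for
        # user-specified locations over heuristics.
        for var in ("CUDAToolkit_ROOT", "CUDA_PATH", "CUDA_HOME"):
            root = os.environ.get(var)
            if root and os.path.isdir(root):
                return root
        # Then nvcc on PATH, stripping the trailing bin/nvcc.
        nvcc = shutil.which("nvcc")
        if nvcc:
            return os.path.dirname(os.path.dirname(nvcc))
        # Finally the conventional Unix symlink mentioned above.
        if os.path.isdir("/usr/local/cuda"):
            return "/usr/local/cuda"
        return None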
|
/external/tensorflow/tensorflow/tools/dockerfiles/partials/ubuntu/ |
D | devel-nvidia.partial.Dockerfile | 2 ARG CUDA=11.2 3 FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} as base 4 # ARCH and CUDA are specified again because the FROM directive resets ARGs 7 ARG CUDA 17 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8… 21 cuda-command-line-tools-${CUDA/./-} \ 22 libcublas-${CUDA/./-} \ 23 libcublas-dev-${CUDA/./-} \ 24 cuda-nvprune-${CUDA/./-} \ 25 cuda-nvrtc-${CUDA/./-} \ [all …]
|
D | nvidia.partial.Dockerfile | 2 ARG CUDA=11.2 3 FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} as base 4 # ARCH and CUDA are specified again because the FROM directive resets ARGs 7 ARG CUDA 20 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8… 23 cuda-command-line-tools-${CUDA/./-} \ 24 libcublas-${CUDA/./-} \ 25 cuda-nvrtc-${CUDA/./-} \ 26 libcufft-${CUDA/./-} \ 27 libcurand-${CUDA/./-} \ [all …]
|
/external/pytorch/torch/csrc/jit/runtime/ |
D | register_cuda_ops.cpp | 1 // This file registers special JIT operators used to implement the PyTorch CUDA 4 #include <torch/csrc/jit/cuda/cuda.h> 21 auto current_device_index = c10::cuda::current_device(); in _device_synchronize() 26 c10::cuda::set_device(device_index); in _device_synchronize() 28 c10::cuda::device_synchronize(); in _device_synchronize() 32 c10::cuda::set_device(current_device_index); in _device_synchronize() 38 "cuda::current_stream.device(Device? device) -> __torch__.torch.classes.cuda.Stream", 43 : c10::cuda::current_device(); in __anon3d06fc340202() 44 auto s = c10::cuda::getCurrentCUDAStream(device_index); in __anon3d06fc340202() 50 "cuda::current_stream.int(int? val) -> __torch__.torch.classes.cuda.Stream", [all …]
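These registrations back the torch.cuda surface available inside TorchScript; a sketch of scripted code that lowers to them, assuming a CUDA-enabled build (the scripted API surface can vary across PyTorch versions):

    import torch

    @torch.jit.script
    def current_stream_index(device_index: int) -> int:
        # Lowers to the cuda::current_stream.int operator registered here.
        s = torch.cuda.current_stream(device_index)
        # Lowers to the registered synchronize operator; drains queued work.
        torch.cuda.synchronize()
        return s.device_index()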
|
/external/pytorch/docs/source/notes/ |
D | cuda.rst | 2 :description: A guide to torch.cuda, a PyTorch module to run CUDA operations 3 :keywords: memory management, PYTORCH_CUDA_ALLOC_CONF, optimize PyTorch, CUDA 7 CUDA semantics 11 :mod:`torch.cuda` is used to set up and run CUDA operations. It keeps track of 12 the currently selected GPU, and all CUDA tensors you allocate will by default be 14 :any:`torch.cuda.device` context manager. 22 such as :meth:`~torch.Tensor.to` and :meth:`~torch.Tensor.cuda`. 28 cuda = torch.device('cuda') # Default CUDA device 29 cuda0 = torch.device('cuda:0') 30 cuda2 = torch.device('cuda:2') # GPU 2 (these are 0-indexed) [all …]
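A runnable sketch of the selected-device semantics described above, guarded so it is a no-op on machines without CUDA:

    import torch

    if torch.cuda.is_available():
        # Allocation follows the currently selected GPU (index 0 by default)...
        x = torch.tensor([1., 2.], device="cuda")
        print(x.device)  # device(type='cuda', index=0)

        # ...and torch.cuda.device changes the selection for a scope.
        with torch.cuda.device(0):
            y = torch.tensor([1., 2.]).cuda()
            assert y.device == torch.device("cuda", 0)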
|
D | hip.rst | 8 to ease conversion of CUDA applications to portable C++ code. HIP is used when 9 converting existing CUDA applications like PyTorch to portable C++ and for new 14 HIP Interfaces Reuse the CUDA Interfaces 17 PyTorch for HIP intentionally reuses the existing :mod:`torch.cuda` interfaces. 21 The example from :ref:`cuda-semantics` will work exactly the same for HIP:: 23 cuda = torch.device('cuda') # Default HIP device 24 cuda0 = torch.device('cuda:0') # 'rocm' or 'hip' are not valid, use 'cuda' 25 cuda2 = torch.device('cuda:2') # GPU 2 (these are 0-indexed) 28 # x.device is device(type='cuda', index=0) 29 y = torch.tensor([1., 2.]).cuda() [all …]
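Because the HIP build reuses the torch.cuda namespace, portable code never branches on the backend for allocation; a small sketch that consults torch.version.hip (None on CUDA builds) only to report which backend is active:

    import torch

    if torch.cuda.is_available():
        backend = "HIP/ROCm" if torch.version.hip else "CUDA"
        print(f"running on {backend}")
        # Identical call on both backends: 'cuda' names the HIP device too.
        x = torch.ones(3, device="cuda:0")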
|
/external/tensorflow/third_party/gpus/cuda/ |
D | BUILD.tpl | 9 # Config setting whether TensorFlow is built with CUDA support using clang. 21 # Config setting whether TensorFlow is built with CUDA support using nvcc. 48 # Provides CUDA headers for '#include "third_party/gpus/cuda/include/cuda.h"' 53 "cuda/cuda_config.h", 54 ":cuda-include", 58 ".", # required to include cuda/cuda/cuda_config.h as cuda/config.h 59 "cuda/include", 65 srcs = ["cuda/lib/%{cudart_static_lib}"], 75 srcs = ["cuda/lib/%{cuda_driver_lib}"], 80 srcs = ["cuda/lib/%{cudart_lib}"], [all …]
|
/external/pytorch/test/distributed/_shard/sharding_spec/ |
D | test_sharding_spec.py | 45 @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "2 CUDA GPUs are needed") 48 DevicePlacementSpec("cuda:0") 50 DevicePlacementSpec(torch.device("cuda:0")) 51 DevicePlacementSpec("rank:0/cuda:0") 57 DevicePlacementSpec("cuda:foo") 61 DevicePlacementSpec("rank:0/cuda:foo") 65 @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "2 CUDA GPUs are needed") 69 ChunkShardingSpec(0, [torch.device("cuda:0"), torch.device("cuda:1")]) 70 ChunkShardingSpec(-1, ["cuda:0", "cuda:1"]) 71 ChunkShardingSpec(0, ["rank:0/cuda:0", "rank:0/cuda:1"]) [all …]
|
/external/tensorflow/tensorflow/tools/dockerfiles/dockerfiles/ |
D | devel-gpu.Dockerfile | 25 ARG CUDA=11.2 26 FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} as base 27 # ARCH and CUDA are specified again because the FROM directive resets ARGs 30 ARG CUDA 40 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8… 44 cuda-command-line-tools-${CUDA/./-} \ 45 libcublas-${CUDA/./-} \ 46 libcublas-dev-${CUDA/./-} \ 47 cuda-nvprune-${CUDA/./-} \ 48 cuda-nvrtc-${CUDA/./-} \ [all …]
|
/external/tensorflow/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/ |
D | devel-gpu-ppc64le.Dockerfile | 25 ARG CUDA=11.2 26 FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} as base 27 # ARCH and CUDA are specified again because the FROM directive resets ARGs 30 ARG CUDA 40 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8… 44 cuda-command-line-tools-${CUDA/./-} \ 45 libcublas-${CUDA/./-} \ 46 libcublas-dev-${CUDA/./-} \ 47 cuda-nvprune-${CUDA/./-} \ 48 cuda-nvrtc-${CUDA/./-} \ [all …]
|
/external/pytorch/torch/testing/_internal/ |
D | common_cuda.py | 3 r"""This file is allowed to initialize CUDA context when imported.""" 7 import torch.cuda 14 CUDA_ALREADY_INITIALIZED_ON_IMPORT = torch.cuda.is_initialized() 17 TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2 18 CUDA_DEVICE = torch.device("cuda:0") if TEST_CUDA else None 27 SM53OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (… 28 SM60OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (… 29 SM70OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (… 30 SM75OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (… 31 SM80OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (… [all …]
|