
Searched full:cuda (Results 1 – 25 of 3367) sorted by relevance


/external/pytorch/test/inductor/
s429861_repro.py
10 arg0_1: "f32[][]cuda:0",
11 arg1_1: "f32[50][1]cuda:0",
12 arg2_1: "f32[23][1]cuda:0",
13 arg3_1: "f32[38][1]cuda:0",
14 arg4_1: "f32[5][1]cuda:0",
15 arg5_1: "f32[100][1]cuda:0",
16 arg6_1: "f32[50][1]cuda:0",
17 arg7_1: "f32[77][1]cuda:0",
18 arg8_1: "f32[100][1]cuda:0",
19 arg9_1: "f32[100][1]cuda:0",
[all …]
test_torchinductor_codegen_dynamic_shapes.py
95 ("cpu", "cuda", "xpu"), is_skip=True
98 ("cpu", "cuda", "xpu"), is_skip=True
100 "test_to_device_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu"), is_skip=True),
136 "test_complex_fallback_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
137 "test_adaptive_avg_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
138 "test_adaptive_max_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
139 "test_fractional_max_pool2d2_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
140 "test_argmax_to_float_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
141 "test_avg_pool2d7_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
142 "test_avg_pool2d_backward4_dynamic_shapes": TestFailure(("cpu", "cuda", "xpu")),
[all …]
test_torchinductor_opinfo.py
185 inductor_skips["cuda"] = {
200 inductor_skips["cuda"]["bfloat16"] = {b8, f16, f32, f64, i32, i64}
204 inductor_skips["cuda"]["logcumsumexp"] = {f32}
205 inductor_skips["cuda"]["special.modified_bessel_i1"] = {f64}
212 }, # half_to_float is only valid for the CUDA implementation
239 inductor_expected_failures_single_sample["cuda"] = {
277 inductor_expected_failures_single_sample["cuda"].update(intentionally_not_handled)
282 inductor_gradient_expected_failures_single_sample["cuda"] = {}
289 inductor_should_fail_with_exception["cuda"] = {}
335 ("cross", "cuda", f16): {"reference_in_float": True},
[all …]
test_foreach.py
75 torch.rand(10, 10, device="cuda:0"),
76 torch.rand(20, 20, device="cuda:0"),
80 torch.rand(10, 10, device="cuda:0"),
81 torch.rand(20, 20, device="cuda:0"),
82 torch.rand(10, 10, device="cuda:0"),
83 torch.rand(20, 20, device="cuda:0"),
124 torch.rand(10, 10, device="cuda:0"),
125 torch.rand(20, 20, device="cuda:0"),
131 return op([a0, a1], torch.tensor(3.3, device="cuda:0"))
136 torch.rand(10, 10, device="cuda:0"),
[all …]
test_combo_kernels.py
55 torch.rand(10, 10, device="cuda"),
56 torch.rand(20, 20, device="cuda"),
57 torch.rand(10, 10, device="cuda"),
77 torch.rand(10, 10, device="cuda"),
78 torch.rand(20, 20, device="cuda"),
79 torch.rand(10, 10, device="cuda"),
80 torch.rand(30, 8, device="cuda"),
100 torch.rand(10, 10, device="cuda"),
101 torch.rand(20, 20, device="cuda"),
102 torch.rand(10, 10, device="cuda"),
[all …]
/external/pytorch/docs/cpp/source/notes/
tensor_cuda_stream.rst
1 Tensor CUDA Stream API
4 A `CUDA Stream`_ is a linear sequence of execution that belongs to a specific CUDA device.
5 The PyTorch C++ API supports CUDA streams with the CUDAStream class and useful helper functions to …
6 …hem in `CUDAStream.h`_. This note provides more details on how to use PyTorch C++ CUDA Stream APIs.
8 .. _CUDA Stream: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#streams
9 .. _CUDAStream.h: https://pytorch.org/cppdocs/api/file_c10_cuda_CUDAStream.h.html#file-c10-cuda-cud…
12 Acquiring CUDA stream
15 PyTorch's C++ API provides the following ways to acquire a CUDA stream:
17 1. Acquire a new stream from the CUDA stream pool; streams are preallocated from the pool and retur…
26 by setting device index (defaulting to the current CUDA stream's device index).
[all …]
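
The note above describes the C++ stream-pool API; the same pattern is reachable from Python through torch.cuda.Stream. A minimal sketch, assuming a CUDA-enabled build (the variable name `side` is illustrative, not from the docs):

    import torch

    # Python analogue of the C++ pool API sketched above
    # (at::cuda::getStreamFromPool). Assumes torch.cuda.is_available().
    side = torch.cuda.Stream()            # new stream on the current device
    with torch.cuda.stream(side):         # make `side` the current stream
        x = torch.ones(4, device="cuda")  # this kernel is enqueued on `side`
    # Order later default-stream work after everything queued on `side`:
    torch.cuda.current_stream().wait_stream(side)
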
/external/pytorch/aten/src/ATen/test/
cuda_stream_test.cpp
3 #include <ATen/cuda/CUDAContext.h>
4 #include <ATen/cuda/CUDAEvent.h>
7 #include <c10/cuda/CUDAGuard.h>
8 #include <c10/cuda/impl/CUDAGuardImpl.h>
35 if (!at::cuda::is_available()) return; in TEST()
40 at::cuda::CUDAStream copyStream = at::cuda::getStreamFromPool(); in TEST()
42 auto s = at::cuda::getStreamFromPool(); in TEST()
56 at::cuda::CUDAStream moveStream = at::cuda::getStreamFromPool(); in TEST()
58 auto s = at::cuda::getStreamFromPool(); in TEST()
74 if (!at::cuda::is_available()) return; in TEST()
[all …]
/external/pytorch/benchmarks/dynamo/
expected_ci_perf_inductor_torchbench.csv
2 cuda,BERT_pytorch,16,2.6028,22.2879,41.0046,1.1965
3 cuda,Background_Matting,4,1.1296,112.7632,27.8916,1.0396
4 cuda,LearningToPaint,96,1.0951,11.3205,13.0241,0.9960
5 cuda,Super_SloMo,6,1.2160,65.3294,27.1633,1.2396
6 cuda,alexnet,128,1.1919,8.2399,6.5561,1.0008
7 cuda,attention_is_all_you_need_pytorch,256,1.4975,36.6682,43.0610,1.1824
8 cuda,dcgan,32,0.9276,2.2476,5.7151,1.0064
9 cuda,demucs,4,1.0313,51.7716,12.8195,0.9971
10 cuda,densenet121,4,1.1976,46.0111,64.0118,0.9945
11 cuda,dlrm,1024,1.3421,3.2177,4.9493,1.0009
[all …]
/external/pytorch/test/jit/
test_cuda.py
27 print("CUDA not available, skipping tests", file=sys.stderr)
32 # If GPU is available, then initialize the cuda context and check
35 torch.ones(1).cuda() # initialize cuda context
36 TEST_LARGE_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 5e9
48 A suite of tests for the CUDA API in TorchScript.
53 torch.cuda.empty_cache()
63 prev_current_device_index = torch.cuda.current_device()
64 torch.cuda.synchronize()
65 torch.cuda.synchronize("cuda")
66 torch.cuda.synchronize("cuda:0")
[all …]
/external/pytorch/test/
test_cuda_multigpu.py
1 # Owner(s): ["module: cuda"]
17 import torch.cuda.comm as comm
41 torch.cuda.get_allocator_backend() == "cudaMallocAsync"
45 print("CUDA not available, skipping tests", file=sys.stderr)
53 snapshot = torch.cuda.memory_snapshot()
107 stats = torch.cuda.memory_stats(device)
112 torch.cuda.synchronize()
113 torch.cuda.synchronize("cuda")
114 torch.cuda.synchronize("cuda:0")
115 torch.cuda.synchronize(0)
[all …]
test_cuda.py
1 # Owner(s): ["module: cuda"]
23 import torch.cuda
25 from torch.cuda._memory_viz import (
96 torch.cuda.get_allocator_backend() == "cudaMallocAsync"
101 TEST_PYNVML = not torch.cuda._HAS_PYNVML
103 TEST_LARGE_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 12e9
104 TEST_MEDIUM_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 6e9
105 TEST_BF16 = torch.cuda.is_bf16_supported()
110 @unittest.skipIf(not TEST_CUDA, "CUDA not available, skipping tests")
128 torch.cuda.memory._set_allocator_settings(
[all …]
test_numba_integration.py
19 import numba.cuda
24 @unittest.skipIf(not TEST_CUDA, "No cuda")
26 """torch.Tensor exposes __cuda_array_interface__ for cuda tensors.
28 An object t is considered a cuda-tensor if:
31 A cuda-tensor provides a tensor description dict:
39 https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html
69 # Sparse CPU/CUDA tensors do not implement the interface
79 sparse_cuda_t = torch.sparse_coo_tensor(indices_t, cput).cuda()
86 # CUDA tensors have the attribute and v2 interface
87 cudat = tp(10).cuda()
[all …]
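
Since the docstring above centers on __cuda_array_interface__, a minimal interop sketch may help; it assumes CUDA builds of both PyTorch and numba, and the names `t`, `d`, `h` are illustrative:

    import torch
    import numba.cuda

    # A CUDA tensor exposes __cuda_array_interface__, so numba can wrap
    # the same device buffer without copying.
    t = torch.arange(10, dtype=torch.float32, device="cuda")
    d = numba.cuda.as_cuda_array(t)  # zero-copy view over t's storage
    assert d.__cuda_array_interface__["data"][0] == t.data_ptr()
    h = d.copy_to_host()             # round-trip the values back to the host
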
/external/clang/test/Driver/
cuda-detect.cu
5 // # Check that we properly detect CUDA installation.
7 // RUN: --sysroot=%S/no-cuda-there 2>&1 | FileCheck %s -check-prefix NOCUDA
9 // RUN: --sysroot=%S/Inputs/CUDA 2>&1 | FileCheck %s
11 // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 | FileCheck %s
14 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_21 \
15 // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
18 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
19 // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
22 // Verify that -nocudainc prevents adding include path to CUDA headers.
23 // RUN: %clang -### -v --target=i386-unknown-linux --cuda-gpu-arch=sm_35 \
[all …]
/external/tensorflow/tensorflow/compiler/xla/stream_executor/gpu/
gpu_driver.h
16 // CUDA userspace driver library wrapper functionality.
50 // The order of parameters is generally kept symmetric with the underlying CUDA
54 // http://docs.nvidia.com/cuda/cuda-driver-api/
62 …// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__INITIALIZE.html#group__CUDA__INITIALIZ…
67 …// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g4e84b109eb…
70 // Creates a new CUDA stream associated with the given context via
73 …// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__STREAM.html#group__CUDA__STREAM_1ga581…
77 // Destroys a CUDA stream associated with the given context.
80 …// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__STREAM.html#group__CUDA__STREAM_1g244c…
83 // CUDA events can explicitly disable event TSC retrieval for some presumed
[all …]
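
For orientation, the driver entry points this header wraps (cuInit, cuStreamCreate, cuStreamDestroy) can be exercised directly. Below is a rough ctypes sketch, not the wrapper's actual code path; it assumes libcuda.so is loadable, one GPU is present, and reduces error handling to asserts:

    import ctypes

    cuda = ctypes.CDLL("libcuda.so")
    CUDA_SUCCESS = 0

    assert cuda.cuInit(0) == CUDA_SUCCESS
    dev = ctypes.c_int()
    assert cuda.cuDeviceGet(ctypes.byref(dev), 0) == CUDA_SUCCESS

    # Streams belong to a context; retain the device's primary context.
    ctx = ctypes.c_void_p()
    assert cuda.cuDevicePrimaryCtxRetain(ctypes.byref(ctx), dev) == CUDA_SUCCESS
    assert cuda.cuCtxSetCurrent(ctx) == CUDA_SUCCESS

    stream = ctypes.c_void_p()
    assert cuda.cuStreamCreate(ctypes.byref(stream), 0) == CUDA_SUCCESS  # default flags
    assert cuda.cuStreamDestroy(stream) == CUDA_SUCCESS
    assert cuda.cuDevicePrimaryCtxRelease(dev) == CUDA_SUCCESS
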
/external/pytorch/cmake/Modules/
FindCUDAToolkit.cmake
13 This script locates the NVIDIA CUDA toolkit and the associated libraries, but
14 does not require the ``CUDA`` language be enabled for a given project. This
15 module does not search for the NVIDIA CUDA Samples.
23 The CUDA Toolkit search behavior uses the following order:
25 1. If the ``CUDA`` language has been enabled we will use the directory
44 the desired path in the event that multiple CUDA Toolkits are installed.
46 5. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is
51 candidate is found, this is used. The default CUDA Toolkit install locations
57 | macOS | ``/Developer/NVIDIA/CUDA-X.Y`` |
59 | Other Unix | ``/usr/local/cuda-X.Y`` |
[all …]
/external/tensorflow/tensorflow/tools/dockerfiles/partials/ubuntu/
devel-nvidia.partial.Dockerfile
2 ARG CUDA=11.2
3 FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} as base
4 # ARCH and CUDA are specified again because the FROM directive resets ARGs
7 ARG CUDA
17 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8…
21 cuda-command-line-tools-${CUDA/./-} \
22 libcublas-${CUDA/./-} \
23 libcublas-dev-${CUDA/./-} \
24 cuda-nvprune-${CUDA/./-} \
25 cuda-nvrtc-${CUDA/./-} \
[all …]
nvidia.partial.Dockerfile
2 ARG CUDA=11.2
3 FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} as base
4 # ARCH and CUDA are specified again because the FROM directive resets ARGs
7 ARG CUDA
20 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8…
23 cuda-command-line-tools-${CUDA/./-} \
24 libcublas-${CUDA/./-} \
25 cuda-nvrtc-${CUDA/./-} \
26 libcufft-${CUDA/./-} \
27 libcurand-${CUDA/./-} \
[all …]
/external/pytorch/torch/csrc/jit/runtime/
register_cuda_ops.cpp
1 // This file registers special JIT operators used to implement the PyTorch CUDA
4 #include <torch/csrc/jit/cuda/cuda.h>
21 auto current_device_index = c10::cuda::current_device(); in _device_synchronize()
26 c10::cuda::set_device(device_index); in _device_synchronize()
28 c10::cuda::device_synchronize(); in _device_synchronize()
32 c10::cuda::set_device(current_device_index); in _device_synchronize()
38 "cuda::current_stream.device(Device? device) -> __torch__.torch.classes.cuda.Stream",
43 : c10::cuda::current_device(); in __anon3d06fc340202()
44 auto s = c10::cuda::getCurrentCUDAStream(device_index); in __anon3d06fc340202()
50 "cuda::current_stream.int(int? val) -> __torch__.torch.classes.cuda.Stream",
[all …]
/external/pytorch/docs/source/notes/
cuda.rst
2 :description: A guide to torch.cuda, a PyTorch module to run CUDA operations
3 :keywords: memory management, PYTORCH_CUDA_ALLOC_CONF, optimize PyTorch, CUDA
7 CUDA semantics
11 :mod:`torch.cuda` is used to set up and run CUDA operations. It keeps track of
12 the currently selected GPU, and all CUDA tensors you allocate will by default be
14 :any:`torch.cuda.device` context manager.
22 such as :meth:`~torch.Tensor.to` and :meth:`~torch.Tensor.cuda`.
28 cuda = torch.device('cuda') # Default CUDA device
29 cuda0 = torch.device('cuda:0')
30 cuda2 = torch.device('cuda:2') # GPU 2 (these are 0-indexed)
[all …]
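
The docs' example is cut off above; a short sketch of the torch.cuda.device context manager the prose mentions, assuming at least two GPUs (names follow the docs' own cuda0/cuda2 example):

    import torch

    x = torch.tensor([1., 2.], device="cuda")      # current device (GPU 0)
    with torch.cuda.device(1):
        y = torch.tensor([1., 2.], device="cuda")  # allocated on GPU 1
        z = x.to("cuda:0")                         # an explicit index overrides the context
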
hip.rst
8 to ease conversion of CUDA applications to portable C++ code. HIP is used when
9 converting existing CUDA applications like PyTorch to portable C++ and for new
14 HIP Interfaces Reuse the CUDA Interfaces
17 PyTorch for HIP intentionally reuses the existing :mod:`torch.cuda` interfaces.
21 The example from :ref:`cuda-semantics` will work exactly the same for HIP::
23 cuda = torch.device('cuda') # Default HIP device
24 cuda0 = torch.device('cuda:0') # 'rocm' or 'hip' are not valid, use 'cuda'
25 cuda2 = torch.device('cuda:2') # GPU 2 (these are 0-indexed)
28 # x.device is device(type='cuda', index=0)
29 y = torch.tensor([1., 2.]).cuda()
[all …]
/external/tensorflow/third_party/gpus/cuda/
BUILD.tpl
9 # Config setting whether TensorFlow is built with CUDA support using clang.
21 # Config setting whether TensorFlow is built with CUDA support using nvcc.
48 # Provides CUDA headers for '#include "third_party/gpus/cuda/include/cuda.h"'
53 "cuda/cuda_config.h",
54 ":cuda-include",
58 ".", # required to include cuda/cuda/cuda_config.h as cuda/config.h
59 "cuda/include",
65 srcs = ["cuda/lib/%{cudart_static_lib}"],
75 srcs = ["cuda/lib/%{cuda_driver_lib}"],
80 srcs = ["cuda/lib/%{cudart_lib}"],
[all …]
/external/pytorch/test/distributed/_shard/sharding_spec/
test_sharding_spec.py
45 @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "2 CUDA GPUs are needed")
48 DevicePlacementSpec("cuda:0")
50 DevicePlacementSpec(torch.device("cuda:0"))
51 DevicePlacementSpec("rank:0/cuda:0")
57 DevicePlacementSpec("cuda:foo")
61 DevicePlacementSpec("rank:0/cuda:foo")
65 @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "2 CUDA GPUs are needed")
69 ChunkShardingSpec(0, [torch.device("cuda:0"), torch.device("cuda:1")])
70 ChunkShardingSpec(-1, ["cuda:0", "cuda:1"])
71 ChunkShardingSpec(0, ["rank:0/cuda:0", "rank:0/cuda:1"])
[all …]
/external/tensorflow/tensorflow/tools/dockerfiles/dockerfiles/
devel-gpu.Dockerfile
25 ARG CUDA=11.2
26 FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} as base
27 # ARCH and CUDA are specified again because the FROM directive resets ARGs
30 ARG CUDA
40 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8…
44 cuda-command-line-tools-${CUDA/./-} \
45 libcublas-${CUDA/./-} \
46 libcublas-dev-${CUDA/./-} \
47 cuda-nvprune-${CUDA/./-} \
48 cuda-nvrtc-${CUDA/./-} \
[all …]
/external/tensorflow/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/
devel-gpu-ppc64le.Dockerfile
25 ARG CUDA=11.2
26 FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} as base
27 # ARCH and CUDA are specified again because the FROM directive resets ARGs
30 ARG CUDA
40 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x8…
44 cuda-command-line-tools-${CUDA/./-} \
45 libcublas-${CUDA/./-} \
46 libcublas-dev-${CUDA/./-} \
47 cuda-nvprune-${CUDA/./-} \
48 cuda-nvrtc-${CUDA/./-} \
[all …]
/external/pytorch/torch/testing/_internal/
common_cuda.py
3 r"""This file is allowed to initialize CUDA context when imported."""
7 import torch.cuda
14 CUDA_ALREADY_INITIALIZED_ON_IMPORT = torch.cuda.is_initialized()
17 TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
18 CUDA_DEVICE = torch.device("cuda:0") if TEST_CUDA else None
27 SM53OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (…
28 SM60OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (…
29 SM70OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (…
30 SM75OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (…
31 SM80OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (…
[all …]
