Searched refs: block_work_size (Results 1 – 9 of 9), sorted by relevance

/external/pytorch/aten/src/ATen/native/cuda/
CUDALoops.cuh
    60   int remaining = N - block_work_size() * blockIdx.x;               in C10_LAUNCH_BOUNDS_1()
    62   if (remaining < block_work_size()) { // if this block handles the reminder,   in C10_LAUNCH_BOUNDS_1()
    99   int remaining = N - block_work_size() * blockIdx.x;               in C10_LAUNCH_BOUNDS_1()
   114   int64_t grid = (N + block_work_size() - 1) / block_work_size();   in launch_vectorized_kernel()
   161   int64_t grid = (N + block_work_size() - 1) / block_work_size();   in launch_unrolled_kernel()

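Two idioms recur across these results: launchers size the grid by rounding N up to a multiple of block_work_size() (the `(N + block_work_size() - 1) / block_work_size()` expression), and each block compares its remaining element count against block_work_size() to pick either a fully unrolled fast path or a bounds-checked tail loop. A minimal sketch of that shape, assuming illustrative constants and a trivial kernel body (not the PyTorch implementation, which dispatches through functors and vectorized policies):

    constexpr int num_threads_ = 128;     // assumption; PyTorch derives this from C10_WARP_SIZE
    constexpr int thread_work_size_ = 4;  // assumption: elements handled per thread
    constexpr int block_work_size_ = thread_work_size_ * num_threads_;

    __global__ void elementwise_sketch(int N, float* out, const float* in) {
      int base = block_work_size_ * blockIdx.x;
      int remaining = N - base;
      if (remaining < block_work_size_) {
        // tail block: every access must be bounds-checked
        for (int i = 0; i < thread_work_size_; i++) {
          int idx = threadIdx.x + i * num_threads_;
          if (idx < remaining) out[base + idx] = 2.0f * in[base + idx];
        }
      } else {
        // full block: all block_work_size_ elements are in range
        for (int i = 0; i < thread_work_size_; i++) {
          int idx = threadIdx.x + i * num_threads_;
          out[base + idx] = 2.0f * in[base + idx];
        }
      }
    }

    // launch: one block per block_work_size_ elements, rounded up
    // int64_t grid = (N + block_work_size_ - 1) / block_work_size_;
    // elementwise_sketch<<<grid, num_threads_>>>(N, out, in);
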
MemoryAccess.cuh
    64   auto ptr = reinterpret_cast<arg_t *>(self.data[arg_index + 1]) + block_work_size() * idx;   in apply()
   212   int linear_idx = thread_idx + block_work_size() * idx;   in load()
   227   int linear_idx = thread_idx + block_work_size() * idx;   in store()
   277   scalar_t *to = reinterpret_cast<scalar_t *>(data[0]) + block_work_size() * idx;   in store()
   319   int linear_idx = thread_idx + block_work_size() * idx;   in load()
   335   int linear_idx = thread_idx + block_work_size() * idx;   in store()

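In these load()/store() policies, idx is the block index passed in by the caller, and thread_idx starts at threadIdx.x and advances by num_threads() each unrolled iteration, so linear_idx walks this thread's share of the block's contiguous block_work_size()-element chunk. A hedged sketch of that loop shape, reusing the illustrative constants from the sketch above (the real policies are templated classes with vectorization and alignment handling):

    // each thread copies thread_work_size_ elements out of its block's chunk
    __device__ void load_chunk_sketch(float* regs /* per-thread registers */,
                                      const float* src, int idx /* block index */) {
      int thread_idx = threadIdx.x;
      #pragma unroll
      for (int i = 0; i < thread_work_size_; i++) {
        int linear_idx = thread_idx + block_work_size_ * idx;
        regs[i] = src[linear_idx];
        thread_idx += num_threads_;  // stride to this thread's next element
      }
    }
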
CUDAJitLoops.cuh
    93   const uint32_t grid = (N + block_work_size() - 1) / block_work_size();   in launch_jitted_unrolled_kernel()
   119   const uint32_t grid = (N + block_work_size() - 1) / block_work_size();   in launch_jitted_vectorized_kernel()

RangeFactories.cu
    36   constexpr int block_work_size = thread_work_size * num_threads();   (variable definition)
    43   index_t idx = block_work_size * blockIdx.x + num_threads() * i + threadIdx.x;   in C10_LAUNCH_BOUNDS_1()
    56   int64_t grid = (N + block_work_size - 1) / block_work_size;   in gpu_kernel_with_index()

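Unlike the other files, RangeFactories.cu defines block_work_size as a file-local constexpr variable rather than using the shared function from thread_constants.h (hit 36 is flagged as a variable). The index expression on line 43 combines all three constants into a per-element position. A sketch of a range-style kernel built around that indexing, again with the illustrative constants above (the real gpu_kernel_with_index takes a generic functor, not a hard-coded start/step):

    template <typename index_t>
    __global__ void range_sketch(index_t N, float start, float step, float* out) {
      #pragma unroll
      for (int i = 0; i < thread_work_size_; i++) {
        index_t idx = block_work_size_ * blockIdx.x + num_threads_ * i + threadIdx.x;
        if (idx < N) out[idx] = start + step * idx;
      }
    }
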
thread_constants.h
    22   constexpr int block_work_size() { return thread_work_size() * num_threads(); }   (function definition)

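This one-liner is the definition every other hit resolves to: a block's work equals per-thread work times the number of threads. Assuming the values PyTorch has typically used on NVIDIA hardware (num_threads() == 4 * C10_WARP_SIZE == 128 and thread_work_size() == 4; worth verifying against the current header), the grid arithmetic seen throughout the results works out like this:

    // block_work_size() == 4 * 128 == 512 elements per block
    // for N == 10000 elements:
    //   grid == (10000 + 511) / 512 == 20 blocks
    //   the last block sees remaining == 10000 - 19 * 512 == 272 < 512,
    //   so it takes the bounds-checked tail path from CUDALoops.cuh
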
Loops.cuh
   260   int remaining = N - block_work_size() * blockIdx.x;   in C10_LAUNCH_BOUNDS_1()
   267   int64_t grid = (N + block_work_size() - 1) / block_work_size();   in launch_unrolled_kernel_for_multi_outputs()

DistributionTemplates.h
   196   int base_index = block_work_size() * blockIdx.x;   in distribution_binary_elementwise_kernel()
   197   int remaining = std::min<int>(numel - base_index, block_work_size());   in distribution_binary_elementwise_kernel()
   259   int64_t grid = (numel + block_work_size() - 1) / block_work_size();   in distribution_binary_kernel()

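The distribution kernels express the remainder logic slightly differently: rather than branching on remaining < block_work_size(), they clamp remaining up front and loop over at most that many elements. A fragment-level sketch of that variant, with the surrounding kernel elided (std::min<int> is usable in device code, as the hit on line 197 shows):

    int base_index = block_work_size_ * blockIdx.x;
    int remaining  = std::min<int>(numel - base_index, block_work_size_);
    // all threads then iterate over at most `remaining` elements,
    // so no separate fast/tail code paths are needed
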
/external/pytorch/aten/src/ATen/test/
cuda_vectorized_test.cu
    95   policy.load_single_arg(accessor, src + block_work_size() * blockIdx.x);   in vectorized_copy()
   111   vectorized_copy<double, 4><<<total_work_size / block_work_size(), num_threads()>>>(b2, b1);   in TEST()
   125   vectorized_copy<double, 2><<<total_work_size / block_work_size(), num_threads()>>>(b2, b1);   in TEST()
   139   vectorized_copy<double, 1><<<total_work_size / block_work_size(), num_threads()>>>(b2, b1);   in TEST()

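Note that the test sizes its grid with plain division, total_work_size / block_work_size(), not the rounded-up form used by the launchers above; that only covers every element when total_work_size is an exact multiple of block_work_size(), which the test presumably arranges by construction. A usage sketch under that assumption (the 4096 value is illustrative; b1/b2 are the test's buffers):

    constexpr int total_work_size_ = 4096;  // assumption: exact multiple of block_work_size_
    // grid == total_work_size_ / block_work_size_ == 8, no tail block required
    // vectorized_copy<double, 4><<<total_work_size_ / block_work_size_, num_threads_>>>(b2, b1);
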
/external/pytorch/aten/src/ATen/cuda/
jiterator.cu
    21   const uint32_t grid = (N + block_work_size() - 1) / block_work_size();   in launch_jitted_vectorized_kernel_dynamic()
   126   const uint32_t grid = (N + block_work_size() - 1) / block_work_size();   in launch_jitted_unrolled_kernel_dynamic()