/external/pytorch/c10/xpu/test/impl/ |
D | XPUGuardTest.cpp |
    61  constexpr int numel = 1024;    in TEST() local
    62  int hostData1[numel];    in TEST()
    63  initHostData(hostData1, numel);    in TEST()
    64  int hostData2[numel];    in TEST()
    65  clearHostData(hostData2, numel);    in TEST()
    68  int* deviceData1 = sycl::malloc_device<int>(numel, xpu_stream1);    in TEST()
    72  xpu_stream1.queue().memcpy(deviceData1, hostData1, sizeof(int) * numel);    in TEST()
    77  xpu_stream2.queue().memcpy(hostData2, deviceData1, sizeof(int) * numel);    in TEST()
    81  validateHostData(hostData2, numel);    in TEST()
    86  clearHostData(hostData2, numel);    in TEST()
    [all …]
|
D | XPUTest.h |
    5   static inline void initHostData(int* hostData, int numel) {    in initHostData() argument
    6   for (const auto i : c10::irange(numel)) {    in initHostData()
    11  static inline void clearHostData(int* hostData, int numel) {    in clearHostData() argument
    12  for (const auto i : c10::irange(numel)) {    in clearHostData()
    17  static inline void validateHostData(int* hostData, int numel) {    in validateHostData() argument
    18  for (const auto i : c10::irange(numel)) {    in validateHostData()
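The XPUTest.h helpers above are shared by the XPU stream, guard, and event tests. A minimal sketch of what such helpers plausibly look like follows; the loop over c10::irange(numel) comes from the hits, while the index-valued fill pattern and the EXPECT_EQ-based validation are assumptions, since the function bodies are truncated in the listing.

    // Hedged sketch of host-buffer helpers in the spirit of XPUTest.h.
    // The irange loops mirror the hits; the fill value and the gtest
    // assertion are assumed, not copied from the real header.
    #include <gtest/gtest.h>
    #include <c10/util/irange.h>

    static inline void initHostData(int* hostData, int numel) {
      for (const auto i : c10::irange(numel)) {
        hostData[i] = i;            // assumed fill pattern
      }
    }

    static inline void clearHostData(int* hostData, int numel) {
      for (const auto i : c10::irange(numel)) {
        hostData[i] = 0;
      }
    }

    static inline void validateHostData(int* hostData, int numel) {
      for (const auto i : c10::irange(numel)) {
        EXPECT_EQ(hostData[i], i);  // must mirror the assumed fill pattern
      }
    }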
|
D | XPUStreamTest.cpp |
    149  constexpr int numel = 1024;    in TEST() local
    150  int hostData[numel];    in TEST()
    151  initHostData(hostData, numel);    in TEST()
    155  int* deviceData = sycl::malloc_device<int>(numel, stream);    in TEST()
    158  asyncMemCopy(stream, deviceData, hostData, sizeof(int) * numel);    in TEST()
    162  clearHostData(hostData, numel);    in TEST()
    165  asyncMemCopy(stream, hostData, deviceData, sizeof(int) * numel);    in TEST()
    168  validateHostData(hostData, numel);    in TEST()
    172  clearHostData(hostData, numel);    in TEST()
    175  asyncMemCopy(stream, hostData, deviceData, sizeof(int) * numel);    in TEST()
    [all …]
|
/external/executorch/runtime/core/exec_aten/testing_util/ |
D | tensor_util.cpp |
    41   size_t numel,    in data_is_close() argument
    45   numel == 0 || (a != nullptr && b != nullptr),    in data_is_close()
    46   "Pointers must not be null when numel > 0: numel %zu, a 0x%p, b 0x%p",    in data_is_close()
    47   numel,    in data_is_close()
    53   for (size_t i = 0; i < numel; i++) {    in data_is_close()
    120  a.numel(),    in tensors_are_close()
    127  a.numel(),    in tensors_are_close()
    134  a.numel(),    in tensors_are_close()
    141  a.numel(),    in tensors_are_close()
    152  * underlying data elements and same numel. Note that this function is mainly
    [all …]
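The data_is_close hits above show the null-pointer precondition (pointers may only be null when numel == 0) followed by an element-wise loop. A minimal sketch of that shape of check, assuming the conventional |a - b| <= atol + rtol * |b| tolerance rule; the exact rule, NaN handling, and error reporting live in tensor_util.cpp and are not visible here.

    // Sketch of an element-wise closeness check, assuming the usual
    // atol/rtol formula. Only the precondition and loop structure are
    // taken from the listing.
    #include <cassert>
    #include <cmath>
    #include <cstddef>

    bool data_is_close_sketch(
        const float* a, const float* b, size_t numel, double rtol, double atol) {
      assert(numel == 0 || (a != nullptr && b != nullptr));
      for (size_t i = 0; i < numel; i++) {
        if (std::fabs(a[i] - b[i]) > atol + rtol * std::fabs(b[i])) {
          return false;
        }
      }
      return true;
    }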
|
/external/pytorch/aten/src/ATen/ |
D | InferSize.h |
    22  NumelType numel,    in infer_size_impl() argument
    40  if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_eq(numel, newsize)) ||    in infer_size_impl()
    41  (infer_dim && newsize > 0 && numel % newsize == 0)) {    in infer_size_impl()
    57  res[*infer_dim] = numel / newsize;    in infer_size_impl()
    63  ss << "shape '" << shape << "' is invalid for input of size " << numel;    in infer_size_impl()
    67  inline std::vector<int64_t> infer_size(IntArrayRef shape, int64_t numel) {    in infer_size() argument
    69  infer_size_impl(shape, numel, res);    in infer_size()
    73  inline at::DimVector infer_size_dv(IntArrayRef shape, int64_t numel) {    in infer_size_dv() argument
    75  infer_size_impl(shape, numel, res);    in infer_size_dv()
    81  c10::SymInt numel) {    in infer_size_dv() argument
    [all …]
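The InferSize.h hits center on resolving a single inferred (-1) dimension: newsize accumulates the product of the known dims, and the -1 slot becomes numel / newsize when the division is exact, otherwise the "shape ... is invalid for input of size numel" error fires. A standalone sketch of that logic, without the SymInt/guard machinery of the real header:

    // Sketch of -1 dimension inference. Assumes at most one -1 entry;
    // the real header rejects multiple -1s separately.
    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <vector>

    std::vector<int64_t> infer_size_sketch(std::vector<int64_t> shape, int64_t numel) {
      int64_t newsize = 1;
      int infer_dim = -1;
      for (size_t d = 0; d < shape.size(); ++d) {
        if (shape[d] == -1) {
          infer_dim = static_cast<int>(d);  // the single dimension to infer
        } else {
          newsize *= shape[d];
        }
      }
      if (infer_dim >= 0) {
        if (newsize > 0 && numel % newsize == 0) {
          shape[infer_dim] = numel / newsize;
          return shape;
        }
      } else if (newsize == numel) {
        return shape;
      }
      throw std::runtime_error(
          "shape is invalid for input of size " + std::to_string(numel));
    }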
|
D | TensorUtils.cpp |
    126  void checkNumel(CheckedFrom c, const TensorGeometryArg& t, int64_t numel) {    in checkNumel() argument
    128  t->numel() == numel,    in checkNumel()
    129  "Expected tensor for ", t, " to have ", numel,    in checkNumel()
    130  " elements; but it actually has ", t->numel(), " elements",    in checkNumel()
    136  t1->numel() == t2->numel(),    in checkSameNumel()
    139  t1->numel(), " does not equal ", t2->numel(),    in checkSameNumel()
    323  // ``numel'', i.e., number of subspaces, as the corresponding chunk of
    329  template <typename ResultVec, typename NewShapeVec, typename Numel>
    340  // NOTE: stride is arbitrary in the numel() == 0 case;    in computeStride_impl()
    345  const Numel numel = c10::multiply_integers(oldshape);    in computeStride_impl() local
    [all …]
|
/external/pytorch/torch/ |
D | _size_docs.py |
    13  "numel",
    15  numel() -> int
    20  ``x.numel() == x.size().numel() == s.numel() == 100`` holds true.
    27  >>> s.numel()
    29  >>> x.numel() == s.numel()
|
/external/pytorch/aten/src/ATen/native/cuda/ |
D | SortStable.cu |
    82   int numel,    in C10_LAUNCH_BOUNDS_1()
    84   CUDA_KERNEL_LOOP(idx, numel) {    in C10_LAUNCH_BOUNDS_1()
    95   int numel,    in C10_LAUNCH_BOUNDS_1()
    97   CUDA_KERNEL_LOOP(idx, numel) {    in C10_LAUNCH_BOUNDS_1()
    143  const auto numel = nsort * nsegments;    in segmented_sort_pairs_by_full_sort() local
    145  auto indices_and_segment = cuda_allocator->allocate(numel * sizeof(int2));    in segmented_sort_pairs_by_full_sort()
    150  dim3 grid = GET_BLOCKS(numel);    in segmented_sort_pairs_by_full_sort()
    154  i_s_ptr, numel, nsort_divider);    in segmented_sort_pairs_by_full_sort()
    191  const auto numel = nsort * nsegments;    in segmented_sort_pairs() local
    193  auto reverse_indices = cuda_allocator->allocate(numel * sizeof(int64_t));    in segmented_sort_pairs()
    [all …]
|
D | ScatterGatherKernel.cu |
    24  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…    in operator ()() argument
    25  (void)numel; // suppress unused warning    in operator ()()
    34  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…    in operator ()() argument
    35  fastAtomicAdd(self_data_start, index, numel, *src_data, true);    in operator ()()
    43  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…    in operator ()() argument
    44  fastAtomicAdd(self_data_start, index, numel, *src_data, true);    in operator ()()
    52  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…    in operator ()() argument
    53  (void)numel; // suppress unused warning    in operator ()()
    62  …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con…    in operator ()() argument
    63  (void)numel; // suppress unused warning    in operator ()()
    [all …]
|
D | MultiTensorApply.cuh |
    142  if (tensor_lists[0][t].numel() == 0) {    in multi_tensor_apply()
    147  tensor_lists[0][t].numel();    in multi_tensor_apply()
    160  const auto numel = tensor_lists[0][t].numel();    in multi_tensor_apply() local
    161  const auto chunks = numel / kChunkSize + (numel % kChunkSize != 0);    in multi_tensor_apply()
    233  if (tensor_lists[0][t].numel() == 0) {    in multi_tensor_apply()
    237  tensor_lists[0][t].numel();    in multi_tensor_apply()
    245  const auto numel = tensor_lists[0][t].numel();    in multi_tensor_apply() local
    246  const auto chunks = numel / kChunkSize + (numel % kChunkSize != 0);    in multi_tensor_apply()
    313  if (tensor_lists[0][tensor_index].numel() == 0) {    in multi_tensor_apply_for_fused_optimizer()
    319  tensor_lists[0][tensor_index].numel();    in multi_tensor_apply_for_fused_optimizer()
    [all …]
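The MultiTensorApply.cuh hits show the per-tensor chunking arithmetic: tensors with numel() == 0 are skipped, and the chunk count is an integer ceiling division of numel by kChunkSize. A small sketch of that arithmetic; the kChunkSize value below is a placeholder, not the constant used in the header.

    // Integer ceiling division as used for per-tensor chunk counts.
    #include <cstdint>

    constexpr int64_t kChunkSize = 65536;  // placeholder, not the real constant

    inline int64_t chunk_count(int64_t numel) {
      return numel / kChunkSize + (numel % kChunkSize != 0);  // ceil(numel / kChunkSize)
    }
    // e.g. chunk_count(0) == 0, chunk_count(65536) == 1, chunk_count(65537) == 2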
|
/external/pytorch/aten/src/ATen/test/ |
D | xpu_event_test.cpp |
    63  constexpr int numel = 1024;    in TEST() local
    64  int hostData[numel];    in TEST()
    65  initHostData(hostData, numel);    in TEST()
    68  int* deviceData = sycl::malloc_device<int>(numel, stream);    in TEST()
    71  stream.queue().memcpy(deviceData, hostData, sizeof(int) * numel);    in TEST()
    78  clearHostData(hostData, numel);    in TEST()
    81  stream.queue().memcpy(hostData, deviceData, sizeof(int) * numel);    in TEST()
    85  validateHostData(hostData, numel);    in TEST()
    87  clearHostData(hostData, numel);    in TEST()
    89  stream.queue().memcpy(hostData, deviceData, sizeof(int) * numel);    in TEST()
|
D | quantized_test.cpp |
    107  int numel = 10;    in TEST() local
    109  {numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point);    in TEST()
    112  for (const auto i : c10::irange(numel)) {    in TEST()
    119  for (const auto i : c10::irange(numel)) {    in TEST()
    125  int numel = 10;    in TEST() local
    126  auto scales = rand({numel}).toType(kDouble);    in TEST()
    131  {numel},    in TEST()
    138  for (const auto i : c10::irange(numel)) {    in TEST()
    145  for (const auto i : c10::irange(numel)) {    in TEST()
    218  auto numel = c10::multiply_integers(shape);    in TEST() local
    [all …]
|
D | scalar_tensor_test.cpp |
    59   const auto numel = c10::multiply_integers(s->begin(), s->end());    in test() local
    60   ASSERT_EQ(t.numel(), numel);    in test()
    128  if (t.numel() != 0) {    in test()
    135  if (t.numel() != 0) {    in test()
    145  if (t.dim() > 0 && t.numel() != 0) {    in test()
    219  ASSERT_NE(lhs.numel(), rhs.numel()),    in test()
    220  ASSERT_EQ(lhs.numel(), rhs.numel());    in test()
    229  ASSERT_EQ(lhs.numel(), 0); ASSERT_NE(rhs.numel(), 0),    in test()
    239  ASSERT_EQ(lhs.numel(), 0); ASSERT_NE(rhs1.numel(), 0),    in test()
    249  (lhs.numel() == 0 || rhs.numel() == 0 ||    in test()
|
/external/pytorch/torch/_inductor/codegen/aoti_runtime/ |
D | implementation.cpp |
    39  int64_t numel;    in convert_handle_to_arrayref_tensor() local
    40  AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_numel(handle, &numel));    in convert_handle_to_arrayref_tensor()
    54  MiniArrayRef<T>(reinterpret_cast<T*>(data_ptr), numel),    in convert_handle_to_arrayref_tensor()
    79  void assert_numel(const ArrayRefTensor<T>& tensor, uint64_t numel) {    in assert_numel() argument
    80  if (tensor.numel() != numel) {    in assert_numel()
    82  err << "incorrect numel for input tensor. expected " << numel << ", got " << tensor.numel();    in assert_numel()
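assert_numel in the AOTI runtime hits compares a tensor's element count against an expected value and reports both in the error message. A sketch of that pattern, using std::runtime_error as a stand-in for the runtime's own error path:

    // Sketch of an element-count assertion; the error type is an assumption.
    #include <cstdint>
    #include <sstream>
    #include <stdexcept>

    template <typename TensorT>
    void assert_numel_sketch(const TensorT& tensor, uint64_t numel) {
      if (static_cast<uint64_t>(tensor.numel()) != numel) {
        std::ostringstream err;
        err << "incorrect numel for input tensor. expected " << numel
            << ", got " << tensor.numel();
        throw std::runtime_error(err.str());
      }
    }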
|
/external/pytorch/aten/src/ATen/native/quantized/ |
D | QTensor.cpp |
    164  if (self.numel() > 0) {    in make_per_channel_quantized_tensor_cpu()
    260  auto data_size = self.numel() * self.element_size();    in equal_quantized_cpu()
    296  int numel,    in calculate_quant_loss() argument
    319  for (; i < numel; i++) {    in calculate_quant_loss()
    336  int64_t numel,    in choose_qparams_optimized() argument
    341  if (numel < 0 || numel > input_tensor.numel()) {    in choose_qparams_optimized()
    342  TORCH_CHECK(false, "numel is out of the bound of input tensor");    in choose_qparams_optimized()
    345  TORCH_CHECK(numel <= input_tensor.numel(), "numel ", numel,    in choose_qparams_optimized()
    346  " greater than input_tensor.numel() ", input_tensor.numel());    in choose_qparams_optimized()
    348  float xmin = *std::min_element(input_row, input_row + numel);    in choose_qparams_optimized()
    [all …]
|
/external/pytorch/aten/src/ATen/native/ |
D | EmbeddingBag.cpp |
    120  TORCH_CHECK(select_indices.numel() == add_indices.numel());    in index_select_add()
    129  auto numel = add_indices.numel();    in index_select_add() local
    137  for (const auto i : c10::irange(numel)) {    in index_select_add()
    208  int64_t output_size = offsets.numel() - 1;    in index_select_add()
    213  output_size = offsets.numel() - 1;    in index_select_add()
    215  output_size = offsets.numel();    in index_select_add()
    216  offsets_include_last.resize(offsets.numel() + 1);    in index_select_add()
    217  if (offsets.numel() > 0) {    in index_select_add()
    221  sizeof(index_t) * offsets.numel());    in index_select_add()
    223  offsets_include_last[offsets.numel()] = select_indices.numel();    in index_select_add()
    [all …]
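The EmbeddingBag.cpp hits show how index_select_add normalizes offsets: when the offsets tensor does not already include the final boundary, a copy with one extra slot is built and select_indices.numel() is appended as the last offset so every bag has an explicit end. A sketch of that step, with plain vectors standing in for the index_t tensors of the real code:

    // Build an offsets vector that includes the final boundary.
    #include <cstdint>
    #include <cstring>
    #include <vector>

    std::vector<int64_t> offsets_include_last_sketch(
        const std::vector<int64_t>& offsets, int64_t num_indices) {
      std::vector<int64_t> out(offsets.size() + 1);
      if (!offsets.empty()) {
        std::memcpy(out.data(), offsets.data(), sizeof(int64_t) * offsets.size());
      }
      out[offsets.size()] = num_indices;  // last bag ends at select_indices.numel()
      return out;
    }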
|
/external/pytorch/torch/autograd/_functions/ |
D | tensor.py |
    37  ctx.numel = reduce(operator.mul, sizes, 1)
    38  if tensor.numel() != ctx.numel:
    47  ctx.numel,
    49  tensor.numel(),
    64  assert grad_output.numel() == ctx.numel
|
/external/executorch/backends/vulkan/test/utils/ |
D | test_utils.h |
    141  int numel = -1) {
    142  if (numel < 0) {
    143  numel = staging.numel();
    145  std::vector<float> data(numel);
    147  staging.copy_from(data.data(), sizeof(float) * numel);
    155  const size_t numel,
    160  const size_t numel,
    181  int numel = -1) {
    182  if (numel < 0) {
    183  numel = staging.numel();
    [all …]
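The Vulkan test_utils.h hits use numel = -1 as a sentinel meaning "operate on the whole staging buffer". A sketch of that convention; StagingStub below is a hypothetical stand-in that only models the two members visible in the listing (numel() and copy_from()), and the real class's copy direction and semantics may differ.

    // Sketch of the "negative numel means full buffer" default.
    #include <cstddef>
    #include <cstring>
    #include <vector>

    struct StagingStub {  // hypothetical stand-in for the real staging buffer
      std::vector<float> buf;
      size_t numel() const { return buf.size(); }
      void copy_from(const void* src, size_t nbytes) {
        std::memcpy(buf.data(), src, nbytes);
      }
    };

    void fill_staging_sketch(StagingStub& staging, int numel = -1) {
      if (numel < 0) {
        numel = static_cast<int>(staging.numel());  // sentinel: use the full buffer
      }
      std::vector<float> data(numel);               // host-side scratch
      staging.copy_from(data.data(), sizeof(float) * numel);
    }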
|
/external/pytorch/test/cpp/api/ |
D | tensor.cpp |
    158  ASSERT_EQ(tensor.numel(), 1);    in TEST()
    163  ASSERT_EQ(tensor.numel(), 1);    in TEST()
    168  ASSERT_EQ(tensor.numel(), 1);    in TEST()
    173  ASSERT_EQ(tensor.numel(), 1);    in TEST()
    178  ASSERT_EQ(tensor.numel(), 1);    in TEST()
    185  ASSERT_EQ(tensor.numel(), 1);    in TEST()
    190  ASSERT_EQ(tensor.numel(), 1);    in TEST()
    197  ASSERT_EQ(tensor.numel(), 1);    in TEST()
    204  ASSERT_EQ(tensor.numel(), 3);    in TEST()
    211  ASSERT_EQ(tensor.numel(), 3);    in TEST()
    [all …]
|
/external/executorch/backends/cadence/hifi/operators/ |
D | dequantize_per_tensor.cpp |
    33  const size_t numel = out.numel();    in dequantize_per_tensor_out() local
    36  dequantize<uint8_t>(out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
    40  out_data, input_data, zero_point, scale, numel);    in dequantize_per_tensor_out()
    43  dequantize<int16_t>(out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
    48  dequantize<uint16_t>(out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
    51  dequantize<int32_t>(out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
|
D | quantize_per_tensor.cpp |
    34  const size_t numel = out.numel();    in quantize_per_tensor_out() local
    38  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
    42  out_data, input_data, scale, zero_point, numel);    in quantize_per_tensor_out()
    46  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
    52  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
    56  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
|
/external/pytorch/torch/csrc/distributed/c10d/ |
D | CUDASymmetricMemoryOps.cu |
    39   get_alignment(static_cast<size_t>(input.numel() * input.element_size()));    in get_and_verify_alignment()
    52   size_t numel,    in init_elementwise_launch_config() argument
    59   const size_t aligned_numel = at::round_up(numel, alignment * splits);    in init_elementwise_launch_config()
    80   size_t numel,    in multimem_all_reduce_kernel() argument
    90   at::round_up(numel, alignment * world_size) / world_size;    in multimem_all_reduce_kernel()
    96   if (start + i >= numel) {    in multimem_all_reduce_kernel()
    132  input.numel(),    in multimem_all_reduce_()
    145  input.numel(), \    in multimem_all_reduce_()
    173  size_t numel,    in multimem_one_shot_all_reduce_kernel() argument
    184  for (size_t i = offset; i < numel; i += stride) {    in multimem_one_shot_all_reduce_kernel()
    [all …]
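The CUDASymmetricMemoryOps.cu hits size their launches by rounding numel up to a multiple of alignment * world_size, so each rank gets an equal, aligned span of elements. A sketch of that arithmetic, with at::round_up written out explicitly:

    // Integer round-up and per-rank span sizing, mirroring
    // at::round_up(numel, alignment * world_size) / world_size.
    #include <cstddef>

    inline size_t round_up(size_t x, size_t multiple) {
      return ((x + multiple - 1) / multiple) * multiple;
    }

    inline size_t per_rank_span(size_t numel, size_t alignment, size_t world_size) {
      return round_up(numel, alignment * world_size) / world_size;
    }
    // e.g. per_rank_span(1000, 16, 8) == 128: 1000 rounds up to 1024, split 8 ways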
|
/external/executorch/backends/cadence/reference/operators/ |
D | dequantize_per_tensor.cpp |
    30  size_t numel = out.numel();    in dequantize_per_tensor_out() local
    35  out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
    39  out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
    45  out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
    49  out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
    53  out_data, input_data, scale, zero_point, numel);    in dequantize_per_tensor_out()
|
D | quantize_per_tensor.cpp |
    32  size_t numel = out.numel();    in quantize_per_tensor_out() local
    37  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
    41  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
    47  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
    51  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
    55  out_data, input_data, 1. / scale, zero_point, numel);    in quantize_per_tensor_out()
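Every dtype branch in the quantize_per_tensor hits (both the HiFi and reference Cadence operators) forwards out_data, input_data, 1. / scale, zero_point, and numel to a typed quantize kernel. A sketch of what such a per-element affine quantize plausibly does, assuming round-to-nearest and clamping to the target type's range; the real kernels' rounding and saturation details are not visible in the listing.

    // Hedged sketch of a per-element affine quantize:
    // out[i] = clamp(round(in[i] * inv_scale) + zero_point).
    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <limits>

    template <typename T>
    void quantize_sketch(
        T* out, const float* in, float inv_scale, int32_t zero_point, size_t numel) {
      for (size_t i = 0; i < numel; ++i) {
        const float q = std::nearbyint(in[i] * inv_scale) + zero_point;
        const float lo = static_cast<float>(std::numeric_limits<T>::min());
        const float hi = static_cast<float>(std::numeric_limits<T>::max());
        out[i] = static_cast<T>(std::max(lo, std::min(hi, q)));
      }
    }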
|
/external/pytorch/test/distributed/fsdp/ |
D | test_fsdp_ignored_modules.py |
    179  total_numel = sum(p.numel() for p in nonwrapped_model.parameters())
    181  p.numel() for p in nonwrapped_model.transformer.parameters()
    188  flat_param_numel = flat_param.numel()
    190  # Subtract the numel contributed from alignment padding
    192  numel
    193  for (numel, is_padding) in zip(
    253  total_numel = sum(p.numel() for p in nonwrapped_model.parameters())
    254  ignored_numel = sum(p.numel() for p in nonwrapped_model.layer1.parameters())
    262  flat_param_numel = flat_param.numel()
    264  # Subtract the numel contributed from alignment padding
    [all …]
|