
Searched full:numel (Results 1 – 25 of 1130) sorted by relevance


/external/pytorch/c10/xpu/test/impl/
XPUGuardTest.cpp
61 constexpr int numel = 1024; in TEST() local
62 int hostData1[numel]; in TEST()
63 initHostData(hostData1, numel); in TEST()
64 int hostData2[numel]; in TEST()
65 clearHostData(hostData2, numel); in TEST()
68 int* deviceData1 = sycl::malloc_device<int>(numel, xpu_stream1); in TEST()
72 xpu_stream1.queue().memcpy(deviceData1, hostData1, sizeof(int) * numel); in TEST()
77 xpu_stream2.queue().memcpy(hostData2, deviceData1, sizeof(int) * numel); in TEST()
81 validateHostData(hostData2, numel); in TEST()
86 clearHostData(hostData2, numel); in TEST()
[all …]
XPUTest.h
5 static inline void initHostData(int* hostData, int numel) { in initHostData() argument
6 for (const auto i : c10::irange(numel)) { in initHostData()
11 static inline void clearHostData(int* hostData, int numel) { in clearHostData() argument
12 for (const auto i : c10::irange(numel)) { in clearHostData()
17 static inline void validateHostData(int* hostData, int numel) { in validateHostData() argument
18 for (const auto i : c10::irange(numel)) { in validateHostData()
XPUStreamTest.cpp
149 constexpr int numel = 1024; in TEST() local
150 int hostData[numel]; in TEST()
151 initHostData(hostData, numel); in TEST()
155 int* deviceData = sycl::malloc_device<int>(numel, stream); in TEST()
158 asyncMemCopy(stream, deviceData, hostData, sizeof(int) * numel); in TEST()
162 clearHostData(hostData, numel); in TEST()
165 asyncMemCopy(stream, hostData, deviceData, sizeof(int) * numel); in TEST()
168 validateHostData(hostData, numel); in TEST()
172 clearHostData(hostData, numel); in TEST()
175 asyncMemCopy(stream, hostData, deviceData, sizeof(int) * numel); in TEST()
[all …]
/external/executorch/runtime/core/exec_aten/testing_util/
tensor_util.cpp
41 size_t numel, in data_is_close() argument
45 numel == 0 || (a != nullptr && b != nullptr), in data_is_close()
46 "Pointers must not be null when numel > 0: numel %zu, a 0x%p, b 0x%p", in data_is_close()
47 numel, in data_is_close()
53 for (size_t i = 0; i < numel; i++) { in data_is_close()
120 a.numel(), in tensors_are_close()
127 a.numel(), in tensors_are_close()
134 a.numel(), in tensors_are_close()
141 a.numel(), in tensors_are_close()
152 * underlying data elements and same numel. Note that this function is mainly
[all …]
/external/pytorch/aten/src/ATen/
InferSize.h
22 NumelType numel, in infer_size_impl() argument
40 if (TORCH_GUARD_SIZE_OBLIVIOUS(sym_eq(numel, newsize)) || in infer_size_impl()
41 (infer_dim && newsize > 0 && numel % newsize == 0)) { in infer_size_impl()
57 res[*infer_dim] = numel / newsize; in infer_size_impl()
63 ss << "shape '" << shape << "' is invalid for input of size " << numel; in infer_size_impl()
67 inline std::vector<int64_t> infer_size(IntArrayRef shape, int64_t numel) { in infer_size() argument
69 infer_size_impl(shape, numel, res); in infer_size()
73 inline at::DimVector infer_size_dv(IntArrayRef shape, int64_t numel) { in infer_size_dv() argument
75 infer_size_impl(shape, numel, res); in infer_size_dv()
81 c10::SymInt numel) { in infer_size_dv() argument
[all …]
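
The InferSize.h hits implement PyTorch's shape inference for reshape/view: at most one dimension may be given as -1, the product of the remaining sizes must divide numel, and the inferred dimension becomes numel / newsize; otherwise the "shape ... is invalid for input of size ..." error shown above is raised. A minimal Python sketch of that rule (an illustrative helper, not the ATen implementation):

```python
from typing import List

def infer_size(shape: List[int], numel: int) -> List[int]:
    """Resolve at most one -1 entry in `shape` so the sizes multiply to `numel`."""
    res = list(shape)
    newsize = 1
    infer_dim = None
    for dim, size in enumerate(shape):
        if size == -1:
            if infer_dim is not None:
                raise ValueError("only one dimension can be inferred")
            infer_dim = dim
        else:
            newsize *= size
    if infer_dim is not None:
        if newsize <= 0 or numel % newsize != 0:
            raise ValueError(f"shape '{shape}' is invalid for input of size {numel}")
        res[infer_dim] = numel // newsize
    elif newsize != numel:
        raise ValueError(f"shape '{shape}' is invalid for input of size {numel}")
    return res

print(infer_size([2, -1, 4], 24))  # [2, 3, 4]
```
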
TensorUtils.cpp
126 void checkNumel(CheckedFrom c, const TensorGeometryArg& t, int64_t numel) { in checkNumel() argument
128 t->numel() == numel, in checkNumel()
129 "Expected tensor for ", t, " to have ", numel, in checkNumel()
130 " elements; but it actually has ", t->numel(), " elements", in checkNumel()
136 t1->numel() == t2->numel(), in checkSameNumel()
139 t1->numel(), " does not equal ", t2->numel(), in checkSameNumel()
323 // ``numel'', i.e., number of subspaces, as the corresponding chunk of
329 template <typename ResultVec, typename NewShapeVec, typename Numel>
340 // NOTE: stride is arbitrary in the numel() == 0 case; in computeStride_impl()
345 const Numel numel = c10::multiply_integers(oldshape); in computeStride_impl() local
[all …]
/external/pytorch/torch/
_size_docs.py
13 "numel",
15 numel() -> int
20 ``x.numel() == x.size().numel() == s.numel() == 100`` holds true.
27 >>> s.numel()
29 >>> x.numel() == s.numel()
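
The _size_docs.py hit is the docstring for numel(), the total number of elements in a tensor or a torch.Size. A quick usage example consistent with the documented invariant x.numel() == x.size().numel() == s.numel() == 100:

```python
import torch

x = torch.zeros(10, 10)   # 100 elements
s = x.size()              # torch.Size([10, 10])

# Tensor.numel() and Size.numel() both report the total element count.
assert x.numel() == s.numel() == 100
print(x.numel())          # 100
```
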
/external/pytorch/aten/src/ATen/native/cuda/
SortStable.cu
82 int numel, in C10_LAUNCH_BOUNDS_1()
84 CUDA_KERNEL_LOOP(idx, numel) { in C10_LAUNCH_BOUNDS_1()
95 int numel, in C10_LAUNCH_BOUNDS_1()
97 CUDA_KERNEL_LOOP(idx, numel) { in C10_LAUNCH_BOUNDS_1()
143 const auto numel = nsort * nsegments; in segmented_sort_pairs_by_full_sort() local
145 auto indices_and_segment = cuda_allocator->allocate(numel * sizeof(int2)); in segmented_sort_pairs_by_full_sort()
150 dim3 grid = GET_BLOCKS(numel); in segmented_sort_pairs_by_full_sort()
154 i_s_ptr, numel, nsort_divider); in segmented_sort_pairs_by_full_sort()
191 const auto numel = nsort * nsegments; in segmented_sort_pairs() local
193 auto reverse_indices = cuda_allocator->allocate(numel * sizeof(int64_t)); in segmented_sort_pairs()
[all …]
ScatterGatherKernel.cu
24 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
25 (void)numel; // suppress unused warning in operator ()()
34 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
35 fastAtomicAdd(self_data_start, index, numel, *src_data, true); in operator ()()
43 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
44 fastAtomicAdd(self_data_start, index, numel, *src_data, true); in operator ()()
52 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
53 (void)numel; // suppress unused warning in operator ()()
62 …constexpr C10_DEVICE void operator() (scalar_t* self_data_start, int64_t index, int64_t numel, con… in operator ()() argument
63 (void)numel; // suppress unused warning in operator ()()
[all …]
MultiTensorApply.cuh
142 if (tensor_lists[0][t].numel() == 0) { in multi_tensor_apply()
147 tensor_lists[0][t].numel(); in multi_tensor_apply()
160 const auto numel = tensor_lists[0][t].numel(); in multi_tensor_apply() local
161 const auto chunks = numel / kChunkSize + (numel % kChunkSize != 0); in multi_tensor_apply()
233 if (tensor_lists[0][t].numel() == 0) { in multi_tensor_apply()
237 tensor_lists[0][t].numel(); in multi_tensor_apply()
245 const auto numel = tensor_lists[0][t].numel(); in multi_tensor_apply() local
246 const auto chunks = numel / kChunkSize + (numel % kChunkSize != 0); in multi_tensor_apply()
313 if (tensor_lists[0][tensor_index].numel() == 0) { in multi_tensor_apply_for_fused_optimizer()
319 tensor_lists[0][tensor_index].numel(); in multi_tensor_apply_for_fused_optimizer()
[all …]
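
In the MultiTensorApply.cuh hits, each tensor's numel is split into fixed-size chunks, and numel / kChunkSize + (numel % kChunkSize != 0) is integer ceiling division. A small Python check of that identity (the chunk size below is illustrative, not necessarily the kernel's kChunkSize):

```python
import math

def num_chunks(numel: int, chunk_size: int) -> int:
    # Floor division plus one extra chunk if there is a remainder,
    # i.e. ceil(numel / chunk_size) in integer arithmetic.
    return numel // chunk_size + (numel % chunk_size != 0)

for n in (0, 1, 65536, 65537, 100000):
    assert num_chunks(n, 65536) == math.ceil(n / 65536)
```
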
/external/pytorch/aten/src/ATen/test/
xpu_event_test.cpp
63 constexpr int numel = 1024; in TEST() local
64 int hostData[numel]; in TEST()
65 initHostData(hostData, numel); in TEST()
68 int* deviceData = sycl::malloc_device<int>(numel, stream); in TEST()
71 stream.queue().memcpy(deviceData, hostData, sizeof(int) * numel); in TEST()
78 clearHostData(hostData, numel); in TEST()
81 stream.queue().memcpy(hostData, deviceData, sizeof(int) * numel); in TEST()
85 validateHostData(hostData, numel); in TEST()
87 clearHostData(hostData, numel); in TEST()
89 stream.queue().memcpy(hostData, deviceData, sizeof(int) * numel); in TEST()
quantized_test.cpp
107 int numel = 10; in TEST() local
109 {numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point); in TEST()
112 for (const auto i : c10::irange(numel)) { in TEST()
119 for (const auto i : c10::irange(numel)) { in TEST()
125 int numel = 10; in TEST() local
126 auto scales = rand({numel}).toType(kDouble); in TEST()
131 {numel}, in TEST()
138 for (const auto i : c10::irange(numel)) { in TEST()
145 for (const auto i : c10::irange(numel)) { in TEST()
218 auto numel = c10::multiply_integers(shape); in TEST() local
[all …]
scalar_tensor_test.cpp
59 const auto numel = c10::multiply_integers(s->begin(), s->end()); in test() local
60 ASSERT_EQ(t.numel(), numel); in test()
128 if (t.numel() != 0) { in test()
135 if (t.numel() != 0) { in test()
145 if (t.dim() > 0 && t.numel() != 0) { in test()
219 ASSERT_NE(lhs.numel(), rhs.numel()), in test()
220 ASSERT_EQ(lhs.numel(), rhs.numel()); in test()
229 ASSERT_EQ(lhs.numel(), 0); ASSERT_NE(rhs.numel(), 0), in test()
239 ASSERT_EQ(lhs.numel(), 0); ASSERT_NE(rhs1.numel(), 0), in test()
249 (lhs.numel() == 0 || rhs.numel() == 0 || in test()
/external/pytorch/torch/_inductor/codegen/aoti_runtime/
implementation.cpp
39 int64_t numel; in convert_handle_to_arrayref_tensor() local
40 AOTI_TORCH_ERROR_CODE_CHECK(aoti_torch_get_numel(handle, &numel)); in convert_handle_to_arrayref_tensor()
54 MiniArrayRef<T>(reinterpret_cast<T*>(data_ptr), numel), in convert_handle_to_arrayref_tensor()
79 void assert_numel(const ArrayRefTensor<T>& tensor, uint64_t numel) { in assert_numel() argument
80 if (tensor.numel() != numel) { in assert_numel()
82 err << "incorrect numel for input tensor. expected " << numel << ", got " << tensor.numel(); in assert_numel()
/external/pytorch/aten/src/ATen/native/quantized/
QTensor.cpp
164 if (self.numel() > 0) { in make_per_channel_quantized_tensor_cpu()
260 auto data_size = self.numel() * self.element_size(); in equal_quantized_cpu()
296 int numel, in calculate_quant_loss() argument
319 for (; i < numel; i++) { in calculate_quant_loss()
336 int64_t numel, in choose_qparams_optimized() argument
341 if (numel < 0 || numel > input_tensor.numel()) { in choose_qparams_optimized()
342 TORCH_CHECK(false, "numel is out of the bound of input tensor"); in choose_qparams_optimized()
345 TORCH_CHECK(numel <= input_tensor.numel(), "numel ", numel, in choose_qparams_optimized()
346 " greater than input_tensor.numel() ", input_tensor.numel()); in choose_qparams_optimized()
348 float xmin = *std::min_element(input_row, input_row + numel); in choose_qparams_optimized()
[all …]
/external/pytorch/aten/src/ATen/native/
EmbeddingBag.cpp
120 TORCH_CHECK(select_indices.numel() == add_indices.numel()); in index_select_add()
129 auto numel = add_indices.numel(); in index_select_add() local
137 for (const auto i : c10::irange(numel)) { in index_select_add()
208 int64_t output_size = offsets.numel() - 1; in index_select_add()
213 output_size = offsets.numel() - 1; in index_select_add()
215 output_size = offsets.numel(); in index_select_add()
216 offsets_include_last.resize(offsets.numel() + 1); in index_select_add()
217 if (offsets.numel() > 0) { in index_select_add()
221 sizeof(index_t) * offsets.numel()); in index_select_add()
223 offsets_include_last[offsets.numel()] = select_indices.numel(); in index_select_add()
[all …]
/external/pytorch/torch/autograd/_functions/
tensor.py
37 ctx.numel = reduce(operator.mul, sizes, 1)
38 if tensor.numel() != ctx.numel:
47 ctx.numel,
49 tensor.numel(),
64 assert grad_output.numel() == ctx.numel
/external/executorch/backends/vulkan/test/utils/
test_utils.h
141 int numel = -1) {
142 if (numel < 0) {
143 numel = staging.numel();
145 std::vector<float> data(numel);
147 staging.copy_from(data.data(), sizeof(float) * numel);
155 const size_t numel,
160 const size_t numel,
181 int numel = -1) {
182 if (numel < 0) {
183 numel = staging.numel();
[all …]
/external/pytorch/test/cpp/api/
tensor.cpp
158 ASSERT_EQ(tensor.numel(), 1); in TEST()
163 ASSERT_EQ(tensor.numel(), 1); in TEST()
168 ASSERT_EQ(tensor.numel(), 1); in TEST()
173 ASSERT_EQ(tensor.numel(), 1); in TEST()
178 ASSERT_EQ(tensor.numel(), 1); in TEST()
185 ASSERT_EQ(tensor.numel(), 1); in TEST()
190 ASSERT_EQ(tensor.numel(), 1); in TEST()
197 ASSERT_EQ(tensor.numel(), 1); in TEST()
204 ASSERT_EQ(tensor.numel(), 3); in TEST()
211 ASSERT_EQ(tensor.numel(), 3); in TEST()
[all …]
/external/executorch/backends/cadence/hifi/operators/
dequantize_per_tensor.cpp
33 const size_t numel = out.numel(); in dequantize_per_tensor_out() local
36 dequantize<uint8_t>(out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
40 out_data, input_data, zero_point, scale, numel); in dequantize_per_tensor_out()
43 dequantize<int16_t>(out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
48 dequantize<uint16_t>(out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
51 dequantize<int32_t>(out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
quantize_per_tensor.cpp
34 const size_t numel = out.numel(); in quantize_per_tensor_out() local
38 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
42 out_data, input_data, scale, zero_point, numel); in quantize_per_tensor_out()
46 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
52 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
56 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
/external/pytorch/torch/csrc/distributed/c10d/
CUDASymmetricMemoryOps.cu
39 get_alignment(static_cast<size_t>(input.numel() * input.element_size())); in get_and_verify_alignment()
52 size_t numel, in init_elementwise_launch_config() argument
59 const size_t aligned_numel = at::round_up(numel, alignment * splits); in init_elementwise_launch_config()
80 size_t numel, in multimem_all_reduce_kernel() argument
90 at::round_up(numel, alignment * world_size) / world_size; in multimem_all_reduce_kernel()
96 if (start + i >= numel) { in multimem_all_reduce_kernel()
132 input.numel(), in multimem_all_reduce_()
145 input.numel(), \ in multimem_all_reduce_()
173 size_t numel, in multimem_one_shot_all_reduce_kernel() argument
184 for (size_t i = offset; i < numel; i += stride) { in multimem_one_shot_all_reduce_kernel()
[all …]
/external/executorch/backends/cadence/reference/operators/
dequantize_per_tensor.cpp
30 size_t numel = out.numel(); in dequantize_per_tensor_out() local
35 out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
39 out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
45 out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
49 out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
53 out_data, input_data, scale, zero_point, numel); in dequantize_per_tensor_out()
quantize_per_tensor.cpp
32 size_t numel = out.numel(); in quantize_per_tensor_out() local
37 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
41 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
47 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
51 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
55 out_data, input_data, 1. / scale, zero_point, numel); in quantize_per_tensor_out()
/external/pytorch/test/distributed/fsdp/
test_fsdp_ignored_modules.py
179 total_numel = sum(p.numel() for p in nonwrapped_model.parameters())
181 p.numel() for p in nonwrapped_model.transformer.parameters()
188 flat_param_numel = flat_param.numel()
190 # Subtract the numel contributed from alignment padding
192 numel
193 for (numel, is_padding) in zip(
253 total_numel = sum(p.numel() for p in nonwrapped_model.parameters())
254 ignored_numel = sum(p.numel() for p in nonwrapped_model.layer1.parameters())
262 flat_param_numel = flat_param.numel()
264 # Subtract the numel contributed from alignment padding
[all …]
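
The test_fsdp_ignored_modules.py hits count parameters with the common idiom sum(p.numel() for p in model.parameters()) and compare the total against the flat parameter's numel() minus alignment padding. A self-contained example of the counting idiom (the toy model below is illustrative, not the test's wrapped transformer):

```python
import torch.nn as nn

model = nn.Sequential(
    nn.Linear(16, 32),  # 16 * 32 weights + 32 biases = 544 elements
    nn.ReLU(),
    nn.Linear(32, 4),   # 32 * 4 weights + 4 biases = 132 elements
)

total_numel = sum(p.numel() for p in model.parameters())
print(total_numel)  # 676
```
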
