Home
last modified time | relevance | path

Searched refs:tensor_lists (Results 1 – 25 of 25) sorted by relevance

/external/pytorch/aten/src/ATen/native/cuda/
DForeachPointwiseOp.cu30 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_pointwise_op() local
37 tensor_lists.emplace_back(input.vec()); in foreach_pointwise_op()
38 tensor_lists.emplace_back(tensors1.vec()); in foreach_pointwise_op()
39 tensor_lists.emplace_back(tensors2.vec()); in foreach_pointwise_op()
40 tensor_lists.emplace_back(std::move(vec_res)); in foreach_pointwise_op()
50 tensor_lists, in foreach_pointwise_op()
60 return tensor_lists[3]; in foreach_pointwise_op()
69 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_pointwise_op_() local
70 tensor_lists.emplace_back(input.vec()); in foreach_pointwise_op_()
71 tensor_lists.emplace_back(tensors1.vec()); in foreach_pointwise_op_()
[all …]
DMultiTensorApply.cuh127 std::vector<std::vector<at::Tensor>>& tensor_lists, in multi_tensor_apply() argument
132 tensor_lists.size() == depth, in multi_tensor_apply()
134 const size_t n_tensors = tensor_lists[0].size(); in multi_tensor_apply()
142 if (tensor_lists[0][t].numel() == 0) { in multi_tensor_apply()
147 tensor_lists[0][t].numel(); in multi_tensor_apply()
150 tensor_lists[d][t].const_data_ptr(); in multi_tensor_apply()
160 const auto numel = tensor_lists[0][t].numel(); in multi_tensor_apply()
219 std::vector<std::vector<at::Tensor>>& tensor_lists, in multi_tensor_apply() argument
223 tensor_lists.size() == depth, in multi_tensor_apply()
225 const size_t n_tensors = tensor_lists[0].size(); in multi_tensor_apply()
[all …]
DForeachTernaryOp.cu39 std::vector<std::vector<at::Tensor>> tensor_lists{ in foreach_tensor_lerp_ternary_cuda() local
50 tensor_lists, in foreach_tensor_lerp_ternary_cuda()
59 return tensor_lists[3]; in foreach_tensor_lerp_ternary_cuda()
71 std::vector<std::vector<at::Tensor>> tensor_lists{ in foreach_tensor_lerp_ternary_cuda_() local
81 tensor_lists, in foreach_tensor_lerp_ternary_cuda_()
106 std::vector<std::vector<at::Tensor>> tensor_lists{ in foreach_tensor_lerp_list_cuda() local
117 tensor_lists, in foreach_tensor_lerp_list_cuda()
127 return tensor_lists[2]; in foreach_tensor_lerp_list_cuda()
139 std::vector<std::vector<at::Tensor>> tensor_lists{ in foreach_tensor_lerp_list_cuda_() local
149 tensor_lists, in foreach_tensor_lerp_list_cuda_()
DAmpKernels.cu106 std::vector<std::vector<at::Tensor>> tensor_lists; in _amp_foreach_non_finite_check_and_unscale_cuda_() local
119 tensor_lists.emplace_back(scaled_grads.vec()); in _amp_foreach_non_finite_check_and_unscale_cuda_()
127 tensor_lists.resize(1); in _amp_foreach_non_finite_check_and_unscale_cuda_()
128 tensor_lists[0].reserve(scaled_grads.size()); in _amp_foreach_non_finite_check_and_unscale_cuda_()
142 tensor_lists[0].push_back(t); in _amp_foreach_non_finite_check_and_unscale_cuda_()
145 if (tensor_lists[0].size() == 0) { in _amp_foreach_non_finite_check_and_unscale_cuda_()
151 tensor_lists[0][0].scalar_type(), in _amp_foreach_non_finite_check_and_unscale_cuda_()
153 [&tensor_lists, &found_inf, &inv_scale] { in _amp_foreach_non_finite_check_and_unscale_cuda_()
160 multi_tensor_apply<1>(tensor_lists, in _amp_foreach_non_finite_check_and_unscale_cuda_()
DForeachBinaryOpScalarTensor.cu38 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op() local
45 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op()
46 tensor_lists.emplace_back(std::move(vec_res)); in foreach_binary_op()
50 tensor_lists, in foreach_binary_op()
59 return tensor_lists[1]; in foreach_binary_op()
80 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op_() local
81 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op_()
85 tensor_lists, in foreach_binary_op_()
DForeachBinaryOpList.cu32 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_tensor_list_op() local
39 tensor_lists.emplace_back(tensors1.vec()); in foreach_tensor_list_op()
40 tensor_lists.emplace_back(tensors2.vec()); in foreach_tensor_list_op()
41 tensor_lists.emplace_back(std::move(vec_res)); in foreach_tensor_list_op()
45 tensor_lists, in foreach_tensor_list_op()
54 return tensor_lists[2]; in foreach_tensor_list_op()
62 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_tensor_list_op_() local
63 tensor_lists.emplace_back(tensors1.vec()); in foreach_tensor_list_op_()
64 tensor_lists.emplace_back(tensors2.vec()); in foreach_tensor_list_op_()
68 tensor_lists, in foreach_tensor_list_op_()
[all …]
DForeachUnaryOp.cu48 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_unary_op() local
55 tensor_lists.emplace_back(tensors.vec()); in foreach_unary_op()
56 tensor_lists.emplace_back(std::move(vec_res)); in foreach_unary_op()
60 tensor_lists, in foreach_unary_op()
68 return tensor_lists[1]; in foreach_unary_op()
73 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_unary_op_() local
74 tensor_lists.emplace_back(tensors.vec()); in foreach_unary_op_()
77 tensor_lists, in foreach_unary_op_()
388 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_tensor_zero_cuda_() local
389 tensor_lists.emplace_back(tensors.vec()); in foreach_tensor_zero_cuda_()
[all …]
DForeachBinaryOpScalarList.cu28 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op() local
35 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op()
36 tensor_lists.emplace_back(vec_res); in foreach_binary_op()
40 tensor_lists, in foreach_binary_op()
49 return tensor_lists[1]; in foreach_binary_op()
54 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op_() local
55 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op_()
59 tensor_lists, in foreach_binary_op_()
DForeachBinaryOpScalar.cu28 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op() local
35 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op()
36 tensor_lists.emplace_back(std::move(vec_res)); in foreach_binary_op()
40 tensor_lists, in foreach_binary_op()
48 return tensor_lists[1]; in foreach_binary_op()
53 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op_() local
54 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op_()
58 tensor_lists, in foreach_binary_op_()
Dfused_adamw_impl.cu26 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adamw_cuda_impl_() local
42 tensor_lists, in _fused_adamw_cuda_impl_()
72 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adamw_cuda_impl_() local
88 tensor_lists, in _fused_adamw_cuda_impl_()
Dfused_adam_amsgrad_impl.cu26 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adam_amsgrad_cuda_impl_() local
46 tensor_lists, in _fused_adam_amsgrad_cuda_impl_()
77 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adam_amsgrad_cuda_impl_() local
97 tensor_lists, in _fused_adam_amsgrad_cuda_impl_()
Dfused_adam_impl.cu25 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adam_cuda_impl_() local
41 tensor_lists, in _fused_adam_cuda_impl_()
71 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adam_cuda_impl_() local
87 tensor_lists, in _fused_adam_cuda_impl_()
Dfused_adamw_amsgrad_impl.cu27 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adamw_amsgrad_cuda_impl_() local
47 tensor_lists, in _fused_adamw_amsgrad_cuda_impl_()
78 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adamw_amsgrad_cuda_impl_() local
98 tensor_lists, in _fused_adamw_amsgrad_cuda_impl_()
DFusedSgdKernel.cu167 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_sgd_with_momentum_kernel_cuda_() local
176 tensor_lists, in _fused_sgd_with_momentum_kernel_cuda_()
241 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_sgd_with_momentum_kernel_cuda_() local
250 tensor_lists, in _fused_sgd_with_momentum_kernel_cuda_()
308 std::vector<std::vector<at::Tensor>> tensor_lists{params.vec(), grads.vec()}; in _fused_sgd_kernel_cuda_() local
316 tensor_lists, in _fused_sgd_kernel_cuda_()
400 std::vector<std::vector<at::Tensor>> tensor_lists{params.vec(), grads.vec()}; in _fused_sgd_kernel_cuda_() local
408 tensor_lists, in _fused_sgd_kernel_cuda_()
DForeachReduceOp.cu172 auto tensor_lists = std::vector<std::vector<Tensor>>{tensors.vec()}; in foreach_tensor_max_cuda()
178 tensor_lists[0][0].scalar_type(), in foreach_tensor_max_cuda()
182 tensor_lists, in foreach_tensor_max_cuda()
466 auto tensor_lists = std::vector<std::vector<Tensor>>{tensors.vec()}; in foreach_tensor_norm_cuda()
471 tensor_lists[0][0].scalar_type(), in foreach_tensor_norm_cuda()
480 tensor_lists, in foreach_tensor_norm_cuda()
486 tensor_lists, in foreach_tensor_norm_cuda()
492 tensor_lists, in foreach_tensor_norm_cuda()
/external/pytorch/aten/src/ATen/native/mps/operations/
DMultiTensorApply.h211 std::vector<std::vector<at::Tensor>>& tensor_lists,
216 const auto num_tensors = tensor_lists[0].size();
223 tensor_lists.size() == depth,
227 tensor_lists[d][0].scalar_type() == at::ScalarType::Float || tensor_lists[d][0].scalar_type() == a…
249 getMPSProfiler().beginProfileKernel(fusedOptimizerPSO, kernel_name, {tensor_lists[0]});
264 if (tensor_lists[0][tensor_index].numel() == 0) {
269 … mtl_setBuffer(tensorArgumentEncoder, tensor_lists[d][tensor_index], d * kmaxTensors + tensor_loc);
270 …[computeEncoder useResource:getMTLBufferStorage(tensor_lists[d][tensor_index]) usage:MTLResourceUs…
276 metadata_arguments.numels[tensor_loc] = tensor_lists[0][tensor_index].numel();
280 const auto numel = tensor_lists[0][tensor_index].numel();
[all …]
DFusedSgdKernel.mm32 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), momentum_buffer_list.vec(…
39 tensor_lists,
66 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), momentum_buffer_list.vec(…
73 tensor_lists,
123 std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec()};
130 tensor_lists,
188 std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec()};
195 tensor_lists,
DFusedAdamWAmsgradKernelImpl.mm26 std::vector<std::vector<Tensor>> tensor_lists{
33 tensor_lists,
57 std::vector<std::vector<Tensor>> tensor_lists{
64 tensor_lists,
DFusedAdamAmsgradKernelImpl.mm26 std::vector<std::vector<Tensor>> tensor_lists{
33 tensor_lists,
58 std::vector<std::vector<Tensor>> tensor_lists{
65 tensor_lists,
DFusedAdamWKernelImpl.mm25 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), exp_avgs.vec(), exp_avg_s…
31 tensor_lists,
55 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), exp_avgs.vec(), exp_avg_s…
61 tensor_lists,
DFusedAdamKernelImpl.mm25 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), exp_avgs.vec(), exp_avg_s…
31 tensor_lists,
55 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), exp_avgs.vec(), exp_avg_s…
61 tensor_lists,
/external/pytorch/test/distributed/
Dtest_c10d_ops_nccl.py754 tensor_lists = []
761 tensor_lists.append([t.cuda(device=gpu) for t in input_per_gpu])
763 reduce_scatter(output, tensor_lists, c10d.ReduceOp.SUM)
776 reduce_scatter(output, tensor_lists, c10d.ReduceOp.MIN)
783 reduce_scatter(output, tensor_lists, c10d.ReduceOp.MAX)
790 reduce_scatter(output, tensor_lists, c10d.ReduceOp.PRODUCT)
842 tensor_lists = [[t.float() for t in tl] for tl in tensor_lists]
845 [t.float() * factor_ref for t in tl] for tl in tensor_lists
847 reduce_scatter(output, tensor_lists, c10d._make_nccl_premul_sum(factor))
/external/pytorch/torch/_subclasses/
Dfake_impls.py527 tensor_lists = []
534 tensor_lists.append(arg)
545 assert tensor_lists
549 device, _ = FakeTensor._find_common_device(func, [tl[i] for tl in tensor_lists])
/external/pytorch/test/
Dtest_nestedtensor.py4198 tensor_lists = self._get_example_tensor_lists(
4204 tensor_lists, reduce_dims
4269 tensor_lists = self._get_example_tensor_lists(
4275 tensor_lists, reduce_dims
4319 tensor_lists = self._get_example_tensor_lists(
4326 for tensor_list in tensor_lists:
4355 tensor_lists = self._get_example_tensor_lists(
4363 for tensor_list in tensor_lists:
4395 tensor_lists = self._get_example_tensor_lists(
4402 for tensor_list in tensor_lists:
[all …]
/external/pytorch/test/cpp/c10d/
DProcessGroupNCCLTest.cpp148 std::vector<std::vector<at::Tensor>>& tensor_lists) { in getTensorLists() argument
161 outputs[i][j] = tensor_lists[i][j].cpu(); in getTensorLists()