/external/pytorch/aten/src/ATen/native/cuda/ |
D | ForeachPointwiseOp.cu | 30 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_pointwise_op() local 37 tensor_lists.emplace_back(input.vec()); in foreach_pointwise_op() 38 tensor_lists.emplace_back(tensors1.vec()); in foreach_pointwise_op() 39 tensor_lists.emplace_back(tensors2.vec()); in foreach_pointwise_op() 40 tensor_lists.emplace_back(std::move(vec_res)); in foreach_pointwise_op() 50 tensor_lists, in foreach_pointwise_op() 60 return tensor_lists[3]; in foreach_pointwise_op() 69 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_pointwise_op_() local 70 tensor_lists.emplace_back(input.vec()); in foreach_pointwise_op_() 71 tensor_lists.emplace_back(tensors1.vec()); in foreach_pointwise_op_() [all …]
|
D | MultiTensorApply.cuh | 127 std::vector<std::vector<at::Tensor>>& tensor_lists, in multi_tensor_apply() argument 132 tensor_lists.size() == depth, in multi_tensor_apply() 134 const size_t n_tensors = tensor_lists[0].size(); in multi_tensor_apply() 142 if (tensor_lists[0][t].numel() == 0) { in multi_tensor_apply() 147 tensor_lists[0][t].numel(); in multi_tensor_apply() 150 tensor_lists[d][t].const_data_ptr(); in multi_tensor_apply() 160 const auto numel = tensor_lists[0][t].numel(); in multi_tensor_apply() 219 std::vector<std::vector<at::Tensor>>& tensor_lists, in multi_tensor_apply() argument 223 tensor_lists.size() == depth, in multi_tensor_apply() 225 const size_t n_tensors = tensor_lists[0].size(); in multi_tensor_apply() [all …]
|
D | ForeachTernaryOp.cu | 39 std::vector<std::vector<at::Tensor>> tensor_lists{ in foreach_tensor_lerp_ternary_cuda() local 50 tensor_lists, in foreach_tensor_lerp_ternary_cuda() 59 return tensor_lists[3]; in foreach_tensor_lerp_ternary_cuda() 71 std::vector<std::vector<at::Tensor>> tensor_lists{ in foreach_tensor_lerp_ternary_cuda_() local 81 tensor_lists, in foreach_tensor_lerp_ternary_cuda_() 106 std::vector<std::vector<at::Tensor>> tensor_lists{ in foreach_tensor_lerp_list_cuda() local 117 tensor_lists, in foreach_tensor_lerp_list_cuda() 127 return tensor_lists[2]; in foreach_tensor_lerp_list_cuda() 139 std::vector<std::vector<at::Tensor>> tensor_lists{ in foreach_tensor_lerp_list_cuda_() local 149 tensor_lists, in foreach_tensor_lerp_list_cuda_()
|
D | AmpKernels.cu | 106 std::vector<std::vector<at::Tensor>> tensor_lists; in _amp_foreach_non_finite_check_and_unscale_cuda_() local 119 tensor_lists.emplace_back(scaled_grads.vec()); in _amp_foreach_non_finite_check_and_unscale_cuda_() 127 tensor_lists.resize(1); in _amp_foreach_non_finite_check_and_unscale_cuda_() 128 tensor_lists[0].reserve(scaled_grads.size()); in _amp_foreach_non_finite_check_and_unscale_cuda_() 142 tensor_lists[0].push_back(t); in _amp_foreach_non_finite_check_and_unscale_cuda_() 145 if (tensor_lists[0].size() == 0) { in _amp_foreach_non_finite_check_and_unscale_cuda_() 151 tensor_lists[0][0].scalar_type(), in _amp_foreach_non_finite_check_and_unscale_cuda_() 153 [&tensor_lists, &found_inf, &inv_scale] { in _amp_foreach_non_finite_check_and_unscale_cuda_() 160 multi_tensor_apply<1>(tensor_lists, in _amp_foreach_non_finite_check_and_unscale_cuda_()
|
D | ForeachBinaryOpScalarTensor.cu | 38 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op() local 45 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op() 46 tensor_lists.emplace_back(std::move(vec_res)); in foreach_binary_op() 50 tensor_lists, in foreach_binary_op() 59 return tensor_lists[1]; in foreach_binary_op() 80 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op_() local 81 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op_() 85 tensor_lists, in foreach_binary_op_()
|
D | ForeachBinaryOpList.cu | 32 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_tensor_list_op() local 39 tensor_lists.emplace_back(tensors1.vec()); in foreach_tensor_list_op() 40 tensor_lists.emplace_back(tensors2.vec()); in foreach_tensor_list_op() 41 tensor_lists.emplace_back(std::move(vec_res)); in foreach_tensor_list_op() 45 tensor_lists, in foreach_tensor_list_op() 54 return tensor_lists[2]; in foreach_tensor_list_op() 62 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_tensor_list_op_() local 63 tensor_lists.emplace_back(tensors1.vec()); in foreach_tensor_list_op_() 64 tensor_lists.emplace_back(tensors2.vec()); in foreach_tensor_list_op_() 68 tensor_lists, in foreach_tensor_list_op_() [all …]
|
D | ForeachUnaryOp.cu | 48 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_unary_op() local 55 tensor_lists.emplace_back(tensors.vec()); in foreach_unary_op() 56 tensor_lists.emplace_back(std::move(vec_res)); in foreach_unary_op() 60 tensor_lists, in foreach_unary_op() 68 return tensor_lists[1]; in foreach_unary_op() 73 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_unary_op_() local 74 tensor_lists.emplace_back(tensors.vec()); in foreach_unary_op_() 77 tensor_lists, in foreach_unary_op_() 388 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_tensor_zero_cuda_() local 389 tensor_lists.emplace_back(tensors.vec()); in foreach_tensor_zero_cuda_() [all …]
|
D | ForeachBinaryOpScalarList.cu | 28 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op() local 35 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op() 36 tensor_lists.emplace_back(vec_res); in foreach_binary_op() 40 tensor_lists, in foreach_binary_op() 49 return tensor_lists[1]; in foreach_binary_op() 54 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op_() local 55 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op_() 59 tensor_lists, in foreach_binary_op_()
|
D | ForeachBinaryOpScalar.cu | 28 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op() local 35 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op() 36 tensor_lists.emplace_back(std::move(vec_res)); in foreach_binary_op() 40 tensor_lists, in foreach_binary_op() 48 return tensor_lists[1]; in foreach_binary_op() 53 std::vector<std::vector<at::Tensor>> tensor_lists; in foreach_binary_op_() local 54 tensor_lists.emplace_back(tensors.vec()); in foreach_binary_op_() 58 tensor_lists, in foreach_binary_op_()
|
D | fused_adamw_impl.cu | 26 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adamw_cuda_impl_() local 42 tensor_lists, in _fused_adamw_cuda_impl_() 72 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adamw_cuda_impl_() local 88 tensor_lists, in _fused_adamw_cuda_impl_()
|
D | fused_adam_amsgrad_impl.cu | 26 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adam_amsgrad_cuda_impl_() local 46 tensor_lists, in _fused_adam_amsgrad_cuda_impl_() 77 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adam_amsgrad_cuda_impl_() local 97 tensor_lists, in _fused_adam_amsgrad_cuda_impl_()
|
D | fused_adam_impl.cu | 25 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adam_cuda_impl_() local 41 tensor_lists, in _fused_adam_cuda_impl_() 71 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adam_cuda_impl_() local 87 tensor_lists, in _fused_adam_cuda_impl_()
|
D | fused_adamw_amsgrad_impl.cu | 27 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adamw_amsgrad_cuda_impl_() local 47 tensor_lists, in _fused_adamw_amsgrad_cuda_impl_() 78 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_adamw_amsgrad_cuda_impl_() local 98 tensor_lists, in _fused_adamw_amsgrad_cuda_impl_()
|
D | FusedSgdKernel.cu | 167 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_sgd_with_momentum_kernel_cuda_() local 176 tensor_lists, in _fused_sgd_with_momentum_kernel_cuda_() 241 std::vector<std::vector<at::Tensor>> tensor_lists{ in _fused_sgd_with_momentum_kernel_cuda_() local 250 tensor_lists, in _fused_sgd_with_momentum_kernel_cuda_() 308 std::vector<std::vector<at::Tensor>> tensor_lists{params.vec(), grads.vec()}; in _fused_sgd_kernel_cuda_() local 316 tensor_lists, in _fused_sgd_kernel_cuda_() 400 std::vector<std::vector<at::Tensor>> tensor_lists{params.vec(), grads.vec()}; in _fused_sgd_kernel_cuda_() local 408 tensor_lists, in _fused_sgd_kernel_cuda_()
|
D | ForeachReduceOp.cu | 172 auto tensor_lists = std::vector<std::vector<Tensor>>{tensors.vec()}; in foreach_tensor_max_cuda() 178 tensor_lists[0][0].scalar_type(), in foreach_tensor_max_cuda() 182 tensor_lists, in foreach_tensor_max_cuda() 466 auto tensor_lists = std::vector<std::vector<Tensor>>{tensors.vec()}; in foreach_tensor_norm_cuda() 471 tensor_lists[0][0].scalar_type(), in foreach_tensor_norm_cuda() 480 tensor_lists, in foreach_tensor_norm_cuda() 486 tensor_lists, in foreach_tensor_norm_cuda() 492 tensor_lists, in foreach_tensor_norm_cuda()
|
/external/pytorch/aten/src/ATen/native/mps/operations/ |
D | MultiTensorApply.h | 211 std::vector<std::vector<at::Tensor>>& tensor_lists, 216 const auto num_tensors = tensor_lists[0].size(); 223 tensor_lists.size() == depth, 227 …tensor_lists[d][0].scalar_type() == at::ScalarType::Float || tensor_lists[d][0].scalar_type() == a… 249 getMPSProfiler().beginProfileKernel(fusedOptimizerPSO, kernel_name, {tensor_lists[0]}); 264 if (tensor_lists[0][tensor_index].numel() == 0) { 269 … mtl_setBuffer(tensorArgumentEncoder, tensor_lists[d][tensor_index], d * kmaxTensors + tensor_loc); 270 …[computeEncoder useResource:getMTLBufferStorage(tensor_lists[d][tensor_index]) usage:MTLResourceUs… 276 metadata_arguments.numels[tensor_loc] = tensor_lists[0][tensor_index].numel(); 280 const auto numel = tensor_lists[0][tensor_index].numel(); [all …]
|
D | FusedSgdKernel.mm | 32 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), momentum_buffer_list.vec(… 39 tensor_lists, 66 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), momentum_buffer_list.vec(… 73 tensor_lists, 123 std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec()}; 130 tensor_lists, 188 std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec()}; 195 tensor_lists,
|
D | FusedAdamWAmsgradKernelImpl.mm | 26 std::vector<std::vector<Tensor>> tensor_lists{ 33 tensor_lists, 57 std::vector<std::vector<Tensor>> tensor_lists{ 64 tensor_lists,
|
D | FusedAdamAmsgradKernelImpl.mm | 26 std::vector<std::vector<Tensor>> tensor_lists{ 33 tensor_lists, 58 std::vector<std::vector<Tensor>> tensor_lists{ 65 tensor_lists,
|
D | FusedAdamWKernelImpl.mm | 25 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), exp_avgs.vec(), exp_avg_s… 31 tensor_lists, 55 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), exp_avgs.vec(), exp_avg_s… 61 tensor_lists,
|
D | FusedAdamKernelImpl.mm | 25 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), exp_avgs.vec(), exp_avg_s… 31 tensor_lists, 55 …std::vector<std::vector<Tensor>> tensor_lists{params.vec(), grads.vec(), exp_avgs.vec(), exp_avg_s… 61 tensor_lists,
|
/external/pytorch/test/distributed/ |
D | test_c10d_ops_nccl.py | 754 tensor_lists = [] 761 tensor_lists.append([t.cuda(device=gpu) for t in input_per_gpu]) 763 reduce_scatter(output, tensor_lists, c10d.ReduceOp.SUM) 776 reduce_scatter(output, tensor_lists, c10d.ReduceOp.MIN) 783 reduce_scatter(output, tensor_lists, c10d.ReduceOp.MAX) 790 reduce_scatter(output, tensor_lists, c10d.ReduceOp.PRODUCT) 842 tensor_lists = [[t.float() for t in tl] for tl in tensor_lists] 845 [t.float() * factor_ref for t in tl] for tl in tensor_lists 847 reduce_scatter(output, tensor_lists, c10d._make_nccl_premul_sum(factor))
|
/external/pytorch/torch/_subclasses/ |
D | fake_impls.py | 527 tensor_lists = [] 534 tensor_lists.append(arg) 545 assert tensor_lists 549 device, _ = FakeTensor._find_common_device(func, [tl[i] for tl in tensor_lists])
|
/external/pytorch/test/ |
D | test_nestedtensor.py | 4198 tensor_lists = self._get_example_tensor_lists( 4204 tensor_lists, reduce_dims 4269 tensor_lists = self._get_example_tensor_lists( 4275 tensor_lists, reduce_dims 4319 tensor_lists = self._get_example_tensor_lists( 4326 for tensor_list in tensor_lists: 4355 tensor_lists = self._get_example_tensor_lists( 4363 for tensor_list in tensor_lists: 4395 tensor_lists = self._get_example_tensor_lists( 4402 for tensor_list in tensor_lists: [all …]
|
/external/pytorch/test/cpp/c10d/ |
D | ProcessGroupNCCLTest.cpp | 148 std::vector<std::vector<at::Tensor>>& tensor_lists) { in getTensorLists() argument 161 outputs[i][j] = tensor_lists[i][j].cpu(); in getTensorLists()
|