
Searched refs: output_split_sizes (Results 1 – 16 of 16), sorted by relevance

/external/pytorch/torch/distributed/
_functional_collectives.py:446 output_split_sizes: Optional[List[int]],
466 if output_split_sizes is not None:
468 isinstance(size, (int, torch.SymInt)) for size in output_split_sizes
469 ), output_split_sizes
476 if output_split_sizes is None or input_split_sizes is None:
477 assert output_split_sizes is None and input_split_sizes is None, (
481 output_split_sizes = [self.shape[0] // group_size] * group_size
482 input_split_sizes = output_split_sizes
485 output_split_sizes,
494 output_split_sizes: Optional[List[int]],
[all …]
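
The hits at lines 476–482 show the even-split default: when both split lists are None, the op asserts that they are both None and assumes each rank exchanges self.shape[0] // group_size rows. A minimal sketch of that default, assuming an initialized process group whose backend supports all-to-all (the _functional_collectives module is private and may change between releases):

    import torch
    import torch.distributed as dist
    import torch.distributed._functional_collectives as funcol

    # Assumes dist.init_process_group(...) has already run.
    world = dist.get_world_size()
    x = torch.randn(4 * world, 8)  # row count divisible by world size

    # None for both split lists triggers the even-split default:
    # output_split_sizes = input_split_sizes = [x.shape[0] // world] * world
    y = funcol.all_to_all_single(
        x, output_split_sizes=None, input_split_sizes=None, group=dist.group.WORLD
    )
    y = funcol.wait_tensor(y)  # functional collectives return async tensors
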
_functional_collectives_impl.py:93 output_split_sizes: Optional[List[int]],
99 if output_split_sizes is None or input_split_sizes is None:
100 assert output_split_sizes is None and input_split_sizes is None, (
104 output_split_sizes = [input.shape[0] // group_size] * group_size
105 input_split_sizes = output_split_sizes
110 output_split_sizes,
distributed_c10d.py:3883 output_split_sizes=None, argument
3992 output_split_sizes = [] if output_split_sizes is None else output_split_sizes
3997 output, input, output_split_sizes, input_split_sizes, opts
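
Here distributed_c10d.py normalizes a None split list to [] (line 3992) before handing off to the backend, which then treats it as an even split. For the public torch.distributed.all_to_all_single the caller also allocates the output buffer to sum(output_split_sizes) rows; a hedged sketch on two ranks with made-up sizes:

    import torch
    import torch.distributed as dist

    # Assumes dist.init_process_group(...) has run on exactly 2 ranks.
    rank = dist.get_rank()
    # Send matrix (rows = source, cols = destination): [[1, 3], [2, 2]].
    input_split_sizes = [1, 3] if rank == 0 else [2, 2]   # my row
    output_split_sizes = [1, 2] if rank == 0 else [3, 2]  # my column

    inp = torch.randn(sum(input_split_sizes), 4)
    out = torch.empty(sum(output_split_sizes), 4)
    dist.all_to_all_single(
        out, inp,
        output_split_sizes=output_split_sizes,
        input_split_sizes=input_split_sizes,
    )
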
/external/pytorch/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/
_common.py:190 output_split_sizes = [0] * world_size
192 output_split_sizes[placement.rank()] = get_chunked_dim_size(
198 output, combined_results, output_split_sizes=output_split_sizes, group=pg
211 dim_size = output_split_sizes[placement.rank()]
214 for i, split_size in enumerate(output_split_sizes)
/external/pytorch/torch/distributed/nn/
functional.py:177 output_split_sizes=None, argument
201 group, output, output_split_sizes, input_split_sizes, input
411 def forward(ctx, group, output, output_split_sizes, input_split_sizes, input): argument
414 ctx.output_split_sizes = input_split_sizes
415 ctx.input_split_sizes = output_split_sizes
419 output_split_sizes=output_split_sizes,
434 ctx.output_split_sizes,
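
Lines 414–415 save the split lists swapped on purpose: the backward of an all-to-all is another all-to-all in the opposite direction, so whatever a rank received in forward is what its gradient must send back. A sketch of the differentiable wrapper, reusing the two-rank sizes from the c10d example above (group setup assumed):

    import torch
    import torch.distributed as dist
    from torch.distributed.nn.functional import all_to_all_single

    rank = dist.get_rank()
    input_split_sizes = [1, 3] if rank == 0 else [2, 2]
    output_split_sizes = [1, 2] if rank == 0 else [3, 2]

    inp = torch.randn(sum(input_split_sizes), 4, requires_grad=True)
    out = torch.empty(sum(output_split_sizes), 4)
    out = all_to_all_single(
        out, inp,
        output_split_sizes=output_split_sizes,
        input_split_sizes=input_split_sizes,
    )
    # backward() runs all_to_all_single with the lists swapped,
    # exactly as ctx.output_split_sizes / ctx.input_split_sizes record.
    out.sum().backward()
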
/external/pytorch/torch/csrc/distributed/c10d/
Functional.cpp:259 std::vector<int64_t> output_split_sizes, in all_to_all_single() argument
265 output_split_sizes.begin(), output_split_sizes.end(), int64_t(0)); in all_to_all_single()
273 output_split_sizes, in all_to_all_single()
403 std::vector<int64_t> output_split_sizes, in forward() argument
410 ctx->saved_data["input_split_sizes"] = output_split_sizes; in forward()
416 .call(input, output_split_sizes, input_split_sizes, group_name); in forward()
422 const std::vector<int64_t>& output_split_sizes = in backward() local
435 .call(grad_out, output_split_sizes, input_split_sizes, group_name); in backward()
450 const std::vector<int64_t>& output_split_sizes, in all_to_all_single_autograd() argument
454 input, output_split_sizes, input_split_sizes, group_name); in all_to_all_single_autograd()
Ops.cpp:414 std::vector<int64_t> output_split_sizes, \
421 output_split_sizes, \
/external/pytorch/torch/distributed/_shard/sharded_tensor/
reshard.py:128 output_split_sizes = [0] * world_size
131 output_split_sizes[new_rank] = sharded_dim_size
144 output_split_sizes=output_split_sizes,
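
Both _common.py and reshard.py build one-hot split lists: start from [0] * world_size, then set only the rank that actually owns the shard. Ranks with a 0 entry exchange nothing during the all-to-all. A tiny illustration with made-up numbers:

    # Hypothetical: world_size = 4, this shard belongs to rank 2.
    world_size = 4
    sharded_dim_size = 5

    output_split_sizes = [0] * world_size
    output_split_sizes[2] = sharded_dim_size
    # -> [0, 0, 5, 0]: only rank 2's slot carries data.
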
/external/pytorch/test/distributed/
test_c10d_functional_native.py:356 output_split_sizes = send_sz_matrix[:, self.rank].tolist()
361 output_split_sizes,
369 for rank, sz in enumerate(output_split_sizes)
376 input, output_split_sizes, input_split_sizes, "default"
780 output_split_sizes: torch.Tensor,
785 _tolist_with_constrain_as_size(output_split_sizes),
795 output_split_sizes = send_sz_matrix[:, self.rank].contiguous()
805 compiled, input, output_split_sizes, input_split_sizes
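
This test derives both lists from one send-size matrix: row self.rank is what I send (input splits), column self.rank is what I receive (output splits), and every rank must agree on the matrix. A standalone sketch of that bookkeeping:

    import torch

    # Hypothetical 3-rank matrix: entry [src, dst] = rows src sends to dst.
    send_sz_matrix = torch.tensor([[1, 2, 0],
                                   [3, 1, 1],
                                   [0, 2, 2]])
    rank = 1  # stand-in for self.rank
    input_split_sizes = send_sz_matrix[rank].tolist()      # row:    [3, 1, 1]
    output_split_sizes = send_sz_matrix[:, rank].tolist()  # column: [2, 1, 2]
    # sum(input_split_sizes) rows go out, sum(output_split_sizes) come back.
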
test_functional_api.py:525 x, output_split_sizes=split_sizes, input_split_sizes=split_sizes, group=mesh
543 x, output_split_sizes=split_sizes, input_split_sizes=split_sizes, group=mesh
559 x, output_split_sizes=None, input_split_sizes=None, group=mesh
test_c10d_spawn.py:242 y, x, output_split_sizes=split_sizes, input_split_sizes=split_sizes
test_inductor_collectives.py:410 output_split_sizes = _tolist_with_constrain_as_size(
415 output_split_sizes,
/external/pytorch/torch/testing/_internal/distributed/
multi_threaded_pg.py:84 output_buffer, _, output_split_sizes, _ = data[dest_rank]
86 … output_indexes = self._size_cumsum(output_buffer.size(0), output_split_sizes, world_size)
321 output_split_sizes: Optional[List[int]],
326 … res = coll.join(self._rank, (output_buffer, input_buffer, output_split_sizes, input_split_sizes))
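
The threaded test backend turns split sizes into offsets with a cumulative sum so each peer's slice can be addressed inside the flat buffer. _size_cumsum is internal; a hedged reimplementation of what the hit at line 86 consumes:

    import torch

    def size_cumsum(buf_size, split_sizes, world_size):
        # Assumption: a missing list means the even-split default seen
        # elsewhere in this listing.
        if not split_sizes:
            split_sizes = [buf_size // world_size] * world_size
        # offsets[i] .. offsets[i + 1] is rank i's slice of the buffer.
        return torch.cumsum(torch.tensor([0] + list(split_sizes)), dim=0)

    size_cumsum(8, [1, 3, 2, 2], 4)  # tensor([0, 1, 4, 6, 8])
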
distributed_test.py:3514 output_split_sizes = []
3516 output_split_sizes.append(dst + 1)
3517 sum_len = sum(output_split_sizes)
3523 output_split_sizes[rank], sum_len, sum_len, dtype=torch.float
3531 list(torch.split(out_tensor, output_split_sizes)),
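
The final check here is the generic recipe for reading an uneven all-to-all result: size the receive buffer to sum(output_split_sizes), then torch.split recovers one chunk per source rank, in rank order. A self-contained illustration:

    import torch

    output_split_sizes = [1, 3, 2]  # hypothetical, 3 ranks
    out = torch.arange(float(sum(output_split_sizes)))  # stand-in buffer
    chunks = list(torch.split(out, output_split_sizes))
    # chunks[i] holds exactly what source rank i sent (1, 3 and 2 rows).
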
/external/pytorch/torch/_C/
_distributed_c10d.pyi:462 output_split_sizes: list[int],
471 output_split_sizes: list[int],
/external/pytorch/torch/_inductor/
lowering.py:6412 def _all_to_all_single(inp, output_split_sizes, input_split_sizes, group_name): argument
6417 output_split_sizes,