/external/pytorch/torch/nn/parallel/ |
D | comm.py |
    70   def reduce_add(inputs, destination=None):           function
    146  result = reduce_add(tensor_at_gpus, destination)  # this will be sparse too
    159  flat_result = reduce_add(flat_tensors, destination)
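These hits are torch.cuda.comm.reduce_add, which sums same-shaped tensors living on different GPUs into a single tensor on one device. A minimal sketch of the call, assuming at least two CUDA devices are available (shapes and values are illustrative):

    import torch
    from torch.cuda.comm import reduce_add

    if torch.cuda.device_count() >= 2:
        # One tensor per GPU; all inputs must share the same shape.
        inputs = [torch.ones(4, device=f"cuda:{i}") for i in range(2)]
        # Elementwise sum, materialized on destination device 0.
        total = reduce_add(inputs, destination=0)
        print(total)  # tensor([2., 2., 2., 2.], device='cuda:0')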
|
/external/cronet/tot/third_party/rust/chromium_crates_io/vendor/simd-adler32-0.3.7/src/imp/ |
D | avx2.rs |
    136  *a += reduce_add(a_v);                     in reduce_add_blocks()
    137  *b = reduce_add(b_v);                      in reduce_add_blocks()
    143  unsafe fn reduce_add(v: __m256i) -> u32 {  in reduce_add() function
|
D | ssse3.rs |
    140  *a += reduce_add(a_v);                     in reduce_add_blocks()
    141  *b = reduce_add(b_v);                      in reduce_add_blocks()
    147  unsafe fn reduce_add(v: __m128i) -> u32 {  in reduce_add() function
|
D | wasm.rs |
    119  *a += reduce_add(a_v);           in reduce_add_blocks()
    120  *b = reduce_add(b_v);            in reduce_add_blocks()
    145  fn reduce_add(v: v128) -> u32 {  in reduce_add() function
|
D | avx512.rs |
    154  *a += reduce_add(a_v);                     in reduce_add_blocks()
    155  *b = reduce_add(b_v);                      in reduce_add_blocks()
    161  unsafe fn reduce_add(v: __m512i) -> u32 {  in reduce_add() function
|
D | sse2.rs |
    139  *a += reduce_add(a_v);                     in reduce_add_blocks()
    140  *b = reduce_add(b_v);                      in reduce_add_blocks()
    160  unsafe fn reduce_add(v: __m128i) -> u32 {  in reduce_add() function
|
D | neon.rs |
    153  *a += reduce_add(a_v);                        in reduce_add_blocks()
    154  *b = reduce_add(b_v);                         in reduce_add_blocks()
    160  unsafe fn reduce_add(v: uint32x4_t) -> u32 {  in reduce_add() function
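Across all six simd-adler32 backends, reduce_add is the same operation: a horizontal sum that folds the per-lane partial Adler-32 accumulators (a_v, b_v) of one vector register into a single scalar. A lane-by-lane sketch, with a plain Python list standing in for the SIMD register (the real implementations use shuffle-and-add intrinsic chains):

    def reduce_add(v):
        """Horizontal sum: collapse all lanes of a simulated vector to one u32."""
        total = 0
        for lane in v:             # e.g. 8 x u32 lanes of an __m256i
            total += lane
        return total & 0xFFFFFFFF  # wrap like the u32 return type

    a_v = [3, 1, 4, 1, 5, 9, 2, 6]  # per-lane partial sums
    assert reduce_add(a_v) == 31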
|
/external/cronet/stable/third_party/rust/chromium_crates_io/vendor/simd-adler32-0.3.7/src/imp/ |
D | avx2.rs |
    136  *a += reduce_add(a_v);                     in reduce_add_blocks()
    137  *b = reduce_add(b_v);                      in reduce_add_blocks()
    143  unsafe fn reduce_add(v: __m256i) -> u32 {  in reduce_add() function
|
D | wasm.rs |
    119  *a += reduce_add(a_v);           in reduce_add_blocks()
    120  *b = reduce_add(b_v);            in reduce_add_blocks()
    145  fn reduce_add(v: v128) -> u32 {  in reduce_add() function
|
D | ssse3.rs |
    140  *a += reduce_add(a_v);                     in reduce_add_blocks()
    141  *b = reduce_add(b_v);                      in reduce_add_blocks()
    147  unsafe fn reduce_add(v: __m128i) -> u32 {  in reduce_add() function
|
D | avx512.rs |
    154  *a += reduce_add(a_v);                     in reduce_add_blocks()
    155  *b = reduce_add(b_v);                      in reduce_add_blocks()
    161  unsafe fn reduce_add(v: __m512i) -> u32 {  in reduce_add() function
|
D | neon.rs |
    153  *a += reduce_add(a_v);                        in reduce_add_blocks()
    154  *b = reduce_add(b_v);                         in reduce_add_blocks()
    160  unsafe fn reduce_add(v: uint32x4_t) -> u32 {  in reduce_add() function
|
D | sse2.rs |
    139  *a += reduce_add(a_v);                     in reduce_add_blocks()
    140  *b = reduce_add(b_v);                      in reduce_add_blocks()
    160  unsafe fn reduce_add(v: __m128i) -> u32 {  in reduce_add() function
|
/external/rust/android-crates-io/crates/simd-adler32/src/imp/ |
D | avx2.rs |
    136  *a += reduce_add(a_v);                     in reduce_add_blocks()
    137  *b = reduce_add(b_v);                      in reduce_add_blocks()
    143  unsafe fn reduce_add(v: __m256i) -> u32 {  in reduce_add() function
|
D | ssse3.rs |
    140  *a += reduce_add(a_v);                     in reduce_add_blocks()
    141  *b = reduce_add(b_v);                      in reduce_add_blocks()
    147  unsafe fn reduce_add(v: __m128i) -> u32 {  in reduce_add() function
|
D | wasm.rs |
    119  *a += reduce_add(a_v);           in reduce_add_blocks()
    120  *b = reduce_add(b_v);            in reduce_add_blocks()
    145  fn reduce_add(v: v128) -> u32 {  in reduce_add() function
|
D | neon.rs |
    153  *a += reduce_add(a_v);                        in reduce_add_blocks()
    154  *b = reduce_add(b_v);                         in reduce_add_blocks()
    160  unsafe fn reduce_add(v: uint32x4_t) -> u32 {  in reduce_add() function
|
D | avx512.rs |
    154  *a += reduce_add(a_v);                     in reduce_add_blocks()
    155  *b = reduce_add(b_v);                      in reduce_add_blocks()
    161  unsafe fn reduce_add(v: __m512i) -> u32 {  in reduce_add() function
|
D | sse2.rs |
    139  *a += reduce_add(a_v);                     in reduce_add_blocks()
    140  *b = reduce_add(b_v);                      in reduce_add_blocks()
    160  unsafe fn reduce_add(v: __m128i) -> u32 {  in reduce_add() function
|
/external/pytorch/torch/cuda/ |
D | comm.py |
    6    reduce_add,
|
/external/pytorch/aten/src/ATen/native/cuda/ |
D | ScatterGatherKernel.cu |
    38   static ReduceAdd reduce_add;              variable
    497  "scatter_add_cuda_", reduce_add);         in scatter_add_cuda_kernel()
    508  "scatter_reduce_cuda_add_", reduce_add);  in scatter_reduce_cuda_kernel()
    525  "scatter_reduce_cuda_sum_", reduce_add);  in scatter_reduce_two_cuda_kernel()
    553  "scatter_fill_cuda_add_", reduce_add);    in scatter_scalar_reduce_cuda_kernel()
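Here ReduceAdd is the functor the scatter kernels apply when the reduction is a sum; from Python it surfaces as Tensor.scatter_add_ and scatter_reduce_(..., reduce="sum"). A small sketch of the semantics (values routed to the same index accumulate):

    import torch

    src = torch.tensor([1., 2., 3., 4.])
    index = torch.tensor([0, 1, 0, 1])

    out = torch.zeros(2)
    out.scatter_add_(0, index, src)  # out[index[i]] += src[i]
    print(out)                       # tensor([4., 6.])

    # scatter_reduce_ with reduce="sum" performs the same add reduction.
    out2 = torch.zeros(2).scatter_reduce_(0, index, src, reduce="sum")
    print(out2)                      # tensor([4., 6.])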
|
/external/tensorflow/tensorflow/compiler/xla/tests/ |
D | value_inference_test.cc |
    147  XlaBuilder reduce_add("reduce_add");  in TEST_F() local
    148  auto p0 = Parameter(&reduce_add, 0, ShapeUtil::MakeScalarShape(S32), "p");  in TEST_F()
    149  auto p1 = Parameter(&reduce_add, 1, ShapeUtil::MakeScalarShape(S32), "p");  in TEST_F()
    150  auto p2 = Parameter(&reduce_add, 2, ShapeUtil::MakeScalarShape(S32), "p");  in TEST_F()
    151  auto p3 = Parameter(&reduce_add, 3, ShapeUtil::MakeScalarShape(S32), "p");  in TEST_F()
    156  Tuple(&reduce_add, {reduce_result, reduce_result});  in TEST_F()
    159  reduce_add.Build().value(), {1});  in TEST_F()
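The test builds an XlaBuilder named reduce_add whose reduction computation is scalar addition; setting aside the surrounding value-inference checks, that computation simply sums away an axis. A numpy sketch of the core operation (the builder plumbing is elided, and the axis choice here is illustrative):

    import numpy as np

    x = np.arange(12, dtype=np.int32).reshape(3, 4)
    # Analogous to an XLA Reduce whose computation is scalar add.
    reduce_result = np.add.reduce(x, axis=1)
    print(reduce_result)  # [ 6 22 38]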
|
/external/pytorch/aten/src/ATen/native/cpu/ |
D | ScatterGatherKernel.cpp |
    54   static ReduceAdd reduce_add;                variable
    895  "scatter_add_", reduce_add);                in scatter_add_cpu_kernel()
    903  "scatter_reduce_add_", reduce_add);         in scatter_reduce_cpu_kernel()
    919  "scatter_reduce_sum_", reduce_add);         in scatter_reduce_two_cpu_kernel()
    945  "scatter_scalar_reduce_add_", reduce_add);  in scatter_scalar_reduce_cpu_kernel()
|
/external/executorch/kernels/portable/cpu/ |
D | op_native_layer_norm.cpp |
    74   CTYPE sum = reduce_add(x, normalized);  in layer_norm()
|
D | op_native_group_norm.cpp |
    79   CTYPE sum = reduce_add(x, inner_size);  in group_norm()
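In both norm kernels, reduce_add(x, n) evidently returns the sum of the first n elements of x, which the kernel then divides by n to obtain the mean. A sketch of that usage; the helper below is a hypothetical stand-in whose semantics are inferred from the call sites above:

    def reduce_add(x, n):
        """Hypothetical stand-in: sum of the first n elements, as the call sites suggest."""
        total = 0.0
        for i in range(n):
            total += x[i]
        return total

    x = [1.0, 2.0, 3.0, 4.0]
    normalized = len(x)
    mean = reduce_add(x, normalized) / normalized  # 2.5, the first step of layer norm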
|