1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_REDUCE_H
10 #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_REDUCE_H
11
12 #include <__algorithm/pstl_backends/cpu_backends/backend.h>
13 #include <__config>
14 #include <__iterator/concepts.h>
15 #include <__iterator/iterator_traits.h>
16 #include <__numeric/transform_reduce.h>
17 #include <__type_traits/is_arithmetic.h>
18 #include <__type_traits/is_execution_policy.h>
19 #include <__type_traits/operation_traits.h>
20 #include <__utility/move.h>
21 #include <new>
22 #include <optional>
23
24 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
25 # pragma GCC system_header
26 #endif
27
28 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
29
30 _LIBCPP_BEGIN_NAMESPACE_STD
31
32 template <typename _DifferenceType,
33 typename _Tp,
34 typename _BinaryOperation,
35 typename _UnaryOperation,
36 typename _UnaryResult = invoke_result_t<_UnaryOperation, _DifferenceType>,
37 __enable_if_t<__desugars_to<__plus_tag, _BinaryOperation, _Tp, _UnaryResult>::value && is_arithmetic_v<_Tp> &&
38 is_arithmetic_v<_UnaryResult>,
39 int> = 0>
40 _LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_DifferenceType __n,_Tp __init,_BinaryOperation,_UnaryOperation __f)41 __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept {
42 _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init)
43 for (_DifferenceType __i = 0; __i < __n; ++__i)
44 __init += __f(__i);
45 return __init;
46 }
47
48 template <typename _Size,
49 typename _Tp,
50 typename _BinaryOperation,
51 typename _UnaryOperation,
52 typename _UnaryResult = invoke_result_t<_UnaryOperation, _Size>,
53 __enable_if_t<!(__desugars_to<__plus_tag, _BinaryOperation, _Tp, _UnaryResult>::value &&
54 is_arithmetic_v<_Tp> && is_arithmetic_v<_UnaryResult>),
55 int> = 0>
56 _LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_Size __n,_Tp __init,_BinaryOperation __binary_op,_UnaryOperation __f)57 __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
58 const _Size __block_size = __lane_size / sizeof(_Tp);
59 if (__n > 2 * __block_size && __block_size > 1) {
60 alignas(__lane_size) char __lane_buffer[__lane_size];
61 _Tp* __lane = reinterpret_cast<_Tp*>(__lane_buffer);
62
63 // initializer
64 _PSTL_PRAGMA_SIMD
65 for (_Size __i = 0; __i < __block_size; ++__i) {
66 ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i)));
67 }
68 // main loop
69 _Size __i = 2 * __block_size;
70 const _Size __last_iteration = __block_size * (__n / __block_size);
71 for (; __i < __last_iteration; __i += __block_size) {
72 _PSTL_PRAGMA_SIMD
73 for (_Size __j = 0; __j < __block_size; ++__j) {
74 __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__i + __j));
75 }
76 }
77 // remainder
78 _PSTL_PRAGMA_SIMD
79 for (_Size __j = 0; __j < __n - __last_iteration; ++__j) {
80 __lane[__j] = __binary_op(std::move(__lane[__j]), __f(__last_iteration + __j));
81 }
82 // combiner
83 for (_Size __j = 0; __j < __block_size; ++__j) {
84 __init = __binary_op(std::move(__init), std::move(__lane[__j]));
85 }
86 // destroyer
87 _PSTL_PRAGMA_SIMD
88 for (_Size __j = 0; __j < __block_size; ++__j) {
89 __lane[__j].~_Tp();
90 }
91 } else {
92 for (_Size __i = 0; __i < __n; ++__i) {
93 __init = __binary_op(std::move(__init), __f(__i));
94 }
95 }
96 return __init;
97 }
98
99 template <class _ExecutionPolicy,
100 class _ForwardIterator1,
101 class _ForwardIterator2,
102 class _Tp,
103 class _BinaryOperation1,
104 class _BinaryOperation2>
__pstl_transform_reduce(__cpu_backend_tag,_ForwardIterator1 __first1,_ForwardIterator1 __last1,_ForwardIterator2 __first2,_Tp __init,_BinaryOperation1 __reduce,_BinaryOperation2 __transform)105 _LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce(
106 __cpu_backend_tag,
107 _ForwardIterator1 __first1,
108 _ForwardIterator1 __last1,
109 _ForwardIterator2 __first2,
110 _Tp __init,
111 _BinaryOperation1 __reduce,
112 _BinaryOperation2 __transform) {
113 if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
114 __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
115 __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
116 return __par_backend::__parallel_transform_reduce(
117 __first1,
118 std::move(__last1),
119 [__first1, __first2, __transform](_ForwardIterator1 __iter) {
120 return __transform(*__iter, *(__first2 + (__iter - __first1)));
121 },
122 std::move(__init),
123 std::move(__reduce),
124 [__first1, __first2, __reduce, __transform](
125 _ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last, _Tp __brick_init) {
126 return *std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>(
127 __cpu_backend_tag{},
128 __brick_first,
129 std::move(__brick_last),
130 __first2 + (__brick_first - __first1),
131 std::move(__brick_init),
132 std::move(__reduce),
133 std::move(__transform));
134 });
135 } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
136 __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
137 __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
138 return std::__simd_transform_reduce(
139 __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
140 return __transform(__first1[__i], __first2[__i]);
141 });
142 } else {
143 return std::transform_reduce(
144 std::move(__first1),
145 std::move(__last1),
146 std::move(__first2),
147 std::move(__init),
148 std::move(__reduce),
149 std::move(__transform));
150 }
151 }
152
153 template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
__pstl_transform_reduce(__cpu_backend_tag,_ForwardIterator __first,_ForwardIterator __last,_Tp __init,_BinaryOperation __reduce,_UnaryOperation __transform)154 _LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce(
155 __cpu_backend_tag,
156 _ForwardIterator __first,
157 _ForwardIterator __last,
158 _Tp __init,
159 _BinaryOperation __reduce,
160 _UnaryOperation __transform) {
161 if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
162 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
163 return __par_backend::__parallel_transform_reduce(
164 std::move(__first),
165 std::move(__last),
166 [__transform](_ForwardIterator __iter) { return __transform(*__iter); },
167 std::move(__init),
168 __reduce,
169 [__transform, __reduce](auto __brick_first, auto __brick_last, _Tp __brick_init) {
170 auto __res = std::__pstl_transform_reduce<__remove_parallel_policy_t<_ExecutionPolicy>>(
171 __cpu_backend_tag{},
172 std::move(__brick_first),
173 std::move(__brick_last),
174 std::move(__brick_init),
175 std::move(__reduce),
176 std::move(__transform));
177 _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!");
178 return *std::move(__res);
179 });
180 } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
181 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
182 return std::__simd_transform_reduce(
183 __last - __first,
184 std::move(__init),
185 std::move(__reduce),
186 [=, &__transform](__iter_diff_t<_ForwardIterator> __i) { return __transform(__first[__i]); });
187 } else {
188 return std::transform_reduce(
189 std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform));
190 }
191 }
192
193 _LIBCPP_END_NAMESPACE_STD
194
195 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
196
197 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_TRANSFORM_REDUCE_H
198