1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
#include "EmulatedIntrinsics.hpp"

#include <algorithm>
#include <atomic>
#include <cmath>
#include <functional>
#include <mutex>
#include <utility>
22
23 namespace rr {
24 namespace {
25
26 template<typename T>
27 struct UnderlyingType
28 {
29 using Type = typename decltype(rr::Extract(std::declval<RValue<T>>(), 0))::rvalue_underlying_type;
30 };
31
32 template<typename T>
33 using UnderlyingTypeT = typename UnderlyingType<T>::Type;
34
35 // Call single arg function on a vector type
36 template<typename Func, typename T>
call4(Func func,const RValue<T> & x)37 RValue<T> call4(Func func, const RValue<T> &x)
38 {
39 T result;
40 result = Insert(result, Call(func, Extract(x, 0)), 0);
41 result = Insert(result, Call(func, Extract(x, 1)), 1);
42 result = Insert(result, Call(func, Extract(x, 2)), 2);
43 result = Insert(result, Call(func, Extract(x, 3)), 3);
44 return result;
45 }
46
47 // Call two arg function on a vector type
48 template<typename Func, typename T>
call4(Func func,const RValue<T> & x,const RValue<T> & y)49 RValue<T> call4(Func func, const RValue<T> &x, const RValue<T> &y)
50 {
51 T result;
52 result = Insert(result, Call(func, Extract(x, 0), Extract(y, 0)), 0);
53 result = Insert(result, Call(func, Extract(x, 1), Extract(y, 1)), 1);
54 result = Insert(result, Call(func, Extract(x, 2), Extract(y, 2)), 2);
55 result = Insert(result, Call(func, Extract(x, 3), Extract(y, 3)), 3);
56 return result;
57 }
58
59 template<typename T, typename EL = UnderlyingTypeT<T>>
gather(T & out,RValue<Pointer<EL>> base,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment,bool zeroMaskedLanes)60 void gather(T &out, RValue<Pointer<EL>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes)
61 {
62 constexpr bool atomic = false;
63 constexpr std::memory_order order = std::memory_order_relaxed;
64
65 Pointer<Byte> baseBytePtr = base;
66
67 out = T(0);
68 for(int i = 0; i < 4; i++)
69 {
70 If(Extract(mask, i) != 0)
71 {
72 auto offset = Extract(offsets, i);
73 auto el = Load(Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
74 out = Insert(out, el, i);
75 }
76 Else If(zeroMaskedLanes)
77 {
78 out = Insert(out, EL(0), i);
79 }
80 }
81 }
82
83 template<typename T, typename EL = UnderlyingTypeT<T>>
scatter(RValue<Pointer<EL>> base,RValue<T> val,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment)84 void scatter(RValue<Pointer<EL>> base, RValue<T> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
85 {
86 constexpr bool atomic = false;
87 constexpr std::memory_order order = std::memory_order_relaxed;
88
89 Pointer<Byte> baseBytePtr = base;
90
91 for(int i = 0; i < 4; i++)
92 {
93 If(Extract(mask, i) != 0)
94 {
95 auto offset = Extract(offsets, i);
96 Store(Extract(val, i), Pointer<EL>(&baseBytePtr[offset]), alignment, atomic, order);
97 }
98 }
99 }
100
// Atomically replaces *ptr with select(*ptr, value) and returns the value that
// *ptr held immediately before the replacement.
//
// Implemented as a compare-and-swap loop on the pointed-to object itself rather
// than with a mutex: a lock private to this helper could only exclude other
// callers of the same helper, so a min and a max (or any other atomic access)
// on the same address would not be serialized against each other.
//
// `select` must be a pure function of its two arguments; it may be invoked more
// than once if other threads modify *ptr concurrently.
// TODO(b/148207274): Consider moving this down into Subzero as a CAS-based operation.
template<typename T, typename Select>
static T atomicSelect(T *ptr, T value, Select select)
{
	// The object at ptr is a plain T that is accessed through std::atomic<T>,
	// which requires both to share size and alignment.
	static_assert(sizeof(std::atomic<T>) == sizeof(T), "std::atomic<T> must have the same size as T");
	static_assert(alignof(std::atomic<T>) == alignof(T), "std::atomic<T> must have the same alignment as T");

	auto *atomicPtr = reinterpret_cast<std::atomic<T> *>(ptr);

	T observed = atomicPtr->load();
	// A failed compare_exchange_weak refreshes `observed` with the current
	// contents, so the desired value is recomputed on every attempt.
	while(!atomicPtr->compare_exchange_weak(observed, select(observed, value)))
	{
	}
	return observed;
}

// Atomically stores min(*ptr, value) to *ptr. Returns the previous value of *ptr.
template<typename T>
static T atomicMin(T *ptr, T value)
{
	return atomicSelect(ptr, value, [](T current, T candidate) { return std::min(current, candidate); });
}

// Atomically stores max(*ptr, value) to *ptr. Returns the previous value of *ptr.
template<typename T>
static T atomicMax(T *ptr, T value)
{
	return atomicSelect(ptr, value, [](T current, T candidate) { return std::max(current, candidate); });
}
125
126 } // anonymous namespace
127
128 namespace emulated {
129
Gather(RValue<Pointer<Float>> base,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment,bool zeroMaskedLanes)130 RValue<Float4> Gather(RValue<Pointer<Float>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
131 {
132 Float4 result{};
133 gather(result, base, offsets, mask, alignment, zeroMaskedLanes);
134 return result;
135 }
136
Gather(RValue<Pointer<Int>> base,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment,bool zeroMaskedLanes)137 RValue<Int4> Gather(RValue<Pointer<Int>> base, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment, bool zeroMaskedLanes /* = false */)
138 {
139 Int4 result{};
140 gather(result, base, offsets, mask, alignment, zeroMaskedLanes);
141 return result;
142 }
143
Scatter(RValue<Pointer<Float>> base,RValue<Float4> val,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment)144 void Scatter(RValue<Pointer<Float>> base, RValue<Float4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
145 {
146 scatter(base, val, offsets, mask, alignment);
147 }
148
Scatter(RValue<Pointer<Int>> base,RValue<Int4> val,RValue<Int4> offsets,RValue<Int4> mask,unsigned int alignment)149 void Scatter(RValue<Pointer<Int>> base, RValue<Int4> val, RValue<Int4> offsets, RValue<Int4> mask, unsigned int alignment)
150 {
151 scatter<Int4>(base, val, offsets, mask, alignment);
152 }
153
Exp2(RValue<Float> x)154 RValue<Float> Exp2(RValue<Float> x)
155 {
156 return Call(exp2f, x);
157 }
158
Log2(RValue<Float> x)159 RValue<Float> Log2(RValue<Float> x)
160 {
161 return Call(log2f, x);
162 }
163
Sin(RValue<Float4> x)164 RValue<Float4> Sin(RValue<Float4> x)
165 {
166 return call4(sinf, x);
167 }
168
Cos(RValue<Float4> x)169 RValue<Float4> Cos(RValue<Float4> x)
170 {
171 return call4(cosf, x);
172 }
173
Tan(RValue<Float4> x)174 RValue<Float4> Tan(RValue<Float4> x)
175 {
176 return call4(tanf, x);
177 }
178
Asin(RValue<Float4> x)179 RValue<Float4> Asin(RValue<Float4> x)
180 {
181 return call4(asinf, x);
182 }
183
Acos(RValue<Float4> x)184 RValue<Float4> Acos(RValue<Float4> x)
185 {
186 return call4(acosf, x);
187 }
188
Atan(RValue<Float4> x)189 RValue<Float4> Atan(RValue<Float4> x)
190 {
191 return call4(atanf, x);
192 }
193
Sinh(RValue<Float4> x)194 RValue<Float4> Sinh(RValue<Float4> x)
195 {
196 // TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level
197 return Float4(0.5f) * (emulated::Exp(x) - emulated::Exp(-x));
198 }
199
Cosh(RValue<Float4> x)200 RValue<Float4> Cosh(RValue<Float4> x)
201 {
202 // TODO(b/149110874) Use coshf/sinhf when we've implemented SpirV versions at the SpirV level
203 return Float4(0.5f) * (emulated::Exp(x) + emulated::Exp(-x));
204 }
205
Tanh(RValue<Float4> x)206 RValue<Float4> Tanh(RValue<Float4> x)
207 {
208 return call4(tanhf, x);
209 }
210
Asinh(RValue<Float4> x)211 RValue<Float4> Asinh(RValue<Float4> x)
212 {
213 return call4(asinhf, x);
214 }
215
Acosh(RValue<Float4> x)216 RValue<Float4> Acosh(RValue<Float4> x)
217 {
218 return call4(acoshf, x);
219 }
220
Atanh(RValue<Float4> x)221 RValue<Float4> Atanh(RValue<Float4> x)
222 {
223 return call4(atanhf, x);
224 }
225
Atan2(RValue<Float4> x,RValue<Float4> y)226 RValue<Float4> Atan2(RValue<Float4> x, RValue<Float4> y)
227 {
228 return call4(atan2f, x, y);
229 }
230
Pow(RValue<Float4> x,RValue<Float4> y)231 RValue<Float4> Pow(RValue<Float4> x, RValue<Float4> y)
232 {
233 return call4(powf, x, y);
234 }
235
Exp(RValue<Float4> x)236 RValue<Float4> Exp(RValue<Float4> x)
237 {
238 return call4(expf, x);
239 }
240
Log(RValue<Float4> x)241 RValue<Float4> Log(RValue<Float4> x)
242 {
243 return call4(logf, x);
244 }
245
Exp2(RValue<Float4> x)246 RValue<Float4> Exp2(RValue<Float4> x)
247 {
248 return call4(exp2f, x);
249 }
250
Log2(RValue<Float4> x)251 RValue<Float4> Log2(RValue<Float4> x)
252 {
253 return call4(log2f, x);
254 }
255
MinAtomic(RValue<Pointer<Int>> x,RValue<Int> y,std::memory_order memoryOrder)256 RValue<Int> MinAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
257 {
258 return Call(atomicMin<int32_t>, x, y);
259 }
260
MinAtomic(RValue<Pointer<UInt>> x,RValue<UInt> y,std::memory_order memoryOrder)261 RValue<UInt> MinAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
262 {
263 return Call(atomicMin<uint32_t>, x, y);
264 }
265
MaxAtomic(RValue<Pointer<Int>> x,RValue<Int> y,std::memory_order memoryOrder)266 RValue<Int> MaxAtomic(RValue<Pointer<Int>> x, RValue<Int> y, std::memory_order memoryOrder)
267 {
268 return Call(atomicMax<int32_t>, x, y);
269 }
270
MaxAtomic(RValue<Pointer<UInt>> x,RValue<UInt> y,std::memory_order memoryOrder)271 RValue<UInt> MaxAtomic(RValue<Pointer<UInt>> x, RValue<UInt> y, std::memory_order memoryOrder)
272 {
273 return Call(atomicMax<uint32_t>, x, y);
274 }
275
FRem(RValue<Float4> lhs,RValue<Float4> rhs)276 RValue<Float4> FRem(RValue<Float4> lhs, RValue<Float4> rhs)
277 {
278 return call4(fmodf, lhs, rhs);
279 }
280
281 } // namespace emulated
282 } // namespace rr
283