1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "SpirvShader.hpp"
16
17 #include "ShaderCore.hpp"
18 #include "Device/Primitive.hpp"
19 #include "Pipeline/Constants.hpp"
20
21 #include <spirv/unified1/GLSL.std.450.h>
22 #include <spirv/unified1/spirv.hpp>
23
24 namespace {
25 constexpr float PI = 3.141592653589793f;
26
Interpolate(const sw::SIMD::Float & x,const sw::SIMD::Float & y,const sw::SIMD::Float & rhw,const sw::SIMD::Float & A,const sw::SIMD::Float & B,const sw::SIMD::Float & C,bool flat,bool perspective)27 sw::SIMD::Float Interpolate(const sw::SIMD::Float &x, const sw::SIMD::Float &y, const sw::SIMD::Float &rhw,
28 const sw::SIMD::Float &A, const sw::SIMD::Float &B, const sw::SIMD::Float &C,
29 bool flat, bool perspective)
30 {
31 sw::SIMD::Float interpolant = C;
32
33 if(!flat)
34 {
35 interpolant += x * A + y * B;
36
37 if(perspective)
38 {
39 interpolant *= rhw;
40 }
41 }
42
43 return interpolant;
44 }
45
46 } // namespace
47
48 namespace sw {
49
EmitExtGLSLstd450(InsnIterator insn,EmitState * state) const50 SpirvShader::EmitResult SpirvShader::EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const
51 {
52 auto &type = getType(insn.resultTypeId());
53 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
54 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
55
56 switch(extInstIndex)
57 {
58 case GLSLstd450FAbs:
59 {
60 auto src = Operand(this, state, insn.word(5));
61 for(auto i = 0u; i < type.componentCount; i++)
62 {
63 dst.move(i, Abs(src.Float(i)));
64 }
65 }
66 break;
67 case GLSLstd450SAbs:
68 {
69 auto src = Operand(this, state, insn.word(5));
70 for(auto i = 0u; i < type.componentCount; i++)
71 {
72 dst.move(i, Abs(src.Int(i)));
73 }
74 }
75 break;
76 case GLSLstd450Cross:
77 {
78 auto lhs = Operand(this, state, insn.word(5));
79 auto rhs = Operand(this, state, insn.word(6));
80 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
81 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
82 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
83 }
84 break;
85 case GLSLstd450Floor:
86 {
87 auto src = Operand(this, state, insn.word(5));
88 for(auto i = 0u; i < type.componentCount; i++)
89 {
90 dst.move(i, Floor(src.Float(i)));
91 }
92 }
93 break;
94 case GLSLstd450Trunc:
95 {
96 auto src = Operand(this, state, insn.word(5));
97 for(auto i = 0u; i < type.componentCount; i++)
98 {
99 dst.move(i, Trunc(src.Float(i)));
100 }
101 }
102 break;
103 case GLSLstd450Ceil:
104 {
105 auto src = Operand(this, state, insn.word(5));
106 for(auto i = 0u; i < type.componentCount; i++)
107 {
108 dst.move(i, Ceil(src.Float(i)));
109 }
110 }
111 break;
112 case GLSLstd450Fract:
113 {
114 auto src = Operand(this, state, insn.word(5));
115 for(auto i = 0u; i < type.componentCount; i++)
116 {
117 dst.move(i, Frac(src.Float(i)));
118 }
119 }
120 break;
121 case GLSLstd450Round:
122 {
123 auto src = Operand(this, state, insn.word(5));
124 for(auto i = 0u; i < type.componentCount; i++)
125 {
126 dst.move(i, Round(src.Float(i)));
127 }
128 }
129 break;
130 case GLSLstd450RoundEven:
131 {
132 auto src = Operand(this, state, insn.word(5));
133 for(auto i = 0u; i < type.componentCount; i++)
134 {
135 auto x = Round(src.Float(i));
136 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
137 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
138 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
139 }
140 }
141 break;
142 case GLSLstd450FMin:
143 {
144 auto lhs = Operand(this, state, insn.word(5));
145 auto rhs = Operand(this, state, insn.word(6));
146 for(auto i = 0u; i < type.componentCount; i++)
147 {
148 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
149 }
150 }
151 break;
152 case GLSLstd450FMax:
153 {
154 auto lhs = Operand(this, state, insn.word(5));
155 auto rhs = Operand(this, state, insn.word(6));
156 for(auto i = 0u; i < type.componentCount; i++)
157 {
158 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
159 }
160 }
161 break;
162 case GLSLstd450SMin:
163 {
164 auto lhs = Operand(this, state, insn.word(5));
165 auto rhs = Operand(this, state, insn.word(6));
166 for(auto i = 0u; i < type.componentCount; i++)
167 {
168 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
169 }
170 }
171 break;
172 case GLSLstd450SMax:
173 {
174 auto lhs = Operand(this, state, insn.word(5));
175 auto rhs = Operand(this, state, insn.word(6));
176 for(auto i = 0u; i < type.componentCount; i++)
177 {
178 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
179 }
180 }
181 break;
182 case GLSLstd450UMin:
183 {
184 auto lhs = Operand(this, state, insn.word(5));
185 auto rhs = Operand(this, state, insn.word(6));
186 for(auto i = 0u; i < type.componentCount; i++)
187 {
188 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
189 }
190 }
191 break;
192 case GLSLstd450UMax:
193 {
194 auto lhs = Operand(this, state, insn.word(5));
195 auto rhs = Operand(this, state, insn.word(6));
196 for(auto i = 0u; i < type.componentCount; i++)
197 {
198 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
199 }
200 }
201 break;
202 case GLSLstd450Step:
203 {
204 auto edge = Operand(this, state, insn.word(5));
205 auto x = Operand(this, state, insn.word(6));
206 for(auto i = 0u; i < type.componentCount; i++)
207 {
208 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
209 }
210 }
211 break;
212 case GLSLstd450SmoothStep:
213 {
214 auto edge0 = Operand(this, state, insn.word(5));
215 auto edge1 = Operand(this, state, insn.word(6));
216 auto x = Operand(this, state, insn.word(7));
217 for(auto i = 0u; i < type.componentCount; i++)
218 {
219 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
220 (edge1.Float(i) - edge0.Float(i)),
221 SIMD::Float(0.0f)),
222 SIMD::Float(1.0f));
223 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
224 }
225 }
226 break;
227 case GLSLstd450FMix:
228 {
229 auto x = Operand(this, state, insn.word(5));
230 auto y = Operand(this, state, insn.word(6));
231 auto a = Operand(this, state, insn.word(7));
232 for(auto i = 0u; i < type.componentCount; i++)
233 {
234 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
235 }
236 }
237 break;
238 case GLSLstd450FClamp:
239 {
240 auto x = Operand(this, state, insn.word(5));
241 auto minVal = Operand(this, state, insn.word(6));
242 auto maxVal = Operand(this, state, insn.word(7));
243 for(auto i = 0u; i < type.componentCount; i++)
244 {
245 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
246 }
247 }
248 break;
249 case GLSLstd450SClamp:
250 {
251 auto x = Operand(this, state, insn.word(5));
252 auto minVal = Operand(this, state, insn.word(6));
253 auto maxVal = Operand(this, state, insn.word(7));
254 for(auto i = 0u; i < type.componentCount; i++)
255 {
256 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
257 }
258 }
259 break;
260 case GLSLstd450UClamp:
261 {
262 auto x = Operand(this, state, insn.word(5));
263 auto minVal = Operand(this, state, insn.word(6));
264 auto maxVal = Operand(this, state, insn.word(7));
265 for(auto i = 0u; i < type.componentCount; i++)
266 {
267 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
268 }
269 }
270 break;
271 case GLSLstd450FSign:
272 {
273 auto src = Operand(this, state, insn.word(5));
274 for(auto i = 0u; i < type.componentCount; i++)
275 {
276 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
277 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
278 dst.move(i, neg | pos);
279 }
280 }
281 break;
282 case GLSLstd450SSign:
283 {
284 auto src = Operand(this, state, insn.word(5));
285 for(auto i = 0u; i < type.componentCount; i++)
286 {
287 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
288 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
289 dst.move(i, neg | pos);
290 }
291 }
292 break;
293 case GLSLstd450Reflect:
294 {
295 auto I = Operand(this, state, insn.word(5));
296 auto N = Operand(this, state, insn.word(6));
297
298 SIMD::Float d = FDot(type.componentCount, I, N);
299
300 for(auto i = 0u; i < type.componentCount; i++)
301 {
302 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
303 }
304 }
305 break;
306 case GLSLstd450Refract:
307 {
308 auto I = Operand(this, state, insn.word(5));
309 auto N = Operand(this, state, insn.word(6));
310 auto eta = Operand(this, state, insn.word(7));
311 Decorations r = GetDecorationsForId(insn.resultId());
312
313 SIMD::Float d = FDot(type.componentCount, I, N);
314 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
315 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
316 SIMD::Float t = (eta.Float(0) * d + Sqrt(k, r.RelaxedPrecision));
317
318 for(auto i = 0u; i < type.componentCount; i++)
319 {
320 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
321 }
322 }
323 break;
324 case GLSLstd450FaceForward:
325 {
326 auto N = Operand(this, state, insn.word(5));
327 auto I = Operand(this, state, insn.word(6));
328 auto Nref = Operand(this, state, insn.word(7));
329
330 SIMD::Float d = FDot(type.componentCount, I, Nref);
331 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
332
333 for(auto i = 0u; i < type.componentCount; i++)
334 {
335 auto n = N.Float(i);
336 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
337 }
338 }
339 break;
340 case GLSLstd450Length:
341 {
342 auto x = Operand(this, state, insn.word(5));
343 SIMD::Float d = FDot(getObjectType(insn.word(5)).componentCount, x, x);
344 Decorations r = GetDecorationsForId(insn.resultId());
345
346 dst.move(0, Sqrt(d, r.RelaxedPrecision));
347 }
348 break;
349 case GLSLstd450Normalize:
350 {
351 auto x = Operand(this, state, insn.word(5));
352 Decorations r = GetDecorationsForId(insn.resultId());
353
354 SIMD::Float d = FDot(getObjectType(insn.word(5)).componentCount, x, x);
355 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d, r.RelaxedPrecision);
356
357 for(auto i = 0u; i < type.componentCount; i++)
358 {
359 dst.move(i, invLength * x.Float(i));
360 }
361 }
362 break;
363 case GLSLstd450Distance:
364 {
365 auto p0 = Operand(this, state, insn.word(5));
366 auto p1 = Operand(this, state, insn.word(6));
367 Decorations r = GetDecorationsForId(insn.resultId());
368
369 // sqrt(dot(p0-p1, p0-p1))
370 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
371
372 for(auto i = 1u; i < p0.componentCount; i++)
373 {
374 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
375 }
376
377 dst.move(0, Sqrt(d, r.RelaxedPrecision));
378 }
379 break;
380 case GLSLstd450Modf:
381 {
382 auto val = Operand(this, state, insn.word(5));
383 auto ptrId = Object::ID(insn.word(6));
384
385 Intermediate whole(type.componentCount);
386
387 for(auto i = 0u; i < type.componentCount; i++)
388 {
389 auto wholeAndFrac = Modf(val.Float(i));
390 dst.move(i, wholeAndFrac.second);
391 whole.move(i, wholeAndFrac.first);
392 }
393
394 Store(ptrId, whole, false, std::memory_order_relaxed, state);
395 }
396 break;
397 case GLSLstd450ModfStruct:
398 {
399 auto val = Operand(this, state, insn.word(5));
400
401 for(auto i = 0u; i < val.componentCount; i++)
402 {
403 auto wholeAndFrac = Modf(val.Float(i));
404 dst.move(i, wholeAndFrac.second);
405 dst.move(val.componentCount + i, wholeAndFrac.first);
406 }
407 }
408 break;
409 case GLSLstd450PackSnorm4x8:
410 {
411 auto val = Operand(this, state, insn.word(5));
412 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
413 SIMD::Int(0xFF)) |
414 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
415 SIMD::Int(0xFF))
416 << 8) |
417 ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
418 SIMD::Int(0xFF))
419 << 16) |
420 ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
421 SIMD::Int(0xFF))
422 << 24));
423 }
424 break;
425 case GLSLstd450PackUnorm4x8:
426 {
427 auto val = Operand(this, state, insn.word(5));
428 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
429 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
430 ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
431 ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
432 }
433 break;
434 case GLSLstd450PackSnorm2x16:
435 {
436 auto val = Operand(this, state, insn.word(5));
437 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
438 SIMD::Int(0xFFFF)) |
439 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
440 SIMD::Int(0xFFFF))
441 << 16));
442 }
443 break;
444 case GLSLstd450PackUnorm2x16:
445 {
446 auto val = Operand(this, state, insn.word(5));
447 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
448 SIMD::UInt(0xFFFF)) |
449 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
450 SIMD::UInt(0xFFFF))
451 << 16));
452 }
453 break;
454 case GLSLstd450PackHalf2x16:
455 {
456 auto val = Operand(this, state, insn.word(5));
457 dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
458 }
459 break;
460 case GLSLstd450UnpackSnorm4x8:
461 {
462 auto val = Operand(this, state, insn.word(5));
463 dst.move(0, Min(Max(SIMD::Float(((val.Int(0) << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
464 dst.move(1, Min(Max(SIMD::Float(((val.Int(0) << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
465 dst.move(2, Min(Max(SIMD::Float(((val.Int(0) << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
466 dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
467 }
468 break;
469 case GLSLstd450UnpackUnorm4x8:
470 {
471 auto val = Operand(this, state, insn.word(5));
472 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
473 dst.move(1, SIMD::Float(((val.UInt(0) >> 8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
474 dst.move(2, SIMD::Float(((val.UInt(0) >> 16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
475 dst.move(3, SIMD::Float(((val.UInt(0) >> 24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
476 }
477 break;
478 case GLSLstd450UnpackSnorm2x16:
479 {
480 auto val = Operand(this, state, insn.word(5));
481 // clamp(f / 32767.0, -1.0, 1.0)
482 dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
483 SIMD::Float(1.0f / float(0x7FFF0000)),
484 SIMD::Float(-1.0f)),
485 SIMD::Float(1.0f)));
486 dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
487 SIMD::Float(-1.0f)),
488 SIMD::Float(1.0f)));
489 }
490 break;
491 case GLSLstd450UnpackUnorm2x16:
492 {
493 auto val = Operand(this, state, insn.word(5));
494 // f / 65535.0
495 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
496 dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
497 }
498 break;
499 case GLSLstd450UnpackHalf2x16:
500 {
501 auto val = Operand(this, state, insn.word(5));
502 dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
503 dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
504 }
505 break;
506 case GLSLstd450Fma:
507 {
508 auto a = Operand(this, state, insn.word(5));
509 auto b = Operand(this, state, insn.word(6));
510 auto c = Operand(this, state, insn.word(7));
511 for(auto i = 0u; i < type.componentCount; i++)
512 {
513 dst.move(i, MulAdd(a.Float(i), b.Float(i), c.Float(i)));
514 }
515 }
516 break;
517 case GLSLstd450Frexp:
518 {
519 auto val = Operand(this, state, insn.word(5));
520 auto ptrId = Object::ID(insn.word(6));
521
522 Intermediate exp(type.componentCount);
523
524 for(auto i = 0u; i < type.componentCount; i++)
525 {
526 auto significandAndExponent = Frexp(val.Float(i));
527 dst.move(i, significandAndExponent.first);
528 exp.move(i, significandAndExponent.second);
529 }
530
531 Store(ptrId, exp, false, std::memory_order_relaxed, state);
532 }
533 break;
534 case GLSLstd450FrexpStruct:
535 {
536 auto val = Operand(this, state, insn.word(5));
537
538 for(auto i = 0u; i < val.componentCount; i++)
539 {
540 auto significandAndExponent = Frexp(val.Float(i));
541 dst.move(i, significandAndExponent.first);
542 dst.move(val.componentCount + i, significandAndExponent.second);
543 }
544 }
545 break;
546 case GLSLstd450Ldexp:
547 {
548 auto significand = Operand(this, state, insn.word(5));
549 auto exponent = Operand(this, state, insn.word(6));
550 for(auto i = 0u; i < type.componentCount; i++)
551 {
552 // Assumes IEEE 754
553 auto in = significand.Float(i);
554 auto significandExponent = Exponent(in);
555 auto combinedExponent = exponent.Int(i) + significandExponent;
556 auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
557 auto isSignificandInf = SIMD::UInt(IsInf(in));
558 auto isSignificandNaN = SIMD::UInt(IsNan(in));
559 auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
560 auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
561 auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
562
563 SIMD::UInt v;
564 v = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand.
565 v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent.
566 v &= isExponentInBounds; // Clear v if the exponent is OOB.
567
568 v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit.
569 v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great.
570
571 // If the input significand is zero, inf or nan, just return the
572 // input significand.
573 auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
574 v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
575
576 dst.move(i, As<SIMD::Float>(v));
577 }
578 }
579 break;
580 case GLSLstd450Radians:
581 {
582 auto degrees = Operand(this, state, insn.word(5));
583 for(auto i = 0u; i < type.componentCount; i++)
584 {
585 dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
586 }
587 }
588 break;
589 case GLSLstd450Degrees:
590 {
591 auto radians = Operand(this, state, insn.word(5));
592 for(auto i = 0u; i < type.componentCount; i++)
593 {
594 dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
595 }
596 }
597 break;
598 case GLSLstd450Sin:
599 {
600 auto radians = Operand(this, state, insn.word(5));
601 Decorations d = GetDecorationsForId(insn.resultId());
602
603 for(auto i = 0u; i < type.componentCount; i++)
604 {
605 dst.move(i, sw::Sin(radians.Float(i), d.RelaxedPrecision));
606 }
607 }
608 break;
609 case GLSLstd450Cos:
610 {
611 auto radians = Operand(this, state, insn.word(5));
612 Decorations d = GetDecorationsForId(insn.resultId());
613
614 for(auto i = 0u; i < type.componentCount; i++)
615 {
616 dst.move(i, sw::Cos(radians.Float(i), d.RelaxedPrecision));
617 }
618 }
619 break;
620 case GLSLstd450Tan:
621 {
622 auto radians = Operand(this, state, insn.word(5));
623 Decorations d = GetDecorationsForId(insn.resultId());
624
625 for(auto i = 0u; i < type.componentCount; i++)
626 {
627 dst.move(i, sw::Tan(radians.Float(i), d.RelaxedPrecision));
628 }
629 }
630 break;
631 case GLSLstd450Asin:
632 {
633 auto val = Operand(this, state, insn.word(5));
634 Decorations d = GetDecorationsForId(insn.resultId());
635
636 for(auto i = 0u; i < type.componentCount; i++)
637 {
638 dst.move(i, sw::Asin(val.Float(i), d.RelaxedPrecision));
639 }
640 }
641 break;
642 case GLSLstd450Acos:
643 {
644 auto val = Operand(this, state, insn.word(5));
645 Decorations d = GetDecorationsForId(insn.resultId());
646
647 for(auto i = 0u; i < type.componentCount; i++)
648 {
649 dst.move(i, sw::Acos(val.Float(i), d.RelaxedPrecision));
650 }
651 }
652 break;
653 case GLSLstd450Atan:
654 {
655 auto val = Operand(this, state, insn.word(5));
656 Decorations d = GetDecorationsForId(insn.resultId());
657
658 for(auto i = 0u; i < type.componentCount; i++)
659 {
660 dst.move(i, sw::Atan(val.Float(i), d.RelaxedPrecision));
661 }
662 }
663 break;
664 case GLSLstd450Sinh:
665 {
666 auto val = Operand(this, state, insn.word(5));
667 Decorations d = GetDecorationsForId(insn.resultId());
668
669 for(auto i = 0u; i < type.componentCount; i++)
670 {
671 dst.move(i, sw::Sinh(val.Float(i), d.RelaxedPrecision));
672 }
673 }
674 break;
675 case GLSLstd450Cosh:
676 {
677 auto val = Operand(this, state, insn.word(5));
678 Decorations d = GetDecorationsForId(insn.resultId());
679
680 for(auto i = 0u; i < type.componentCount; i++)
681 {
682 dst.move(i, sw::Cosh(val.Float(i), d.RelaxedPrecision));
683 }
684 }
685 break;
686 case GLSLstd450Tanh:
687 {
688 auto val = Operand(this, state, insn.word(5));
689 Decorations d = GetDecorationsForId(insn.resultId());
690
691 for(auto i = 0u; i < type.componentCount; i++)
692 {
693 dst.move(i, sw::Tanh(val.Float(i), d.RelaxedPrecision));
694 }
695 }
696 break;
697 case GLSLstd450Asinh:
698 {
699 auto val = Operand(this, state, insn.word(5));
700 Decorations d = GetDecorationsForId(insn.resultId());
701
702 for(auto i = 0u; i < type.componentCount; i++)
703 {
704 dst.move(i, sw::Asinh(val.Float(i), d.RelaxedPrecision));
705 }
706 }
707 break;
708 case GLSLstd450Acosh:
709 {
710 auto val = Operand(this, state, insn.word(5));
711 Decorations d = GetDecorationsForId(insn.resultId());
712
713 for(auto i = 0u; i < type.componentCount; i++)
714 {
715 dst.move(i, sw::Acosh(val.Float(i), d.RelaxedPrecision));
716 }
717 }
718 break;
719 case GLSLstd450Atanh:
720 {
721 auto val = Operand(this, state, insn.word(5));
722 Decorations d = GetDecorationsForId(insn.resultId());
723
724 for(auto i = 0u; i < type.componentCount; i++)
725 {
726 dst.move(i, sw::Atanh(val.Float(i), d.RelaxedPrecision));
727 }
728 }
729 break;
730 case GLSLstd450Atan2:
731 {
732 auto x = Operand(this, state, insn.word(5));
733 auto y = Operand(this, state, insn.word(6));
734 Decorations d = GetDecorationsForId(insn.resultId());
735
736 for(auto i = 0u; i < type.componentCount; i++)
737 {
738 dst.move(i, sw::Atan2(x.Float(i), y.Float(i), d.RelaxedPrecision));
739 }
740 }
741 break;
742 case GLSLstd450Pow:
743 {
744 auto x = Operand(this, state, insn.word(5));
745 auto y = Operand(this, state, insn.word(6));
746 Decorations d = GetDecorationsForId(insn.resultId());
747
748 for(auto i = 0u; i < type.componentCount; i++)
749 {
750 dst.move(i, sw::Pow(x.Float(i), y.Float(i), d.RelaxedPrecision));
751 }
752 }
753 break;
754 case GLSLstd450Exp:
755 {
756 auto val = Operand(this, state, insn.word(5));
757 Decorations d = GetDecorationsForId(insn.resultId());
758
759 for(auto i = 0u; i < type.componentCount; i++)
760 {
761 dst.move(i, sw::Exp(val.Float(i), d.RelaxedPrecision));
762 }
763 }
764 break;
765 case GLSLstd450Log:
766 {
767 auto val = Operand(this, state, insn.word(5));
768 Decorations d = GetDecorationsForId(insn.resultId());
769
770 for(auto i = 0u; i < type.componentCount; i++)
771 {
772 dst.move(i, sw::Log(val.Float(i), d.RelaxedPrecision));
773 }
774 }
775 break;
776 case GLSLstd450Exp2:
777 {
778 auto val = Operand(this, state, insn.word(5));
779 Decorations d = GetDecorationsForId(insn.resultId());
780
781 for(auto i = 0u; i < type.componentCount; i++)
782 {
783 dst.move(i, sw::Exp2(val.Float(i), d.RelaxedPrecision));
784 }
785 }
786 break;
787 case GLSLstd450Log2:
788 {
789 auto val = Operand(this, state, insn.word(5));
790 Decorations d = GetDecorationsForId(insn.resultId());
791
792 for(auto i = 0u; i < type.componentCount; i++)
793 {
794 dst.move(i, sw::Log2(val.Float(i), d.RelaxedPrecision));
795 }
796 }
797 break;
798 case GLSLstd450Sqrt:
799 {
800 auto val = Operand(this, state, insn.word(5));
801 Decorations d = GetDecorationsForId(insn.resultId());
802
803 for(auto i = 0u; i < type.componentCount; i++)
804 {
805 dst.move(i, Sqrt(val.Float(i), d.RelaxedPrecision));
806 }
807 }
808 break;
809 case GLSLstd450InverseSqrt:
810 {
811 auto val = Operand(this, state, insn.word(5));
812 Decorations d = GetDecorationsForId(insn.resultId());
813
814 for(auto i = 0u; i < type.componentCount; i++)
815 {
816 dst.move(i, RcpSqrt(val.Float(i), d.RelaxedPrecision));
817 }
818 }
819 break;
820 case GLSLstd450Determinant:
821 {
822 auto mat = Operand(this, state, insn.word(5));
823
824 switch(mat.componentCount)
825 {
826 case 4: // 2x2
827 dst.move(0, Determinant(
828 mat.Float(0), mat.Float(1),
829 mat.Float(2), mat.Float(3)));
830 break;
831 case 9: // 3x3
832 dst.move(0, Determinant(
833 mat.Float(0), mat.Float(1), mat.Float(2),
834 mat.Float(3), mat.Float(4), mat.Float(5),
835 mat.Float(6), mat.Float(7), mat.Float(8)));
836 break;
837 case 16: // 4x4
838 dst.move(0, Determinant(
839 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
840 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
841 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
842 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
843 break;
844 default:
845 UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(mat.componentCount));
846 }
847 }
848 break;
849 case GLSLstd450MatrixInverse:
850 {
851 auto mat = Operand(this, state, insn.word(5));
852
853 switch(mat.componentCount)
854 {
855 case 4: // 2x2
856 {
857 auto inv = MatrixInverse(
858 mat.Float(0), mat.Float(1),
859 mat.Float(2), mat.Float(3));
860 for(uint32_t i = 0; i < inv.size(); i++)
861 {
862 dst.move(i, inv[i]);
863 }
864 }
865 break;
866 case 9: // 3x3
867 {
868 auto inv = MatrixInverse(
869 mat.Float(0), mat.Float(1), mat.Float(2),
870 mat.Float(3), mat.Float(4), mat.Float(5),
871 mat.Float(6), mat.Float(7), mat.Float(8));
872 for(uint32_t i = 0; i < inv.size(); i++)
873 {
874 dst.move(i, inv[i]);
875 }
876 }
877 break;
878 case 16: // 4x4
879 {
880 auto inv = MatrixInverse(
881 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
882 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
883 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
884 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
885 for(uint32_t i = 0; i < inv.size(); i++)
886 {
887 dst.move(i, inv[i]);
888 }
889 }
890 break;
891 default:
892 UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(mat.componentCount));
893 }
894 }
895 break;
896 case GLSLstd450IMix:
897 {
898 UNREACHABLE("GLSLstd450IMix has been removed from the specification");
899 }
900 break;
901 case GLSLstd450PackDouble2x32:
902 {
903 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
904 }
905 break;
906 case GLSLstd450UnpackDouble2x32:
907 {
908 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
909 }
910 break;
911 case GLSLstd450FindILsb:
912 {
913 auto val = Operand(this, state, insn.word(5));
914 for(auto i = 0u; i < type.componentCount; i++)
915 {
916 auto v = val.UInt(i);
917 dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
918 }
919 }
920 break;
921 case GLSLstd450FindSMsb:
922 {
923 auto val = Operand(this, state, insn.word(5));
924 for(auto i = 0u; i < type.componentCount; i++)
925 {
926 auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
927 dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
928 }
929 }
930 break;
931 case GLSLstd450FindUMsb:
932 {
933 auto val = Operand(this, state, insn.word(5));
934 for(auto i = 0u; i < type.componentCount; i++)
935 {
936 dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
937 }
938 }
939 break;
940 case GLSLstd450InterpolateAtCentroid:
941 {
942 Decorations d = GetDecorationsForId(insn.word(5));
943 auto ptr = state->getPointer(insn.word(5));
944 for(auto i = 0u; i < type.componentCount; i++)
945 {
946 dst.move(i, Interpolate(ptr, d.Location, 0, i, state, SpirvShader::Centroid));
947 }
948 }
949 break;
950 case GLSLstd450InterpolateAtSample:
951 {
952 Decorations d = GetDecorationsForId(insn.word(5));
953 auto ptr = state->getPointer(insn.word(5));
954 for(auto i = 0u; i < type.componentCount; i++)
955 {
956 dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtSample));
957 }
958 }
959 break;
960 case GLSLstd450InterpolateAtOffset:
961 {
962 Decorations d = GetDecorationsForId(insn.word(5));
963 auto ptr = state->getPointer(insn.word(5));
964 for(auto i = 0u; i < type.componentCount; i++)
965 {
966 dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtOffset));
967 }
968 }
969 break;
970 case GLSLstd450NMin:
971 {
972 auto x = Operand(this, state, insn.word(5));
973 auto y = Operand(this, state, insn.word(6));
974 for(auto i = 0u; i < type.componentCount; i++)
975 {
976 dst.move(i, NMin(x.Float(i), y.Float(i)));
977 }
978 }
979 break;
980 case GLSLstd450NMax:
981 {
982 auto x = Operand(this, state, insn.word(5));
983 auto y = Operand(this, state, insn.word(6));
984 for(auto i = 0u; i < type.componentCount; i++)
985 {
986 dst.move(i, NMax(x.Float(i), y.Float(i)));
987 }
988 }
989 break;
990 case GLSLstd450NClamp:
991 {
992 auto x = Operand(this, state, insn.word(5));
993 auto minVal = Operand(this, state, insn.word(6));
994 auto maxVal = Operand(this, state, insn.word(7));
995 for(auto i = 0u; i < type.componentCount; i++)
996 {
997 auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
998 dst.move(i, clamp);
999 }
1000 }
1001 break;
1002 default:
1003 UNREACHABLE("ExtInst %d", int(extInstIndex));
1004 break;
1005 }
1006
1007 return EmitResult::Continue;
1008 }
1009
Interpolate(SIMD::Pointer const & ptr,int32_t location,Object::ID paramId,uint32_t component,EmitState * state,InterpolationType type) const1010 SIMD::Float SpirvShader::Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
1011 uint32_t component, EmitState *state, InterpolationType type) const
1012 {
1013 uint32_t interpolant = (location * 4);
1014 uint32_t components_per_row = GetNumInputComponents(location);
1015 if((location < 0) || (interpolant >= inputs.size()) || (components_per_row == 0))
1016 {
1017 return SIMD::Float(0.0f);
1018 }
1019
1020 const auto &interpolationData = state->routine->interpolationData;
1021
1022 SIMD::Float x;
1023 SIMD::Float y;
1024 SIMD::Float rhw;
1025
1026 switch(type)
1027 {
1028 case Centroid:
1029 x = interpolationData.xCentroid;
1030 y = interpolationData.yCentroid;
1031 rhw = interpolationData.rhwCentroid;
1032 break;
1033 case AtSample:
1034 x = SIMD::Float(0.0f);
1035 y = SIMD::Float(0.0f);
1036
1037 if(state->getMultiSampleCount() > 1)
1038 {
1039 static constexpr int NUM_SAMPLES = 4;
1040 ASSERT(state->getMultiSampleCount() == NUM_SAMPLES);
1041
1042 Array<Float> sampleX(NUM_SAMPLES);
1043 Array<Float> sampleY(NUM_SAMPLES);
1044 for(int i = 0; i < NUM_SAMPLES; ++i)
1045 {
1046 sampleX[i] = Constants::SampleLocationsX[i];
1047 sampleY[i] = Constants::SampleLocationsY[i];
1048 }
1049
1050 auto sampleOperand = Operand(this, state, paramId);
1051 ASSERT(sampleOperand.componentCount == 1);
1052
1053 // If sample does not exist, the position used to interpolate the
1054 // input variable is undefined, so we just clamp to avoid OOB accesses.
1055 SIMD::Int samples = sampleOperand.Int(0) & SIMD::Int(NUM_SAMPLES - 1);
1056
1057 for(int i = 0; i < SIMD::Width; ++i)
1058 {
1059 Int sample = Extract(samples, i);
1060 x = Insert(x, sampleX[sample], i);
1061 y = Insert(y, sampleY[sample], i);
1062 }
1063 }
1064
1065 x += interpolationData.x;
1066 y += interpolationData.y;
1067 rhw = interpolationData.rhw;
1068 break;
1069 case AtOffset:
1070 {
1071 // An offset of (0, 0) identifies the center of the pixel.
1072 auto offset = Operand(this, state, paramId);
1073 ASSERT(offset.componentCount == 2);
1074
1075 x = interpolationData.x + offset.Float(0);
1076 y = interpolationData.y + offset.Float(1);
1077 rhw = interpolationData.rhw;
1078 }
1079 break;
1080 default:
1081 UNREACHABLE("Unknown interpolation type: %d", (int)type);
1082 return SIMD::Float(0.0f);
1083 }
1084
1085 uint32_t packedInterpolant = GetPackedInterpolant(location);
1086 Pointer<Byte> planeEquation = interpolationData.primitive + OFFSET(Primitive, V[packedInterpolant]);
1087 if(ptr.hasDynamicOffsets)
1088 {
1089 // Combine plane equations into one
1090 SIMD::Float A;
1091 SIMD::Float B;
1092 SIMD::Float C;
1093
1094 for(int i = 0; i < SIMD::Width; ++i)
1095 {
1096 Int offset = ((Extract(ptr.dynamicOffsets, i) + ptr.staticOffsets[i]) >> 2) + component;
1097 Pointer<Byte> planeEquationI = planeEquation + (offset * sizeof(PlaneEquation));
1098 A = Insert(A, Extract(*Pointer<SIMD::Float>(planeEquationI + OFFSET(PlaneEquation, A), 16), i), i);
1099 B = Insert(B, Extract(*Pointer<SIMD::Float>(planeEquationI + OFFSET(PlaneEquation, B), 16), i), i);
1100 C = Insert(C, Extract(*Pointer<SIMD::Float>(planeEquationI + OFFSET(PlaneEquation, C), 16), i), i);
1101 }
1102 return ::Interpolate(x, y, rhw, A, B, C, false, true);
1103 }
1104 else
1105 {
1106 ASSERT(ptr.hasStaticEqualOffsets());
1107
1108 uint32_t offset = (ptr.staticOffsets[0] >> 2) + component;
1109 if((interpolant + offset) >= inputs.size())
1110 {
1111 return SIMD::Float(0.0f);
1112 }
1113 planeEquation += offset * sizeof(PlaneEquation);
1114 }
1115
1116 return SpirvRoutine::interpolateAtXY(x, y, rhw, planeEquation, false, true);
1117 }
1118
// Loads the coefficients of the plane equation at `planeEquation` and
// evaluates it at (x, y).
//
// For `flat` interpolation only the C coefficient is loaded. Otherwise A and B
// are loaded as well, and the result is multiplied by rhw when `perspective`
// is set.
SIMD::Float SpirvRoutine::interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
{
	SIMD::Float A;
	SIMD::Float B;
	SIMD::Float C = *Pointer<SIMD::Float>(planeEquation + OFFSET(PlaneEquation, C), 16);

	if(flat)
	{
		// A and B are left unassigned; they are not read for flat interpolants.
		return ::Interpolate(x, y, rhw, A, B, C, true, perspective);
	}

	A = *Pointer<SIMD::Float>(planeEquation + OFFSET(PlaneEquation, A), 16);
	B = *Pointer<SIMD::Float>(planeEquation + OFFSET(PlaneEquation, B), 16);

	return ::Interpolate(x, y, rhw, A, B, C, false, perspective);
}
1133
1134 } // namespace sw