1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "SpirvShader.hpp"
16
17 #include "ShaderCore.hpp"
18 #include "Device/Primitive.hpp"
19 #include "Pipeline/Constants.hpp"
20
21 #include <spirv/unified1/GLSL.std.450.h>
22 #include <spirv/unified1/spirv.hpp>
23
24 namespace {
25 constexpr float PI = 3.141592653589793f;
26
Interpolate(const sw::SIMD::Float & x,const sw::SIMD::Float & y,const sw::SIMD::Float & rhw,const sw::SIMD::Float & A,const sw::SIMD::Float & B,const sw::SIMD::Float & C,bool flat,bool perspective)27 sw::SIMD::Float Interpolate(const sw::SIMD::Float &x, const sw::SIMD::Float &y, const sw::SIMD::Float &rhw,
28 const sw::SIMD::Float &A, const sw::SIMD::Float &B, const sw::SIMD::Float &C,
29 bool flat, bool perspective)
30 {
31 sw::SIMD::Float interpolant = C;
32
33 if(!flat)
34 {
35 interpolant += x * A + y * B;
36
37 if(perspective)
38 {
39 interpolant *= rhw;
40 }
41 }
42
43 return interpolant;
44 }
45
46 } // namespace
47
48 namespace sw {
49
EmitExtGLSLstd450(InsnIterator insn,EmitState * state) const50 SpirvShader::EmitResult SpirvShader::EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const
51 {
52 auto &type = getType(insn.resultTypeId());
53 auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
54 auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
55
56 switch(extInstIndex)
57 {
58 case GLSLstd450FAbs:
59 {
60 auto src = Operand(this, state, insn.word(5));
61 for(auto i = 0u; i < type.componentCount; i++)
62 {
63 dst.move(i, Abs(src.Float(i)));
64 }
65 }
66 break;
67 case GLSLstd450SAbs:
68 {
69 auto src = Operand(this, state, insn.word(5));
70 for(auto i = 0u; i < type.componentCount; i++)
71 {
72 dst.move(i, Abs(src.Int(i)));
73 }
74 }
75 break;
76 case GLSLstd450Cross:
77 {
78 auto lhs = Operand(this, state, insn.word(5));
79 auto rhs = Operand(this, state, insn.word(6));
80 dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
81 dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
82 dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
83 }
84 break;
85 case GLSLstd450Floor:
86 {
87 auto src = Operand(this, state, insn.word(5));
88 for(auto i = 0u; i < type.componentCount; i++)
89 {
90 dst.move(i, Floor(src.Float(i)));
91 }
92 }
93 break;
94 case GLSLstd450Trunc:
95 {
96 auto src = Operand(this, state, insn.word(5));
97 for(auto i = 0u; i < type.componentCount; i++)
98 {
99 dst.move(i, Trunc(src.Float(i)));
100 }
101 }
102 break;
103 case GLSLstd450Ceil:
104 {
105 auto src = Operand(this, state, insn.word(5));
106 for(auto i = 0u; i < type.componentCount; i++)
107 {
108 dst.move(i, Ceil(src.Float(i)));
109 }
110 }
111 break;
112 case GLSLstd450Fract:
113 {
114 auto src = Operand(this, state, insn.word(5));
115 for(auto i = 0u; i < type.componentCount; i++)
116 {
117 dst.move(i, Frac(src.Float(i)));
118 }
119 }
120 break;
121 case GLSLstd450Round:
122 {
123 auto src = Operand(this, state, insn.word(5));
124 for(auto i = 0u; i < type.componentCount; i++)
125 {
126 dst.move(i, Round(src.Float(i)));
127 }
128 }
129 break;
130 case GLSLstd450RoundEven:
131 {
132 auto src = Operand(this, state, insn.word(5));
133 for(auto i = 0u; i < type.componentCount; i++)
134 {
135 auto x = Round(src.Float(i));
136 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
137 dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
138 SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
139 }
140 }
141 break;
142 case GLSLstd450FMin:
143 {
144 auto lhs = Operand(this, state, insn.word(5));
145 auto rhs = Operand(this, state, insn.word(6));
146 for(auto i = 0u; i < type.componentCount; i++)
147 {
148 dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
149 }
150 }
151 break;
152 case GLSLstd450FMax:
153 {
154 auto lhs = Operand(this, state, insn.word(5));
155 auto rhs = Operand(this, state, insn.word(6));
156 for(auto i = 0u; i < type.componentCount; i++)
157 {
158 dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
159 }
160 }
161 break;
162 case GLSLstd450SMin:
163 {
164 auto lhs = Operand(this, state, insn.word(5));
165 auto rhs = Operand(this, state, insn.word(6));
166 for(auto i = 0u; i < type.componentCount; i++)
167 {
168 dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
169 }
170 }
171 break;
172 case GLSLstd450SMax:
173 {
174 auto lhs = Operand(this, state, insn.word(5));
175 auto rhs = Operand(this, state, insn.word(6));
176 for(auto i = 0u; i < type.componentCount; i++)
177 {
178 dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
179 }
180 }
181 break;
182 case GLSLstd450UMin:
183 {
184 auto lhs = Operand(this, state, insn.word(5));
185 auto rhs = Operand(this, state, insn.word(6));
186 for(auto i = 0u; i < type.componentCount; i++)
187 {
188 dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
189 }
190 }
191 break;
192 case GLSLstd450UMax:
193 {
194 auto lhs = Operand(this, state, insn.word(5));
195 auto rhs = Operand(this, state, insn.word(6));
196 for(auto i = 0u; i < type.componentCount; i++)
197 {
198 dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
199 }
200 }
201 break;
202 case GLSLstd450Step:
203 {
204 auto edge = Operand(this, state, insn.word(5));
205 auto x = Operand(this, state, insn.word(6));
206 for(auto i = 0u; i < type.componentCount; i++)
207 {
208 dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
209 }
210 }
211 break;
212 case GLSLstd450SmoothStep:
213 {
214 auto edge0 = Operand(this, state, insn.word(5));
215 auto edge1 = Operand(this, state, insn.word(6));
216 auto x = Operand(this, state, insn.word(7));
217 for(auto i = 0u; i < type.componentCount; i++)
218 {
219 auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
220 (edge1.Float(i) - edge0.Float(i)),
221 SIMD::Float(0.0f)),
222 SIMD::Float(1.0f));
223 dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
224 }
225 }
226 break;
227 case GLSLstd450FMix:
228 {
229 auto x = Operand(this, state, insn.word(5));
230 auto y = Operand(this, state, insn.word(6));
231 auto a = Operand(this, state, insn.word(7));
232 for(auto i = 0u; i < type.componentCount; i++)
233 {
234 dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
235 }
236 }
237 break;
238 case GLSLstd450FClamp:
239 {
240 auto x = Operand(this, state, insn.word(5));
241 auto minVal = Operand(this, state, insn.word(6));
242 auto maxVal = Operand(this, state, insn.word(7));
243 for(auto i = 0u; i < type.componentCount; i++)
244 {
245 dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
246 }
247 }
248 break;
249 case GLSLstd450SClamp:
250 {
251 auto x = Operand(this, state, insn.word(5));
252 auto minVal = Operand(this, state, insn.word(6));
253 auto maxVal = Operand(this, state, insn.word(7));
254 for(auto i = 0u; i < type.componentCount; i++)
255 {
256 dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
257 }
258 }
259 break;
260 case GLSLstd450UClamp:
261 {
262 auto x = Operand(this, state, insn.word(5));
263 auto minVal = Operand(this, state, insn.word(6));
264 auto maxVal = Operand(this, state, insn.word(7));
265 for(auto i = 0u; i < type.componentCount; i++)
266 {
267 dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
268 }
269 }
270 break;
271 case GLSLstd450FSign:
272 {
273 auto src = Operand(this, state, insn.word(5));
274 for(auto i = 0u; i < type.componentCount; i++)
275 {
276 auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
277 auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
278 dst.move(i, neg | pos);
279 }
280 }
281 break;
282 case GLSLstd450SSign:
283 {
284 auto src = Operand(this, state, insn.word(5));
285 for(auto i = 0u; i < type.componentCount; i++)
286 {
287 auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
288 auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
289 dst.move(i, neg | pos);
290 }
291 }
292 break;
293 case GLSLstd450Reflect:
294 {
295 auto I = Operand(this, state, insn.word(5));
296 auto N = Operand(this, state, insn.word(6));
297
298 SIMD::Float d = Dot(type.componentCount, I, N);
299
300 for(auto i = 0u; i < type.componentCount; i++)
301 {
302 dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
303 }
304 }
305 break;
306 case GLSLstd450Refract:
307 {
308 auto I = Operand(this, state, insn.word(5));
309 auto N = Operand(this, state, insn.word(6));
310 auto eta = Operand(this, state, insn.word(7));
311
312 SIMD::Float d = Dot(type.componentCount, I, N);
313 SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
314 SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
315 SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
316
317 for(auto i = 0u; i < type.componentCount; i++)
318 {
319 dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
320 }
321 }
322 break;
323 case GLSLstd450FaceForward:
324 {
325 auto N = Operand(this, state, insn.word(5));
326 auto I = Operand(this, state, insn.word(6));
327 auto Nref = Operand(this, state, insn.word(7));
328
329 SIMD::Float d = Dot(type.componentCount, I, Nref);
330 SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
331
332 for(auto i = 0u; i < type.componentCount; i++)
333 {
334 auto n = N.Float(i);
335 dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
336 }
337 }
338 break;
339 case GLSLstd450Length:
340 {
341 auto x = Operand(this, state, insn.word(5));
342 SIMD::Float d = Dot(getObjectType(insn.word(5)).componentCount, x, x);
343
344 dst.move(0, Sqrt(d));
345 }
346 break;
347 case GLSLstd450Normalize:
348 {
349 auto x = Operand(this, state, insn.word(5));
350 SIMD::Float d = Dot(getObjectType(insn.word(5)).componentCount, x, x);
351 SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
352
353 for(auto i = 0u; i < type.componentCount; i++)
354 {
355 dst.move(i, invLength * x.Float(i));
356 }
357 }
358 break;
359 case GLSLstd450Distance:
360 {
361 auto p0 = Operand(this, state, insn.word(5));
362 auto p1 = Operand(this, state, insn.word(6));
363
364 // sqrt(dot(p0-p1, p0-p1))
365 SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
366
367 for(auto i = 1u; i < p0.componentCount; i++)
368 {
369 d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
370 }
371
372 dst.move(0, Sqrt(d));
373 }
374 break;
375 case GLSLstd450Modf:
376 {
377 auto val = Operand(this, state, insn.word(5));
378 auto ptrId = Object::ID(insn.word(6));
379
380 Intermediate whole(type.componentCount);
381
382 for(auto i = 0u; i < type.componentCount; i++)
383 {
384 auto wholeAndFrac = Modf(val.Float(i));
385 dst.move(i, wholeAndFrac.second);
386 whole.move(i, wholeAndFrac.first);
387 }
388
389 Store(ptrId, whole, false, std::memory_order_relaxed, state);
390 }
391 break;
392 case GLSLstd450ModfStruct:
393 {
394 auto val = Operand(this, state, insn.word(5));
395
396 for(auto i = 0u; i < val.componentCount; i++)
397 {
398 auto wholeAndFrac = Modf(val.Float(i));
399 dst.move(i, wholeAndFrac.second);
400 dst.move(val.componentCount + i, wholeAndFrac.first);
401 }
402 }
403 break;
404 case GLSLstd450PackSnorm4x8:
405 {
406 auto val = Operand(this, state, insn.word(5));
407 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
408 SIMD::Int(0xFF)) |
409 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
410 SIMD::Int(0xFF))
411 << 8) |
412 ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
413 SIMD::Int(0xFF))
414 << 16) |
415 ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
416 SIMD::Int(0xFF))
417 << 24));
418 }
419 break;
420 case GLSLstd450PackUnorm4x8:
421 {
422 auto val = Operand(this, state, insn.word(5));
423 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
424 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
425 ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
426 ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
427 }
428 break;
429 case GLSLstd450PackSnorm2x16:
430 {
431 auto val = Operand(this, state, insn.word(5));
432 dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
433 SIMD::Int(0xFFFF)) |
434 ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
435 SIMD::Int(0xFFFF))
436 << 16));
437 }
438 break;
439 case GLSLstd450PackUnorm2x16:
440 {
441 auto val = Operand(this, state, insn.word(5));
442 dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
443 SIMD::UInt(0xFFFF)) |
444 ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
445 SIMD::UInt(0xFFFF))
446 << 16));
447 }
448 break;
449 case GLSLstd450PackHalf2x16:
450 {
451 auto val = Operand(this, state, insn.word(5));
452 dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
453 }
454 break;
455 case GLSLstd450UnpackSnorm4x8:
456 {
457 auto val = Operand(this, state, insn.word(5));
458 dst.move(0, Min(Max(SIMD::Float(((val.Int(0) << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
459 dst.move(1, Min(Max(SIMD::Float(((val.Int(0) << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
460 dst.move(2, Min(Max(SIMD::Float(((val.Int(0) << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
461 dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
462 }
463 break;
464 case GLSLstd450UnpackUnorm4x8:
465 {
466 auto val = Operand(this, state, insn.word(5));
467 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
468 dst.move(1, SIMD::Float(((val.UInt(0) >> 8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
469 dst.move(2, SIMD::Float(((val.UInt(0) >> 16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
470 dst.move(3, SIMD::Float(((val.UInt(0) >> 24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
471 }
472 break;
473 case GLSLstd450UnpackSnorm2x16:
474 {
475 auto val = Operand(this, state, insn.word(5));
476 // clamp(f / 32767.0, -1.0, 1.0)
477 dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
478 SIMD::Float(1.0f / float(0x7FFF0000)),
479 SIMD::Float(-1.0f)),
480 SIMD::Float(1.0f)));
481 dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
482 SIMD::Float(-1.0f)),
483 SIMD::Float(1.0f)));
484 }
485 break;
486 case GLSLstd450UnpackUnorm2x16:
487 {
488 auto val = Operand(this, state, insn.word(5));
489 // f / 65535.0
490 dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
491 dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
492 }
493 break;
494 case GLSLstd450UnpackHalf2x16:
495 {
496 auto val = Operand(this, state, insn.word(5));
497 dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
498 dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
499 }
500 break;
501 case GLSLstd450Fma:
502 {
503 auto a = Operand(this, state, insn.word(5));
504 auto b = Operand(this, state, insn.word(6));
505 auto c = Operand(this, state, insn.word(7));
506 for(auto i = 0u; i < type.componentCount; i++)
507 {
508 dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
509 }
510 }
511 break;
512 case GLSLstd450Frexp:
513 {
514 auto val = Operand(this, state, insn.word(5));
515 auto ptrId = Object::ID(insn.word(6));
516
517 Intermediate exp(type.componentCount);
518
519 for(auto i = 0u; i < type.componentCount; i++)
520 {
521 auto significandAndExponent = Frexp(val.Float(i));
522 dst.move(i, significandAndExponent.first);
523 exp.move(i, significandAndExponent.second);
524 }
525
526 Store(ptrId, exp, false, std::memory_order_relaxed, state);
527 }
528 break;
529 case GLSLstd450FrexpStruct:
530 {
531 auto val = Operand(this, state, insn.word(5));
532
533 for(auto i = 0u; i < val.componentCount; i++)
534 {
535 auto significandAndExponent = Frexp(val.Float(i));
536 dst.move(i, significandAndExponent.first);
537 dst.move(val.componentCount + i, significandAndExponent.second);
538 }
539 }
540 break;
541 case GLSLstd450Ldexp:
542 {
543 auto significand = Operand(this, state, insn.word(5));
544 auto exponent = Operand(this, state, insn.word(6));
545 for(auto i = 0u; i < type.componentCount; i++)
546 {
547 // Assumes IEEE 754
548 auto in = significand.Float(i);
549 auto significandExponent = Exponent(in);
550 auto combinedExponent = exponent.Int(i) + significandExponent;
551 auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
552 auto isSignificandInf = SIMD::UInt(IsInf(in));
553 auto isSignificandNaN = SIMD::UInt(IsNan(in));
554 auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
555 auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
556 auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
557
558 SIMD::UInt v;
559 v = significand.UInt(i) & SIMD::UInt(0x7FFFFF); // Add significand.
560 v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23)); // Add exponent.
561 v &= isExponentInBounds; // Clear v if the exponent is OOB.
562
563 v |= significand.UInt(i) & SIMD::UInt(0x80000000); // Add sign bit.
564 v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000); // Mark as inf if the exponent is too great.
565
566 // If the input significand is zero, inf or nan, just return the
567 // input significand.
568 auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
569 v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
570
571 dst.move(i, As<SIMD::Float>(v));
572 }
573 }
574 break;
575 case GLSLstd450Radians:
576 {
577 auto degrees = Operand(this, state, insn.word(5));
578 for(auto i = 0u; i < type.componentCount; i++)
579 {
580 dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
581 }
582 }
583 break;
584 case GLSLstd450Degrees:
585 {
586 auto radians = Operand(this, state, insn.word(5));
587 for(auto i = 0u; i < type.componentCount; i++)
588 {
589 dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
590 }
591 }
592 break;
593 case GLSLstd450Sin:
594 {
595 auto radians = Operand(this, state, insn.word(5));
596 for(auto i = 0u; i < type.componentCount; i++)
597 {
598 dst.move(i, Sin(radians.Float(i)));
599 }
600 }
601 break;
602 case GLSLstd450Cos:
603 {
604 auto radians = Operand(this, state, insn.word(5));
605 for(auto i = 0u; i < type.componentCount; i++)
606 {
607 dst.move(i, Cos(radians.Float(i)));
608 }
609 }
610 break;
611 case GLSLstd450Tan:
612 {
613 auto radians = Operand(this, state, insn.word(5));
614 for(auto i = 0u; i < type.componentCount; i++)
615 {
616 dst.move(i, Tan(radians.Float(i)));
617 }
618 }
619 break;
620 case GLSLstd450Asin:
621 {
622 auto val = Operand(this, state, insn.word(5));
623 Decorations d;
624 ApplyDecorationsForId(&d, insn.word(5));
625 for(auto i = 0u; i < type.componentCount; i++)
626 {
627 dst.move(i, Asin(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
628 }
629 }
630 break;
631 case GLSLstd450Acos:
632 {
633 auto val = Operand(this, state, insn.word(5));
634 Decorations d;
635 ApplyDecorationsForId(&d, insn.word(5));
636 for(auto i = 0u; i < type.componentCount; i++)
637 {
638 dst.move(i, Acos(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
639 }
640 }
641 break;
642 case GLSLstd450Atan:
643 {
644 auto val = Operand(this, state, insn.word(5));
645 for(auto i = 0u; i < type.componentCount; i++)
646 {
647 dst.move(i, Atan(val.Float(i)));
648 }
649 }
650 break;
651 case GLSLstd450Sinh:
652 {
653 auto val = Operand(this, state, insn.word(5));
654 for(auto i = 0u; i < type.componentCount; i++)
655 {
656 dst.move(i, Sinh(val.Float(i)));
657 }
658 }
659 break;
660 case GLSLstd450Cosh:
661 {
662 auto val = Operand(this, state, insn.word(5));
663 for(auto i = 0u; i < type.componentCount; i++)
664 {
665 dst.move(i, Cosh(val.Float(i)));
666 }
667 }
668 break;
669 case GLSLstd450Tanh:
670 {
671 auto val = Operand(this, state, insn.word(5));
672 for(auto i = 0u; i < type.componentCount; i++)
673 {
674 dst.move(i, Tanh(val.Float(i)));
675 }
676 }
677 break;
678 case GLSLstd450Asinh:
679 {
680 auto val = Operand(this, state, insn.word(5));
681 for(auto i = 0u; i < type.componentCount; i++)
682 {
683 dst.move(i, Asinh(val.Float(i)));
684 }
685 }
686 break;
687 case GLSLstd450Acosh:
688 {
689 auto val = Operand(this, state, insn.word(5));
690 for(auto i = 0u; i < type.componentCount; i++)
691 {
692 dst.move(i, Acosh(val.Float(i)));
693 }
694 }
695 break;
696 case GLSLstd450Atanh:
697 {
698 auto val = Operand(this, state, insn.word(5));
699 for(auto i = 0u; i < type.componentCount; i++)
700 {
701 dst.move(i, Atanh(val.Float(i)));
702 }
703 }
704 break;
705 case GLSLstd450Atan2:
706 {
707 auto x = Operand(this, state, insn.word(5));
708 auto y = Operand(this, state, insn.word(6));
709 for(auto i = 0u; i < type.componentCount; i++)
710 {
711 dst.move(i, Atan2(x.Float(i), y.Float(i)));
712 }
713 }
714 break;
715 case GLSLstd450Pow:
716 {
717 auto x = Operand(this, state, insn.word(5));
718 auto y = Operand(this, state, insn.word(6));
719 for(auto i = 0u; i < type.componentCount; i++)
720 {
721 dst.move(i, Pow(x.Float(i), y.Float(i)));
722 }
723 }
724 break;
725 case GLSLstd450Exp:
726 {
727 auto val = Operand(this, state, insn.word(5));
728 for(auto i = 0u; i < type.componentCount; i++)
729 {
730 dst.move(i, Exp(val.Float(i)));
731 }
732 }
733 break;
734 case GLSLstd450Log:
735 {
736 auto val = Operand(this, state, insn.word(5));
737 for(auto i = 0u; i < type.componentCount; i++)
738 {
739 dst.move(i, Log(val.Float(i)));
740 }
741 }
742 break;
743 case GLSLstd450Exp2:
744 {
745 auto val = Operand(this, state, insn.word(5));
746 for(auto i = 0u; i < type.componentCount; i++)
747 {
748 dst.move(i, Exp2(val.Float(i)));
749 }
750 }
751 break;
752 case GLSLstd450Log2:
753 {
754 auto val = Operand(this, state, insn.word(5));
755 for(auto i = 0u; i < type.componentCount; i++)
756 {
757 dst.move(i, Log2(val.Float(i)));
758 }
759 }
760 break;
761 case GLSLstd450Sqrt:
762 {
763 auto val = Operand(this, state, insn.word(5));
764 for(auto i = 0u; i < type.componentCount; i++)
765 {
766 dst.move(i, Sqrt(val.Float(i)));
767 }
768 }
769 break;
770 case GLSLstd450InverseSqrt:
771 {
772 auto val = Operand(this, state, insn.word(5));
773 Decorations d;
774 ApplyDecorationsForId(&d, insn.word(5));
775
776 for(auto i = 0u; i < type.componentCount; i++)
777 {
778 dst.move(i, RcpSqrt(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
779 }
780 }
781 break;
782 case GLSLstd450Determinant:
783 {
784 auto mat = Operand(this, state, insn.word(5));
785
786 switch(mat.componentCount)
787 {
788 case 4: // 2x2
789 dst.move(0, Determinant(
790 mat.Float(0), mat.Float(1),
791 mat.Float(2), mat.Float(3)));
792 break;
793 case 9: // 3x3
794 dst.move(0, Determinant(
795 mat.Float(0), mat.Float(1), mat.Float(2),
796 mat.Float(3), mat.Float(4), mat.Float(5),
797 mat.Float(6), mat.Float(7), mat.Float(8)));
798 break;
799 case 16: // 4x4
800 dst.move(0, Determinant(
801 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
802 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
803 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
804 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
805 break;
806 default:
807 UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(mat.componentCount));
808 }
809 }
810 break;
811 case GLSLstd450MatrixInverse:
812 {
813 auto mat = Operand(this, state, insn.word(5));
814
815 switch(mat.componentCount)
816 {
817 case 4: // 2x2
818 {
819 auto inv = MatrixInverse(
820 mat.Float(0), mat.Float(1),
821 mat.Float(2), mat.Float(3));
822 for(uint32_t i = 0; i < inv.size(); i++)
823 {
824 dst.move(i, inv[i]);
825 }
826 }
827 break;
828 case 9: // 3x3
829 {
830 auto inv = MatrixInverse(
831 mat.Float(0), mat.Float(1), mat.Float(2),
832 mat.Float(3), mat.Float(4), mat.Float(5),
833 mat.Float(6), mat.Float(7), mat.Float(8));
834 for(uint32_t i = 0; i < inv.size(); i++)
835 {
836 dst.move(i, inv[i]);
837 }
838 }
839 break;
840 case 16: // 4x4
841 {
842 auto inv = MatrixInverse(
843 mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
844 mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
845 mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
846 mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
847 for(uint32_t i = 0; i < inv.size(); i++)
848 {
849 dst.move(i, inv[i]);
850 }
851 }
852 break;
853 default:
854 UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(mat.componentCount));
855 }
856 }
857 break;
858 case GLSLstd450IMix:
859 {
860 UNREACHABLE("GLSLstd450IMix has been removed from the specification");
861 }
862 break;
863 case GLSLstd450PackDouble2x32:
864 {
865 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
866 }
867 break;
868 case GLSLstd450UnpackDouble2x32:
869 {
870 UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
871 }
872 break;
873 case GLSLstd450FindILsb:
874 {
875 auto val = Operand(this, state, insn.word(5));
876 for(auto i = 0u; i < type.componentCount; i++)
877 {
878 auto v = val.UInt(i);
879 dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
880 }
881 }
882 break;
883 case GLSLstd450FindSMsb:
884 {
885 auto val = Operand(this, state, insn.word(5));
886 for(auto i = 0u; i < type.componentCount; i++)
887 {
888 auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
889 dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
890 }
891 }
892 break;
893 case GLSLstd450FindUMsb:
894 {
895 auto val = Operand(this, state, insn.word(5));
896 for(auto i = 0u; i < type.componentCount; i++)
897 {
898 dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
899 }
900 }
901 break;
902 case GLSLstd450InterpolateAtCentroid:
903 {
904 Decorations d;
905 ApplyDecorationsForId(&d, insn.word(5));
906 auto ptr = state->getPointer(insn.word(5));
907 for(auto i = 0u; i < type.componentCount; i++)
908 {
909 dst.move(i, Interpolate(ptr, d.Location, 0, i, state, SpirvShader::Centroid));
910 }
911 }
912 break;
913 case GLSLstd450InterpolateAtSample:
914 {
915 Decorations d;
916 ApplyDecorationsForId(&d, insn.word(5));
917 auto ptr = state->getPointer(insn.word(5));
918 for(auto i = 0u; i < type.componentCount; i++)
919 {
920 dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtSample));
921 }
922 }
923 break;
924 case GLSLstd450InterpolateAtOffset:
925 {
926 Decorations d;
927 ApplyDecorationsForId(&d, insn.word(5));
928 auto ptr = state->getPointer(insn.word(5));
929 for(auto i = 0u; i < type.componentCount; i++)
930 {
931 dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtOffset));
932 }
933 }
934 break;
935 case GLSLstd450NMin:
936 {
937 auto x = Operand(this, state, insn.word(5));
938 auto y = Operand(this, state, insn.word(6));
939 for(auto i = 0u; i < type.componentCount; i++)
940 {
941 dst.move(i, NMin(x.Float(i), y.Float(i)));
942 }
943 }
944 break;
945 case GLSLstd450NMax:
946 {
947 auto x = Operand(this, state, insn.word(5));
948 auto y = Operand(this, state, insn.word(6));
949 for(auto i = 0u; i < type.componentCount; i++)
950 {
951 dst.move(i, NMax(x.Float(i), y.Float(i)));
952 }
953 }
954 break;
955 case GLSLstd450NClamp:
956 {
957 auto x = Operand(this, state, insn.word(5));
958 auto minVal = Operand(this, state, insn.word(6));
959 auto maxVal = Operand(this, state, insn.word(7));
960 for(auto i = 0u; i < type.componentCount; i++)
961 {
962 auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
963 dst.move(i, clamp);
964 }
965 }
966 break;
967 default:
968 UNREACHABLE("ExtInst %d", int(extInstIndex));
969 break;
970 }
971
972 return EmitResult::Continue;
973 }
974
Interpolate(SIMD::Pointer const & ptr,int32_t location,Object::ID paramId,uint32_t component,EmitState * state,InterpolationType type) const975 SIMD::Float SpirvShader::Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
976 uint32_t component, EmitState *state, InterpolationType type) const
977 {
978 uint32_t interpolant = (location * 4);
979 uint32_t components_per_row = GetNumInputComponents(location);
980 if((location < 0) || (interpolant >= inputs.size()) || (components_per_row == 0))
981 {
982 return SIMD::Float(0.0f);
983 }
984
985 const auto &interpolationData = state->routine->interpolationData;
986
987 SIMD::Float x;
988 SIMD::Float y;
989 SIMD::Float rhw;
990
991 switch(type)
992 {
993 case Centroid:
994 x = interpolationData.xCentroid;
995 y = interpolationData.yCentroid;
996 rhw = interpolationData.rhwCentroid;
997 break;
998 case AtSample:
999 x = SIMD::Float(0.0f);
1000 y = SIMD::Float(0.0f);
1001
1002 if(state->getMultiSampleCount() > 1)
1003 {
1004 static constexpr int NUM_SAMPLES = 4;
1005 ASSERT(state->getMultiSampleCount() == NUM_SAMPLES);
1006
1007 Array<Float> sampleX(NUM_SAMPLES);
1008 Array<Float> sampleY(NUM_SAMPLES);
1009 for(int i = 0; i < NUM_SAMPLES; ++i)
1010 {
1011 sampleX[i] = Constants::SampleLocationsX[i];
1012 sampleY[i] = Constants::SampleLocationsY[i];
1013 }
1014
1015 auto sampleOperand = Operand(this, state, paramId);
1016 ASSERT(sampleOperand.componentCount == 1);
1017
1018 // If sample does not exist, the position used to interpolate the
1019 // input variable is undefined, so we just clamp to avoid OOB accesses.
1020 SIMD::Int samples = sampleOperand.Int(0) & SIMD::Int(NUM_SAMPLES - 1);
1021
1022 for(int i = 0; i < SIMD::Width; ++i)
1023 {
1024 Int sample = Extract(samples, i);
1025 x = Insert(x, sampleX[sample], i);
1026 y = Insert(y, sampleY[sample], i);
1027 }
1028 }
1029
1030 x += interpolationData.x;
1031 y += interpolationData.y;
1032 rhw = interpolationData.rhw;
1033 break;
1034 case AtOffset:
1035 {
1036 // An offset of (0, 0) identifies the center of the pixel.
1037 auto offset = Operand(this, state, paramId);
1038 ASSERT(offset.componentCount == 2);
1039
1040 x = interpolationData.x + offset.Float(0);
1041 y = interpolationData.y + offset.Float(1);
1042 rhw = interpolationData.rhw;
1043 }
1044 break;
1045 default:
1046 UNREACHABLE("Unknown interpolation type: %d", (int)type);
1047 return SIMD::Float(0.0f);
1048 }
1049
1050 uint32_t packedInterpolant = GetPackedInterpolant(location);
1051 Pointer<Byte> planeEquation = interpolationData.primitive + OFFSET(Primitive, V[packedInterpolant]);
1052 if(ptr.hasDynamicOffsets)
1053 {
1054 // This code assumes all dynamic offsets are equal
1055 Int offset = ((Extract(ptr.dynamicOffsets, 0) + ptr.staticOffsets[0]) >> 2) + component;
1056 offset = Min(offset, Int(inputs.size() - interpolant - 1));
1057 planeEquation += (offset * sizeof(PlaneEquation));
1058 }
1059 else
1060 {
1061 ASSERT(ptr.hasStaticEqualOffsets());
1062
1063 uint32_t offset = (ptr.staticOffsets[0] >> 2) + component;
1064 if((interpolant + offset) >= inputs.size())
1065 {
1066 return SIMD::Float(0.0f);
1067 }
1068 planeEquation += offset * sizeof(PlaneEquation);
1069 }
1070
1071 return SpirvRoutine::interpolateAtXY(x, y, rhw, planeEquation, false, true);
1072 }
1073
// Loads the plane equation stored at `planeEquation` and evaluates it at (x, y).
//
// Only the C coefficient is loaded for flat inputs; A and B are then left
// default-constructed and passed on unread. `flat` and `perspective` are
// forwarded unchanged to the file-local Interpolate() helper.
SIMD::Float SpirvRoutine::interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
{
	SIMD::Float A;
	SIMD::Float B;

	// The constant term is required for every interpolation mode.
	Pointer<SIMD::Float> constantTerm(planeEquation + OFFSET(PlaneEquation, C), 16);
	SIMD::Float C = *constantTerm;

	if(!flat)
	{
		Pointer<SIMD::Float> gradientX(planeEquation + OFFSET(PlaneEquation, A), 16);
		Pointer<SIMD::Float> gradientY(planeEquation + OFFSET(PlaneEquation, B), 16);
		A = *gradientX;
		B = *gradientY;
	}

	return ::Interpolate(x, y, rhw, A, B, C, flat, perspective);
}
1088
1089 } // namespace sw