• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "SpirvShader.hpp"
16 
17 #include "ShaderCore.hpp"
18 #include "Device/Primitive.hpp"
19 #include "Pipeline/Constants.hpp"
20 
21 #include <spirv/unified1/GLSL.std.450.h>
22 #include <spirv/unified1/spirv.hpp>
23 
24 namespace {
25 constexpr float PI = 3.141592653589793f;
26 
Interpolate(const sw::SIMD::Float & x,const sw::SIMD::Float & y,const sw::SIMD::Float & rhw,const sw::SIMD::Float & A,const sw::SIMD::Float & B,const sw::SIMD::Float & C,bool flat,bool perspective)27 sw::SIMD::Float Interpolate(const sw::SIMD::Float &x, const sw::SIMD::Float &y, const sw::SIMD::Float &rhw,
28                             const sw::SIMD::Float &A, const sw::SIMD::Float &B, const sw::SIMD::Float &C,
29                             bool flat, bool perspective)
30 {
31 	sw::SIMD::Float interpolant = C;
32 
33 	if(!flat)
34 	{
35 		interpolant += x * A + y * B;
36 
37 		if(perspective)
38 		{
39 			interpolant *= rhw;
40 		}
41 	}
42 
43 	return interpolant;
44 }
45 
46 }  // namespace
47 
48 namespace sw {
49 
EmitExtGLSLstd450(InsnIterator insn,EmitState * state) const50 SpirvShader::EmitResult SpirvShader::EmitExtGLSLstd450(InsnIterator insn, EmitState *state) const
51 {
52 	auto &type = getType(insn.resultTypeId());
53 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
54 	auto extInstIndex = static_cast<GLSLstd450>(insn.word(4));
55 
56 	switch(extInstIndex)
57 	{
58 	case GLSLstd450FAbs:
59 		{
60 			auto src = Operand(this, state, insn.word(5));
61 			for(auto i = 0u; i < type.componentCount; i++)
62 			{
63 				dst.move(i, Abs(src.Float(i)));
64 			}
65 		}
66 		break;
67 	case GLSLstd450SAbs:
68 		{
69 			auto src = Operand(this, state, insn.word(5));
70 			for(auto i = 0u; i < type.componentCount; i++)
71 			{
72 				dst.move(i, Abs(src.Int(i)));
73 			}
74 		}
75 		break;
76 	case GLSLstd450Cross:
77 		{
78 			auto lhs = Operand(this, state, insn.word(5));
79 			auto rhs = Operand(this, state, insn.word(6));
80 			dst.move(0, lhs.Float(1) * rhs.Float(2) - rhs.Float(1) * lhs.Float(2));
81 			dst.move(1, lhs.Float(2) * rhs.Float(0) - rhs.Float(2) * lhs.Float(0));
82 			dst.move(2, lhs.Float(0) * rhs.Float(1) - rhs.Float(0) * lhs.Float(1));
83 		}
84 		break;
85 	case GLSLstd450Floor:
86 		{
87 			auto src = Operand(this, state, insn.word(5));
88 			for(auto i = 0u; i < type.componentCount; i++)
89 			{
90 				dst.move(i, Floor(src.Float(i)));
91 			}
92 		}
93 		break;
94 	case GLSLstd450Trunc:
95 		{
96 			auto src = Operand(this, state, insn.word(5));
97 			for(auto i = 0u; i < type.componentCount; i++)
98 			{
99 				dst.move(i, Trunc(src.Float(i)));
100 			}
101 		}
102 		break;
103 	case GLSLstd450Ceil:
104 		{
105 			auto src = Operand(this, state, insn.word(5));
106 			for(auto i = 0u; i < type.componentCount; i++)
107 			{
108 				dst.move(i, Ceil(src.Float(i)));
109 			}
110 		}
111 		break;
112 	case GLSLstd450Fract:
113 		{
114 			auto src = Operand(this, state, insn.word(5));
115 			for(auto i = 0u; i < type.componentCount; i++)
116 			{
117 				dst.move(i, Frac(src.Float(i)));
118 			}
119 		}
120 		break;
121 	case GLSLstd450Round:
122 		{
123 			auto src = Operand(this, state, insn.word(5));
124 			for(auto i = 0u; i < type.componentCount; i++)
125 			{
126 				dst.move(i, Round(src.Float(i)));
127 			}
128 		}
129 		break;
130 	case GLSLstd450RoundEven:
131 		{
132 			auto src = Operand(this, state, insn.word(5));
133 			for(auto i = 0u; i < type.componentCount; i++)
134 			{
135 				auto x = Round(src.Float(i));
136 				// dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src));
137 				dst.move(i, x + ((SIMD::Float(CmpLT(x, src.Float(i)) & SIMD::Int(1)) * SIMD::Float(2.0f)) - SIMD::Float(1.0f)) *
138 				                    SIMD::Float(CmpEQ(Frac(src.Float(i)), SIMD::Float(0.5f)) & SIMD::Int(1)) * SIMD::Float(Int4(x) & SIMD::Int(1)));
139 			}
140 		}
141 		break;
142 	case GLSLstd450FMin:
143 		{
144 			auto lhs = Operand(this, state, insn.word(5));
145 			auto rhs = Operand(this, state, insn.word(6));
146 			for(auto i = 0u; i < type.componentCount; i++)
147 			{
148 				dst.move(i, Min(lhs.Float(i), rhs.Float(i)));
149 			}
150 		}
151 		break;
152 	case GLSLstd450FMax:
153 		{
154 			auto lhs = Operand(this, state, insn.word(5));
155 			auto rhs = Operand(this, state, insn.word(6));
156 			for(auto i = 0u; i < type.componentCount; i++)
157 			{
158 				dst.move(i, Max(lhs.Float(i), rhs.Float(i)));
159 			}
160 		}
161 		break;
162 	case GLSLstd450SMin:
163 		{
164 			auto lhs = Operand(this, state, insn.word(5));
165 			auto rhs = Operand(this, state, insn.word(6));
166 			for(auto i = 0u; i < type.componentCount; i++)
167 			{
168 				dst.move(i, Min(lhs.Int(i), rhs.Int(i)));
169 			}
170 		}
171 		break;
172 	case GLSLstd450SMax:
173 		{
174 			auto lhs = Operand(this, state, insn.word(5));
175 			auto rhs = Operand(this, state, insn.word(6));
176 			for(auto i = 0u; i < type.componentCount; i++)
177 			{
178 				dst.move(i, Max(lhs.Int(i), rhs.Int(i)));
179 			}
180 		}
181 		break;
182 	case GLSLstd450UMin:
183 		{
184 			auto lhs = Operand(this, state, insn.word(5));
185 			auto rhs = Operand(this, state, insn.word(6));
186 			for(auto i = 0u; i < type.componentCount; i++)
187 			{
188 				dst.move(i, Min(lhs.UInt(i), rhs.UInt(i)));
189 			}
190 		}
191 		break;
192 	case GLSLstd450UMax:
193 		{
194 			auto lhs = Operand(this, state, insn.word(5));
195 			auto rhs = Operand(this, state, insn.word(6));
196 			for(auto i = 0u; i < type.componentCount; i++)
197 			{
198 				dst.move(i, Max(lhs.UInt(i), rhs.UInt(i)));
199 			}
200 		}
201 		break;
202 	case GLSLstd450Step:
203 		{
204 			auto edge = Operand(this, state, insn.word(5));
205 			auto x = Operand(this, state, insn.word(6));
206 			for(auto i = 0u; i < type.componentCount; i++)
207 			{
208 				dst.move(i, CmpNLT(x.Float(i), edge.Float(i)) & As<SIMD::Int>(SIMD::Float(1.0f)));
209 			}
210 		}
211 		break;
212 	case GLSLstd450SmoothStep:
213 		{
214 			auto edge0 = Operand(this, state, insn.word(5));
215 			auto edge1 = Operand(this, state, insn.word(6));
216 			auto x = Operand(this, state, insn.word(7));
217 			for(auto i = 0u; i < type.componentCount; i++)
218 			{
219 				auto tx = Min(Max((x.Float(i) - edge0.Float(i)) /
220 				                      (edge1.Float(i) - edge0.Float(i)),
221 				                  SIMD::Float(0.0f)),
222 				              SIMD::Float(1.0f));
223 				dst.move(i, tx * tx * (Float4(3.0f) - Float4(2.0f) * tx));
224 			}
225 		}
226 		break;
227 	case GLSLstd450FMix:
228 		{
229 			auto x = Operand(this, state, insn.word(5));
230 			auto y = Operand(this, state, insn.word(6));
231 			auto a = Operand(this, state, insn.word(7));
232 			for(auto i = 0u; i < type.componentCount; i++)
233 			{
234 				dst.move(i, a.Float(i) * (y.Float(i) - x.Float(i)) + x.Float(i));
235 			}
236 		}
237 		break;
238 	case GLSLstd450FClamp:
239 		{
240 			auto x = Operand(this, state, insn.word(5));
241 			auto minVal = Operand(this, state, insn.word(6));
242 			auto maxVal = Operand(this, state, insn.word(7));
243 			for(auto i = 0u; i < type.componentCount; i++)
244 			{
245 				dst.move(i, Min(Max(x.Float(i), minVal.Float(i)), maxVal.Float(i)));
246 			}
247 		}
248 		break;
249 	case GLSLstd450SClamp:
250 		{
251 			auto x = Operand(this, state, insn.word(5));
252 			auto minVal = Operand(this, state, insn.word(6));
253 			auto maxVal = Operand(this, state, insn.word(7));
254 			for(auto i = 0u; i < type.componentCount; i++)
255 			{
256 				dst.move(i, Min(Max(x.Int(i), minVal.Int(i)), maxVal.Int(i)));
257 			}
258 		}
259 		break;
260 	case GLSLstd450UClamp:
261 		{
262 			auto x = Operand(this, state, insn.word(5));
263 			auto minVal = Operand(this, state, insn.word(6));
264 			auto maxVal = Operand(this, state, insn.word(7));
265 			for(auto i = 0u; i < type.componentCount; i++)
266 			{
267 				dst.move(i, Min(Max(x.UInt(i), minVal.UInt(i)), maxVal.UInt(i)));
268 			}
269 		}
270 		break;
271 	case GLSLstd450FSign:
272 		{
273 			auto src = Operand(this, state, insn.word(5));
274 			for(auto i = 0u; i < type.componentCount; i++)
275 			{
276 				auto neg = As<SIMD::Int>(CmpLT(src.Float(i), SIMD::Float(-0.0f))) & As<SIMD::Int>(SIMD::Float(-1.0f));
277 				auto pos = As<SIMD::Int>(CmpNLE(src.Float(i), SIMD::Float(+0.0f))) & As<SIMD::Int>(SIMD::Float(1.0f));
278 				dst.move(i, neg | pos);
279 			}
280 		}
281 		break;
282 	case GLSLstd450SSign:
283 		{
284 			auto src = Operand(this, state, insn.word(5));
285 			for(auto i = 0u; i < type.componentCount; i++)
286 			{
287 				auto neg = CmpLT(src.Int(i), SIMD::Int(0)) & SIMD::Int(-1);
288 				auto pos = CmpNLE(src.Int(i), SIMD::Int(0)) & SIMD::Int(1);
289 				dst.move(i, neg | pos);
290 			}
291 		}
292 		break;
293 	case GLSLstd450Reflect:
294 		{
295 			auto I = Operand(this, state, insn.word(5));
296 			auto N = Operand(this, state, insn.word(6));
297 
298 			SIMD::Float d = Dot(type.componentCount, I, N);
299 
300 			for(auto i = 0u; i < type.componentCount; i++)
301 			{
302 				dst.move(i, I.Float(i) - SIMD::Float(2.0f) * d * N.Float(i));
303 			}
304 		}
305 		break;
306 	case GLSLstd450Refract:
307 		{
308 			auto I = Operand(this, state, insn.word(5));
309 			auto N = Operand(this, state, insn.word(6));
310 			auto eta = Operand(this, state, insn.word(7));
311 
312 			SIMD::Float d = Dot(type.componentCount, I, N);
313 			SIMD::Float k = SIMD::Float(1.0f) - eta.Float(0) * eta.Float(0) * (SIMD::Float(1.0f) - d * d);
314 			SIMD::Int pos = CmpNLT(k, SIMD::Float(0.0f));
315 			SIMD::Float t = (eta.Float(0) * d + Sqrt(k));
316 
317 			for(auto i = 0u; i < type.componentCount; i++)
318 			{
319 				dst.move(i, pos & As<SIMD::Int>(eta.Float(0) * I.Float(i) - t * N.Float(i)));
320 			}
321 		}
322 		break;
323 	case GLSLstd450FaceForward:
324 		{
325 			auto N = Operand(this, state, insn.word(5));
326 			auto I = Operand(this, state, insn.word(6));
327 			auto Nref = Operand(this, state, insn.word(7));
328 
329 			SIMD::Float d = Dot(type.componentCount, I, Nref);
330 			SIMD::Int neg = CmpLT(d, SIMD::Float(0.0f));
331 
332 			for(auto i = 0u; i < type.componentCount; i++)
333 			{
334 				auto n = N.Float(i);
335 				dst.move(i, (neg & As<SIMD::Int>(n)) | (~neg & As<SIMD::Int>(-n)));
336 			}
337 		}
338 		break;
339 	case GLSLstd450Length:
340 		{
341 			auto x = Operand(this, state, insn.word(5));
342 			SIMD::Float d = Dot(getObjectType(insn.word(5)).componentCount, x, x);
343 
344 			dst.move(0, Sqrt(d));
345 		}
346 		break;
347 	case GLSLstd450Normalize:
348 		{
349 			auto x = Operand(this, state, insn.word(5));
350 			SIMD::Float d = Dot(getObjectType(insn.word(5)).componentCount, x, x);
351 			SIMD::Float invLength = SIMD::Float(1.0f) / Sqrt(d);
352 
353 			for(auto i = 0u; i < type.componentCount; i++)
354 			{
355 				dst.move(i, invLength * x.Float(i));
356 			}
357 		}
358 		break;
359 	case GLSLstd450Distance:
360 		{
361 			auto p0 = Operand(this, state, insn.word(5));
362 			auto p1 = Operand(this, state, insn.word(6));
363 
364 			// sqrt(dot(p0-p1, p0-p1))
365 			SIMD::Float d = (p0.Float(0) - p1.Float(0)) * (p0.Float(0) - p1.Float(0));
366 
367 			for(auto i = 1u; i < p0.componentCount; i++)
368 			{
369 				d += (p0.Float(i) - p1.Float(i)) * (p0.Float(i) - p1.Float(i));
370 			}
371 
372 			dst.move(0, Sqrt(d));
373 		}
374 		break;
375 	case GLSLstd450Modf:
376 		{
377 			auto val = Operand(this, state, insn.word(5));
378 			auto ptrId = Object::ID(insn.word(6));
379 
380 			Intermediate whole(type.componentCount);
381 
382 			for(auto i = 0u; i < type.componentCount; i++)
383 			{
384 				auto wholeAndFrac = Modf(val.Float(i));
385 				dst.move(i, wholeAndFrac.second);
386 				whole.move(i, wholeAndFrac.first);
387 			}
388 
389 			Store(ptrId, whole, false, std::memory_order_relaxed, state);
390 		}
391 		break;
392 	case GLSLstd450ModfStruct:
393 		{
394 			auto val = Operand(this, state, insn.word(5));
395 
396 			for(auto i = 0u; i < val.componentCount; i++)
397 			{
398 				auto wholeAndFrac = Modf(val.Float(i));
399 				dst.move(i, wholeAndFrac.second);
400 				dst.move(val.componentCount + i, wholeAndFrac.first);
401 			}
402 		}
403 		break;
404 	case GLSLstd450PackSnorm4x8:
405 		{
406 			auto val = Operand(this, state, insn.word(5));
407 			dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
408 			             SIMD::Int(0xFF)) |
409 			                ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
410 			                  SIMD::Int(0xFF))
411 			                 << 8) |
412 			                ((SIMD::Int(Round(Min(Max(val.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
413 			                  SIMD::Int(0xFF))
414 			                 << 16) |
415 			                ((SIMD::Int(Round(Min(Max(val.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
416 			                  SIMD::Int(0xFF))
417 			                 << 24));
418 		}
419 		break;
420 	case GLSLstd450PackUnorm4x8:
421 		{
422 			auto val = Operand(this, state, insn.word(5));
423 			dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
424 			                ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
425 			                ((SIMD::UInt(Round(Min(Max(val.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
426 			                ((SIMD::UInt(Round(Min(Max(val.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24));
427 		}
428 		break;
429 	case GLSLstd450PackSnorm2x16:
430 		{
431 			auto val = Operand(this, state, insn.word(5));
432 			dst.move(0, (SIMD::Int(Round(Min(Max(val.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
433 			             SIMD::Int(0xFFFF)) |
434 			                ((SIMD::Int(Round(Min(Max(val.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(32767.0f))) &
435 			                  SIMD::Int(0xFFFF))
436 			                 << 16));
437 		}
438 		break;
439 	case GLSLstd450PackUnorm2x16:
440 		{
441 			auto val = Operand(this, state, insn.word(5));
442 			dst.move(0, (SIMD::UInt(Round(Min(Max(val.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
443 			             SIMD::UInt(0xFFFF)) |
444 			                ((SIMD::UInt(Round(Min(Max(val.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(65535.0f))) &
445 			                  SIMD::UInt(0xFFFF))
446 			                 << 16));
447 		}
448 		break;
449 	case GLSLstd450PackHalf2x16:
450 		{
451 			auto val = Operand(this, state, insn.word(5));
452 			dst.move(0, floatToHalfBits(val.UInt(0), false) | floatToHalfBits(val.UInt(1), true));
453 		}
454 		break;
455 	case GLSLstd450UnpackSnorm4x8:
456 		{
457 			auto val = Operand(this, state, insn.word(5));
458 			dst.move(0, Min(Max(SIMD::Float(((val.Int(0) << 24) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
459 			dst.move(1, Min(Max(SIMD::Float(((val.Int(0) << 16) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
460 			dst.move(2, Min(Max(SIMD::Float(((val.Int(0) << 8) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
461 			dst.move(3, Min(Max(SIMD::Float(((val.Int(0)) & SIMD::Int(0xFF000000))) * SIMD::Float(1.0f / float(0x7f000000)), SIMD::Float(-1.0f)), SIMD::Float(1.0f)));
462 		}
463 		break;
464 	case GLSLstd450UnpackUnorm4x8:
465 		{
466 			auto val = Operand(this, state, insn.word(5));
467 			dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
468 			dst.move(1, SIMD::Float(((val.UInt(0) >> 8) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
469 			dst.move(2, SIMD::Float(((val.UInt(0) >> 16) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
470 			dst.move(3, SIMD::Float(((val.UInt(0) >> 24) & SIMD::UInt(0xFF))) * SIMD::Float(1.0f / 255.f));
471 		}
472 		break;
473 	case GLSLstd450UnpackSnorm2x16:
474 		{
475 			auto val = Operand(this, state, insn.word(5));
476 			// clamp(f / 32767.0, -1.0, 1.0)
477 			dst.move(0, Min(Max(SIMD::Float(As<SIMD::Int>((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16)) *
478 			                        SIMD::Float(1.0f / float(0x7FFF0000)),
479 			                    SIMD::Float(-1.0f)),
480 			                SIMD::Float(1.0f)));
481 			dst.move(1, Min(Max(SIMD::Float(As<SIMD::Int>(val.UInt(0) & SIMD::UInt(0xFFFF0000))) * SIMD::Float(1.0f / float(0x7FFF0000)),
482 			                    SIMD::Float(-1.0f)),
483 			                SIMD::Float(1.0f)));
484 		}
485 		break;
486 	case GLSLstd450UnpackUnorm2x16:
487 		{
488 			auto val = Operand(this, state, insn.word(5));
489 			// f / 65535.0
490 			dst.move(0, SIMD::Float((val.UInt(0) & SIMD::UInt(0x0000FFFF)) << 16) * SIMD::Float(1.0f / float(0xFFFF0000)));
491 			dst.move(1, SIMD::Float(val.UInt(0) & SIMD::UInt(0xFFFF0000)) * SIMD::Float(1.0f / float(0xFFFF0000)));
492 		}
493 		break;
494 	case GLSLstd450UnpackHalf2x16:
495 		{
496 			auto val = Operand(this, state, insn.word(5));
497 			dst.move(0, halfToFloatBits(val.UInt(0) & SIMD::UInt(0x0000FFFF)));
498 			dst.move(1, halfToFloatBits((val.UInt(0) & SIMD::UInt(0xFFFF0000)) >> 16));
499 		}
500 		break;
501 	case GLSLstd450Fma:
502 		{
503 			auto a = Operand(this, state, insn.word(5));
504 			auto b = Operand(this, state, insn.word(6));
505 			auto c = Operand(this, state, insn.word(7));
506 			for(auto i = 0u; i < type.componentCount; i++)
507 			{
508 				dst.move(i, FMA(a.Float(i), b.Float(i), c.Float(i)));
509 			}
510 		}
511 		break;
512 	case GLSLstd450Frexp:
513 		{
514 			auto val = Operand(this, state, insn.word(5));
515 			auto ptrId = Object::ID(insn.word(6));
516 
517 			Intermediate exp(type.componentCount);
518 
519 			for(auto i = 0u; i < type.componentCount; i++)
520 			{
521 				auto significandAndExponent = Frexp(val.Float(i));
522 				dst.move(i, significandAndExponent.first);
523 				exp.move(i, significandAndExponent.second);
524 			}
525 
526 			Store(ptrId, exp, false, std::memory_order_relaxed, state);
527 		}
528 		break;
529 	case GLSLstd450FrexpStruct:
530 		{
531 			auto val = Operand(this, state, insn.word(5));
532 
533 			for(auto i = 0u; i < val.componentCount; i++)
534 			{
535 				auto significandAndExponent = Frexp(val.Float(i));
536 				dst.move(i, significandAndExponent.first);
537 				dst.move(val.componentCount + i, significandAndExponent.second);
538 			}
539 		}
540 		break;
541 	case GLSLstd450Ldexp:
542 		{
543 			auto significand = Operand(this, state, insn.word(5));
544 			auto exponent = Operand(this, state, insn.word(6));
545 			for(auto i = 0u; i < type.componentCount; i++)
546 			{
547 				// Assumes IEEE 754
548 				auto in = significand.Float(i);
549 				auto significandExponent = Exponent(in);
550 				auto combinedExponent = exponent.Int(i) + significandExponent;
551 				auto isSignificandZero = SIMD::UInt(CmpEQ(significand.Int(i), SIMD::Int(0)));
552 				auto isSignificandInf = SIMD::UInt(IsInf(in));
553 				auto isSignificandNaN = SIMD::UInt(IsNan(in));
554 				auto isExponentNotTooSmall = SIMD::UInt(CmpGE(combinedExponent, SIMD::Int(-126)));
555 				auto isExponentNotTooLarge = SIMD::UInt(CmpLE(combinedExponent, SIMD::Int(128)));
556 				auto isExponentInBounds = isExponentNotTooSmall & isExponentNotTooLarge;
557 
558 				SIMD::UInt v;
559 				v = significand.UInt(i) & SIMD::UInt(0x7FFFFF);                          // Add significand.
560 				v |= (SIMD::UInt(combinedExponent + SIMD::Int(126)) << SIMD::UInt(23));  // Add exponent.
561 				v &= isExponentInBounds;                                                 // Clear v if the exponent is OOB.
562 
563 				v |= significand.UInt(i) & SIMD::UInt(0x80000000);     // Add sign bit.
564 				v |= ~isExponentNotTooLarge & SIMD::UInt(0x7F800000);  // Mark as inf if the exponent is too great.
565 
566 				// If the input significand is zero, inf or nan, just return the
567 				// input significand.
568 				auto passthrough = isSignificandZero | isSignificandInf | isSignificandNaN;
569 				v = (v & ~passthrough) | (significand.UInt(i) & passthrough);
570 
571 				dst.move(i, As<SIMD::Float>(v));
572 			}
573 		}
574 		break;
575 	case GLSLstd450Radians:
576 		{
577 			auto degrees = Operand(this, state, insn.word(5));
578 			for(auto i = 0u; i < type.componentCount; i++)
579 			{
580 				dst.move(i, degrees.Float(i) * SIMD::Float(PI / 180.0f));
581 			}
582 		}
583 		break;
584 	case GLSLstd450Degrees:
585 		{
586 			auto radians = Operand(this, state, insn.word(5));
587 			for(auto i = 0u; i < type.componentCount; i++)
588 			{
589 				dst.move(i, radians.Float(i) * SIMD::Float(180.0f / PI));
590 			}
591 		}
592 		break;
593 	case GLSLstd450Sin:
594 		{
595 			auto radians = Operand(this, state, insn.word(5));
596 			for(auto i = 0u; i < type.componentCount; i++)
597 			{
598 				dst.move(i, Sin(radians.Float(i)));
599 			}
600 		}
601 		break;
602 	case GLSLstd450Cos:
603 		{
604 			auto radians = Operand(this, state, insn.word(5));
605 			for(auto i = 0u; i < type.componentCount; i++)
606 			{
607 				dst.move(i, Cos(radians.Float(i)));
608 			}
609 		}
610 		break;
611 	case GLSLstd450Tan:
612 		{
613 			auto radians = Operand(this, state, insn.word(5));
614 			for(auto i = 0u; i < type.componentCount; i++)
615 			{
616 				dst.move(i, Tan(radians.Float(i)));
617 			}
618 		}
619 		break;
620 	case GLSLstd450Asin:
621 		{
622 			auto val = Operand(this, state, insn.word(5));
623 			Decorations d;
624 			ApplyDecorationsForId(&d, insn.word(5));
625 			for(auto i = 0u; i < type.componentCount; i++)
626 			{
627 				dst.move(i, Asin(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
628 			}
629 		}
630 		break;
631 	case GLSLstd450Acos:
632 		{
633 			auto val = Operand(this, state, insn.word(5));
634 			Decorations d;
635 			ApplyDecorationsForId(&d, insn.word(5));
636 			for(auto i = 0u; i < type.componentCount; i++)
637 			{
638 				dst.move(i, Acos(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
639 			}
640 		}
641 		break;
642 	case GLSLstd450Atan:
643 		{
644 			auto val = Operand(this, state, insn.word(5));
645 			for(auto i = 0u; i < type.componentCount; i++)
646 			{
647 				dst.move(i, Atan(val.Float(i)));
648 			}
649 		}
650 		break;
651 	case GLSLstd450Sinh:
652 		{
653 			auto val = Operand(this, state, insn.word(5));
654 			for(auto i = 0u; i < type.componentCount; i++)
655 			{
656 				dst.move(i, Sinh(val.Float(i)));
657 			}
658 		}
659 		break;
660 	case GLSLstd450Cosh:
661 		{
662 			auto val = Operand(this, state, insn.word(5));
663 			for(auto i = 0u; i < type.componentCount; i++)
664 			{
665 				dst.move(i, Cosh(val.Float(i)));
666 			}
667 		}
668 		break;
669 	case GLSLstd450Tanh:
670 		{
671 			auto val = Operand(this, state, insn.word(5));
672 			for(auto i = 0u; i < type.componentCount; i++)
673 			{
674 				dst.move(i, Tanh(val.Float(i)));
675 			}
676 		}
677 		break;
678 	case GLSLstd450Asinh:
679 		{
680 			auto val = Operand(this, state, insn.word(5));
681 			for(auto i = 0u; i < type.componentCount; i++)
682 			{
683 				dst.move(i, Asinh(val.Float(i)));
684 			}
685 		}
686 		break;
687 	case GLSLstd450Acosh:
688 		{
689 			auto val = Operand(this, state, insn.word(5));
690 			for(auto i = 0u; i < type.componentCount; i++)
691 			{
692 				dst.move(i, Acosh(val.Float(i)));
693 			}
694 		}
695 		break;
696 	case GLSLstd450Atanh:
697 		{
698 			auto val = Operand(this, state, insn.word(5));
699 			for(auto i = 0u; i < type.componentCount; i++)
700 			{
701 				dst.move(i, Atanh(val.Float(i)));
702 			}
703 		}
704 		break;
705 	case GLSLstd450Atan2:
706 		{
707 			auto x = Operand(this, state, insn.word(5));
708 			auto y = Operand(this, state, insn.word(6));
709 			for(auto i = 0u; i < type.componentCount; i++)
710 			{
711 				dst.move(i, Atan2(x.Float(i), y.Float(i)));
712 			}
713 		}
714 		break;
715 	case GLSLstd450Pow:
716 		{
717 			auto x = Operand(this, state, insn.word(5));
718 			auto y = Operand(this, state, insn.word(6));
719 			for(auto i = 0u; i < type.componentCount; i++)
720 			{
721 				dst.move(i, Pow(x.Float(i), y.Float(i)));
722 			}
723 		}
724 		break;
725 	case GLSLstd450Exp:
726 		{
727 			auto val = Operand(this, state, insn.word(5));
728 			for(auto i = 0u; i < type.componentCount; i++)
729 			{
730 				dst.move(i, Exp(val.Float(i)));
731 			}
732 		}
733 		break;
734 	case GLSLstd450Log:
735 		{
736 			auto val = Operand(this, state, insn.word(5));
737 			for(auto i = 0u; i < type.componentCount; i++)
738 			{
739 				dst.move(i, Log(val.Float(i)));
740 			}
741 		}
742 		break;
743 	case GLSLstd450Exp2:
744 		{
745 			auto val = Operand(this, state, insn.word(5));
746 			for(auto i = 0u; i < type.componentCount; i++)
747 			{
748 				dst.move(i, Exp2(val.Float(i)));
749 			}
750 		}
751 		break;
752 	case GLSLstd450Log2:
753 		{
754 			auto val = Operand(this, state, insn.word(5));
755 			for(auto i = 0u; i < type.componentCount; i++)
756 			{
757 				dst.move(i, Log2(val.Float(i)));
758 			}
759 		}
760 		break;
761 	case GLSLstd450Sqrt:
762 		{
763 			auto val = Operand(this, state, insn.word(5));
764 			for(auto i = 0u; i < type.componentCount; i++)
765 			{
766 				dst.move(i, Sqrt(val.Float(i)));
767 			}
768 		}
769 		break;
770 	case GLSLstd450InverseSqrt:
771 		{
772 			auto val = Operand(this, state, insn.word(5));
773 			Decorations d;
774 			ApplyDecorationsForId(&d, insn.word(5));
775 
776 			for(auto i = 0u; i < type.componentCount; i++)
777 			{
778 				dst.move(i, RcpSqrt(val.Float(i), d.RelaxedPrecision ? Precision::Relaxed : Precision::Full));
779 			}
780 		}
781 		break;
782 	case GLSLstd450Determinant:
783 		{
784 			auto mat = Operand(this, state, insn.word(5));
785 
786 			switch(mat.componentCount)
787 			{
788 			case 4:  // 2x2
789 				dst.move(0, Determinant(
790 				                mat.Float(0), mat.Float(1),
791 				                mat.Float(2), mat.Float(3)));
792 				break;
793 			case 9:  // 3x3
794 				dst.move(0, Determinant(
795 				                mat.Float(0), mat.Float(1), mat.Float(2),
796 				                mat.Float(3), mat.Float(4), mat.Float(5),
797 				                mat.Float(6), mat.Float(7), mat.Float(8)));
798 				break;
799 			case 16:  // 4x4
800 				dst.move(0, Determinant(
801 				                mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
802 				                mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
803 				                mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
804 				                mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15)));
805 				break;
806 			default:
807 				UNREACHABLE("GLSLstd450Determinant can only operate with square matrices. Got %d elements", int(mat.componentCount));
808 			}
809 		}
810 		break;
811 	case GLSLstd450MatrixInverse:
812 		{
813 			auto mat = Operand(this, state, insn.word(5));
814 
815 			switch(mat.componentCount)
816 			{
817 			case 4:  // 2x2
818 				{
819 					auto inv = MatrixInverse(
820 					    mat.Float(0), mat.Float(1),
821 					    mat.Float(2), mat.Float(3));
822 					for(uint32_t i = 0; i < inv.size(); i++)
823 					{
824 						dst.move(i, inv[i]);
825 					}
826 				}
827 				break;
828 			case 9:  // 3x3
829 				{
830 					auto inv = MatrixInverse(
831 					    mat.Float(0), mat.Float(1), mat.Float(2),
832 					    mat.Float(3), mat.Float(4), mat.Float(5),
833 					    mat.Float(6), mat.Float(7), mat.Float(8));
834 					for(uint32_t i = 0; i < inv.size(); i++)
835 					{
836 						dst.move(i, inv[i]);
837 					}
838 				}
839 				break;
840 			case 16:  // 4x4
841 				{
842 					auto inv = MatrixInverse(
843 					    mat.Float(0), mat.Float(1), mat.Float(2), mat.Float(3),
844 					    mat.Float(4), mat.Float(5), mat.Float(6), mat.Float(7),
845 					    mat.Float(8), mat.Float(9), mat.Float(10), mat.Float(11),
846 					    mat.Float(12), mat.Float(13), mat.Float(14), mat.Float(15));
847 					for(uint32_t i = 0; i < inv.size(); i++)
848 					{
849 						dst.move(i, inv[i]);
850 					}
851 				}
852 				break;
853 			default:
854 				UNREACHABLE("GLSLstd450MatrixInverse can only operate with square matrices. Got %d elements", int(mat.componentCount));
855 			}
856 		}
857 		break;
858 	case GLSLstd450IMix:
859 		{
860 			UNREACHABLE("GLSLstd450IMix has been removed from the specification");
861 		}
862 		break;
863 	case GLSLstd450PackDouble2x32:
864 		{
865 			UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450PackDouble2x32)");
866 		}
867 		break;
868 	case GLSLstd450UnpackDouble2x32:
869 		{
870 			UNSUPPORTED("SPIR-V Float64 Capability (GLSLstd450UnpackDouble2x32)");
871 		}
872 		break;
873 	case GLSLstd450FindILsb:
874 		{
875 			auto val = Operand(this, state, insn.word(5));
876 			for(auto i = 0u; i < type.componentCount; i++)
877 			{
878 				auto v = val.UInt(i);
879 				dst.move(i, Cttz(v, true) | CmpEQ(v, SIMD::UInt(0)));
880 			}
881 		}
882 		break;
883 	case GLSLstd450FindSMsb:
884 		{
885 			auto val = Operand(this, state, insn.word(5));
886 			for(auto i = 0u; i < type.componentCount; i++)
887 			{
888 				auto v = val.UInt(i) ^ As<SIMD::UInt>(CmpLT(val.Int(i), SIMD::Int(0)));
889 				dst.move(i, SIMD::UInt(31) - Ctlz(v, false));
890 			}
891 		}
892 		break;
893 	case GLSLstd450FindUMsb:
894 		{
895 			auto val = Operand(this, state, insn.word(5));
896 			for(auto i = 0u; i < type.componentCount; i++)
897 			{
898 				dst.move(i, SIMD::UInt(31) - Ctlz(val.UInt(i), false));
899 			}
900 		}
901 		break;
902 	case GLSLstd450InterpolateAtCentroid:
903 		{
904 			Decorations d;
905 			ApplyDecorationsForId(&d, insn.word(5));
906 			auto ptr = state->getPointer(insn.word(5));
907 			for(auto i = 0u; i < type.componentCount; i++)
908 			{
909 				dst.move(i, Interpolate(ptr, d.Location, 0, i, state, SpirvShader::Centroid));
910 			}
911 		}
912 		break;
913 	case GLSLstd450InterpolateAtSample:
914 		{
915 			Decorations d;
916 			ApplyDecorationsForId(&d, insn.word(5));
917 			auto ptr = state->getPointer(insn.word(5));
918 			for(auto i = 0u; i < type.componentCount; i++)
919 			{
920 				dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtSample));
921 			}
922 		}
923 		break;
924 	case GLSLstd450InterpolateAtOffset:
925 		{
926 			Decorations d;
927 			ApplyDecorationsForId(&d, insn.word(5));
928 			auto ptr = state->getPointer(insn.word(5));
929 			for(auto i = 0u; i < type.componentCount; i++)
930 			{
931 				dst.move(i, Interpolate(ptr, d.Location, insn.word(6), i, state, SpirvShader::AtOffset));
932 			}
933 		}
934 		break;
935 	case GLSLstd450NMin:
936 		{
937 			auto x = Operand(this, state, insn.word(5));
938 			auto y = Operand(this, state, insn.word(6));
939 			for(auto i = 0u; i < type.componentCount; i++)
940 			{
941 				dst.move(i, NMin(x.Float(i), y.Float(i)));
942 			}
943 		}
944 		break;
945 	case GLSLstd450NMax:
946 		{
947 			auto x = Operand(this, state, insn.word(5));
948 			auto y = Operand(this, state, insn.word(6));
949 			for(auto i = 0u; i < type.componentCount; i++)
950 			{
951 				dst.move(i, NMax(x.Float(i), y.Float(i)));
952 			}
953 		}
954 		break;
955 	case GLSLstd450NClamp:
956 		{
957 			auto x = Operand(this, state, insn.word(5));
958 			auto minVal = Operand(this, state, insn.word(6));
959 			auto maxVal = Operand(this, state, insn.word(7));
960 			for(auto i = 0u; i < type.componentCount; i++)
961 			{
962 				auto clamp = NMin(NMax(x.Float(i), minVal.Float(i)), maxVal.Float(i));
963 				dst.move(i, clamp);
964 			}
965 		}
966 		break;
967 	default:
968 		UNREACHABLE("ExtInst %d", int(extInstIndex));
969 		break;
970 	}
971 
972 	return EmitResult::Continue;
973 }
974 
Interpolate(SIMD::Pointer const & ptr,int32_t location,Object::ID paramId,uint32_t component,EmitState * state,InterpolationType type) const975 SIMD::Float SpirvShader::Interpolate(SIMD::Pointer const &ptr, int32_t location, Object::ID paramId,
976                                      uint32_t component, EmitState *state, InterpolationType type) const
977 {
978 	uint32_t interpolant = (location * 4);
979 	uint32_t components_per_row = GetNumInputComponents(location);
980 	if((location < 0) || (interpolant >= inputs.size()) || (components_per_row == 0))
981 	{
982 		return SIMD::Float(0.0f);
983 	}
984 
985 	const auto &interpolationData = state->routine->interpolationData;
986 
987 	SIMD::Float x;
988 	SIMD::Float y;
989 	SIMD::Float rhw;
990 
991 	switch(type)
992 	{
993 	case Centroid:
994 		x = interpolationData.xCentroid;
995 		y = interpolationData.yCentroid;
996 		rhw = interpolationData.rhwCentroid;
997 		break;
998 	case AtSample:
999 		x = SIMD::Float(0.0f);
1000 		y = SIMD::Float(0.0f);
1001 
1002 		if(state->getMultiSampleCount() > 1)
1003 		{
1004 			static constexpr int NUM_SAMPLES = 4;
1005 			ASSERT(state->getMultiSampleCount() == NUM_SAMPLES);
1006 
1007 			Array<Float> sampleX(NUM_SAMPLES);
1008 			Array<Float> sampleY(NUM_SAMPLES);
1009 			for(int i = 0; i < NUM_SAMPLES; ++i)
1010 			{
1011 				sampleX[i] = Constants::SampleLocationsX[i];
1012 				sampleY[i] = Constants::SampleLocationsY[i];
1013 			}
1014 
1015 			auto sampleOperand = Operand(this, state, paramId);
1016 			ASSERT(sampleOperand.componentCount == 1);
1017 
1018 			// If sample does not exist, the position used to interpolate the
1019 			// input variable is undefined, so we just clamp to avoid OOB accesses.
1020 			SIMD::Int samples = sampleOperand.Int(0) & SIMD::Int(NUM_SAMPLES - 1);
1021 
1022 			for(int i = 0; i < SIMD::Width; ++i)
1023 			{
1024 				Int sample = Extract(samples, i);
1025 				x = Insert(x, sampleX[sample], i);
1026 				y = Insert(y, sampleY[sample], i);
1027 			}
1028 		}
1029 
1030 		x += interpolationData.x;
1031 		y += interpolationData.y;
1032 		rhw = interpolationData.rhw;
1033 		break;
1034 	case AtOffset:
1035 		{
1036 			//  An offset of (0, 0) identifies the center of the pixel.
1037 			auto offset = Operand(this, state, paramId);
1038 			ASSERT(offset.componentCount == 2);
1039 
1040 			x = interpolationData.x + offset.Float(0);
1041 			y = interpolationData.y + offset.Float(1);
1042 			rhw = interpolationData.rhw;
1043 		}
1044 		break;
1045 	default:
1046 		UNREACHABLE("Unknown interpolation type: %d", (int)type);
1047 		return SIMD::Float(0.0f);
1048 	}
1049 
1050 	uint32_t packedInterpolant = GetPackedInterpolant(location);
1051 	Pointer<Byte> planeEquation = interpolationData.primitive + OFFSET(Primitive, V[packedInterpolant]);
1052 	if(ptr.hasDynamicOffsets)
1053 	{
1054 		// This code assumes all dynamic offsets are equal
1055 		Int offset = ((Extract(ptr.dynamicOffsets, 0) + ptr.staticOffsets[0]) >> 2) + component;
1056 		offset = Min(offset, Int(inputs.size() - interpolant - 1));
1057 		planeEquation += (offset * sizeof(PlaneEquation));
1058 	}
1059 	else
1060 	{
1061 		ASSERT(ptr.hasStaticEqualOffsets());
1062 
1063 		uint32_t offset = (ptr.staticOffsets[0] >> 2) + component;
1064 		if((interpolant + offset) >= inputs.size())
1065 		{
1066 			return SIMD::Float(0.0f);
1067 		}
1068 		planeEquation += offset * sizeof(PlaneEquation);
1069 	}
1070 
1071 	return SpirvRoutine::interpolateAtXY(x, y, rhw, planeEquation, false, true);
1072 }
1073 
// Evaluates the plane equation stored at planeEquation for the given x, y and
// rhw values.
//
// The C coefficient is always loaded. The A and B coefficients are loaded only
// when flat is false; for flat inputs they are left unassigned, which is fine
// because the file-local ::Interpolate helper does not read them in that case.
// The flat and perspective flags are forwarded to that helper unchanged.
SIMD::Float SpirvRoutine::interpolateAtXY(const SIMD::Float &x, const SIMD::Float &y, const SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
{
	SIMD::Float gradientX;
	SIMD::Float gradientY;
	SIMD::Float constantTerm = *Pointer<SIMD::Float>(planeEquation + OFFSET(PlaneEquation, C), 16);

	if(flat)
	{
		// Flat inputs only need the constant term.
		return ::Interpolate(x, y, rhw, gradientX, gradientY, constantTerm, flat, perspective);
	}

	gradientX = *Pointer<SIMD::Float>(planeEquation + OFFSET(PlaneEquation, A), 16);
	gradientY = *Pointer<SIMD::Float>(planeEquation + OFFSET(PlaneEquation, B), 16);

	return ::Interpolate(x, y, rhw, gradientX, gradientY, constantTerm, flat, perspective);
}
1088 
1089 }  // namespace sw