• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "SpirvShader.hpp"
16 #include "SpirvShaderDebug.hpp"
17 
18 #include "ShaderCore.hpp"
19 
20 #include <spirv/unified1/spirv.hpp>
21 
22 namespace sw {
23 
EmitVectorTimesScalar(InsnIterator insn,EmitState * state) const24 SpirvShader::EmitResult SpirvShader::EmitVectorTimesScalar(InsnIterator insn, EmitState *state) const
25 {
26 	auto &type = getType(insn.resultTypeId());
27 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
28 	auto lhs = Operand(this, state, insn.word(3));
29 	auto rhs = Operand(this, state, insn.word(4));
30 
31 	for(auto i = 0u; i < type.componentCount; i++)
32 	{
33 		dst.move(i, lhs.Float(i) * rhs.Float(0));
34 	}
35 
36 	return EmitResult::Continue;
37 }
38 
EmitMatrixTimesVector(InsnIterator insn,EmitState * state) const39 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesVector(InsnIterator insn, EmitState *state) const
40 {
41 	auto &type = getType(insn.resultTypeId());
42 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
43 	auto lhs = Operand(this, state, insn.word(3));
44 	auto rhs = Operand(this, state, insn.word(4));
45 
46 	for(auto i = 0u; i < type.componentCount; i++)
47 	{
48 		SIMD::Float v = lhs.Float(i) * rhs.Float(0);
49 		for(auto j = 1u; j < rhs.componentCount; j++)
50 		{
51 			v += lhs.Float(i + type.componentCount * j) * rhs.Float(j);
52 		}
53 		dst.move(i, v);
54 	}
55 
56 	return EmitResult::Continue;
57 }
58 
EmitVectorTimesMatrix(InsnIterator insn,EmitState * state) const59 SpirvShader::EmitResult SpirvShader::EmitVectorTimesMatrix(InsnIterator insn, EmitState *state) const
60 {
61 	auto &type = getType(insn.resultTypeId());
62 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
63 	auto lhs = Operand(this, state, insn.word(3));
64 	auto rhs = Operand(this, state, insn.word(4));
65 
66 	for(auto i = 0u; i < type.componentCount; i++)
67 	{
68 		SIMD::Float v = lhs.Float(0) * rhs.Float(i * lhs.componentCount);
69 		for(auto j = 1u; j < lhs.componentCount; j++)
70 		{
71 			v += lhs.Float(j) * rhs.Float(i * lhs.componentCount + j);
72 		}
73 		dst.move(i, v);
74 	}
75 
76 	return EmitResult::Continue;
77 }
78 
EmitMatrixTimesMatrix(InsnIterator insn,EmitState * state) const79 SpirvShader::EmitResult SpirvShader::EmitMatrixTimesMatrix(InsnIterator insn, EmitState *state) const
80 {
81 	auto &type = getType(insn.resultTypeId());
82 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
83 	auto lhs = Operand(this, state, insn.word(3));
84 	auto rhs = Operand(this, state, insn.word(4));
85 
86 	auto numColumns = type.definition.word(3);
87 	auto numRows = getType(type.definition.word(2)).definition.word(3);
88 	auto numAdds = getObjectType(insn.word(3)).definition.word(3);
89 
90 	for(auto row = 0u; row < numRows; row++)
91 	{
92 		for(auto col = 0u; col < numColumns; col++)
93 		{
94 			SIMD::Float v = SIMD::Float(0);
95 			for(auto i = 0u; i < numAdds; i++)
96 			{
97 				v += lhs.Float(i * numRows + row) * rhs.Float(col * numAdds + i);
98 			}
99 			dst.move(numRows * col + row, v);
100 		}
101 	}
102 
103 	return EmitResult::Continue;
104 }
105 
EmitOuterProduct(InsnIterator insn,EmitState * state) const106 SpirvShader::EmitResult SpirvShader::EmitOuterProduct(InsnIterator insn, EmitState *state) const
107 {
108 	auto &type = getType(insn.resultTypeId());
109 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
110 	auto lhs = Operand(this, state, insn.word(3));
111 	auto rhs = Operand(this, state, insn.word(4));
112 
113 	auto numRows = lhs.componentCount;
114 	auto numCols = rhs.componentCount;
115 
116 	for(auto col = 0u; col < numCols; col++)
117 	{
118 		for(auto row = 0u; row < numRows; row++)
119 		{
120 			dst.move(col * numRows + row, lhs.Float(row) * rhs.Float(col));
121 		}
122 	}
123 
124 	return EmitResult::Continue;
125 }
126 
EmitTranspose(InsnIterator insn,EmitState * state) const127 SpirvShader::EmitResult SpirvShader::EmitTranspose(InsnIterator insn, EmitState *state) const
128 {
129 	auto &type = getType(insn.resultTypeId());
130 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
131 	auto mat = Operand(this, state, insn.word(3));
132 
133 	auto numCols = type.definition.word(3);
134 	auto numRows = getType(type.definition.word(2)).componentCount;
135 
136 	for(auto col = 0u; col < numCols; col++)
137 	{
138 		for(auto row = 0u; row < numRows; row++)
139 		{
140 			dst.move(col * numRows + row, mat.Float(row * numCols + col));
141 		}
142 	}
143 
144 	return EmitResult::Continue;
145 }
146 
EmitUnaryOp(InsnIterator insn,EmitState * state) const147 SpirvShader::EmitResult SpirvShader::EmitUnaryOp(InsnIterator insn, EmitState *state) const
148 {
149 	auto &type = getType(insn.resultTypeId());
150 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
151 	auto src = Operand(this, state, insn.word(3));
152 
153 	for(auto i = 0u; i < type.componentCount; i++)
154 	{
155 		switch(insn.opcode())
156 		{
157 		case spv::OpNot:
158 		case spv::OpLogicalNot:  // logical not == bitwise not due to all-bits boolean representation
159 			dst.move(i, ~src.UInt(i));
160 			break;
161 		case spv::OpBitFieldInsert:
162 			{
163 				auto insert = Operand(this, state, insn.word(4)).UInt(i);
164 				auto offset = Operand(this, state, insn.word(5)).UInt(0);
165 				auto count = Operand(this, state, insn.word(6)).UInt(0);
166 				auto one = SIMD::UInt(1);
167 				auto v = src.UInt(i);
168 				auto mask = Bitmask32(offset + count) ^ Bitmask32(offset);
169 				dst.move(i, (v & ~mask) | ((insert << offset) & mask));
170 			}
171 			break;
172 		case spv::OpBitFieldSExtract:
173 		case spv::OpBitFieldUExtract:
174 			{
175 				auto offset = Operand(this, state, insn.word(4)).UInt(0);
176 				auto count = Operand(this, state, insn.word(5)).UInt(0);
177 				auto one = SIMD::UInt(1);
178 				auto v = src.UInt(i);
179 				SIMD::UInt out = (v >> offset) & Bitmask32(count);
180 				if(insn.opcode() == spv::OpBitFieldSExtract)
181 				{
182 					auto sign = out & NthBit32(count - one);
183 					auto sext = ~(sign - one);
184 					out |= sext;
185 				}
186 				dst.move(i, out);
187 			}
188 			break;
189 		case spv::OpBitReverse:
190 			{
191 				// TODO: Add an intrinsic to reactor. Even if there isn't a
192 				// single vector instruction, there may be target-dependent
193 				// ways to make this faster.
194 				// https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
195 				SIMD::UInt v = src.UInt(i);
196 				v = ((v >> 1) & SIMD::UInt(0x55555555)) | ((v & SIMD::UInt(0x55555555)) << 1);
197 				v = ((v >> 2) & SIMD::UInt(0x33333333)) | ((v & SIMD::UInt(0x33333333)) << 2);
198 				v = ((v >> 4) & SIMD::UInt(0x0F0F0F0F)) | ((v & SIMD::UInt(0x0F0F0F0F)) << 4);
199 				v = ((v >> 8) & SIMD::UInt(0x00FF00FF)) | ((v & SIMD::UInt(0x00FF00FF)) << 8);
200 				v = (v >> 16) | (v << 16);
201 				dst.move(i, v);
202 			}
203 			break;
204 		case spv::OpBitCount:
205 			dst.move(i, CountBits(src.UInt(i)));
206 			break;
207 		case spv::OpSNegate:
208 			dst.move(i, -src.Int(i));
209 			break;
210 		case spv::OpFNegate:
211 			dst.move(i, -src.Float(i));
212 			break;
213 		case spv::OpConvertFToU:
214 			dst.move(i, SIMD::UInt(src.Float(i)));
215 			break;
216 		case spv::OpConvertFToS:
217 			dst.move(i, SIMD::Int(src.Float(i)));
218 			break;
219 		case spv::OpConvertSToF:
220 			dst.move(i, SIMD::Float(src.Int(i)));
221 			break;
222 		case spv::OpConvertUToF:
223 			dst.move(i, SIMD::Float(src.UInt(i)));
224 			break;
225 		case spv::OpBitcast:
226 			dst.move(i, src.Float(i));
227 			break;
228 		case spv::OpIsInf:
229 			dst.move(i, IsInf(src.Float(i)));
230 			break;
231 		case spv::OpIsNan:
232 			dst.move(i, IsNan(src.Float(i)));
233 			break;
234 		case spv::OpDPdx:
235 		case spv::OpDPdxCoarse:
236 			// Derivative instructions: FS invocations are laid out like so:
237 			//    0 1
238 			//    2 3
239 			static_assert(SIMD::Width == 4, "All cross-lane instructions will need care when using a different width");
240 			dst.move(i, SIMD::Float(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)));
241 			break;
242 		case spv::OpDPdy:
243 		case spv::OpDPdyCoarse:
244 			dst.move(i, SIMD::Float(Extract(src.Float(i), 2) - Extract(src.Float(i), 0)));
245 			break;
246 		case spv::OpFwidth:
247 		case spv::OpFwidthCoarse:
248 			dst.move(i, SIMD::Float(Abs(Extract(src.Float(i), 1) - Extract(src.Float(i), 0)) + Abs(Extract(src.Float(i), 2) - Extract(src.Float(i), 0))));
249 			break;
250 		case spv::OpDPdxFine:
251 			{
252 				auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
253 				auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
254 				SIMD::Float v = SIMD::Float(firstRow);
255 				v = Insert(v, secondRow, 2);
256 				v = Insert(v, secondRow, 3);
257 				dst.move(i, v);
258 			}
259 			break;
260 		case spv::OpDPdyFine:
261 			{
262 				auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
263 				auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
264 				SIMD::Float v = SIMD::Float(firstColumn);
265 				v = Insert(v, secondColumn, 1);
266 				v = Insert(v, secondColumn, 3);
267 				dst.move(i, v);
268 			}
269 			break;
270 		case spv::OpFwidthFine:
271 			{
272 				auto firstRow = Extract(src.Float(i), 1) - Extract(src.Float(i), 0);
273 				auto secondRow = Extract(src.Float(i), 3) - Extract(src.Float(i), 2);
274 				SIMD::Float dpdx = SIMD::Float(firstRow);
275 				dpdx = Insert(dpdx, secondRow, 2);
276 				dpdx = Insert(dpdx, secondRow, 3);
277 				auto firstColumn = Extract(src.Float(i), 2) - Extract(src.Float(i), 0);
278 				auto secondColumn = Extract(src.Float(i), 3) - Extract(src.Float(i), 1);
279 				SIMD::Float dpdy = SIMD::Float(firstColumn);
280 				dpdy = Insert(dpdy, secondColumn, 1);
281 				dpdy = Insert(dpdy, secondColumn, 3);
282 				dst.move(i, Abs(dpdx) + Abs(dpdy));
283 			}
284 			break;
285 		case spv::OpQuantizeToF16:
286 			{
287 				// Note: keep in sync with the specialization constant version in EvalSpecConstantUnaryOp
288 				auto abs = Abs(src.Float(i));
289 				auto sign = src.Int(i) & SIMD::Int(0x80000000);
290 				auto isZero = CmpLT(abs, SIMD::Float(0.000061035f));
291 				auto isInf = CmpGT(abs, SIMD::Float(65504.0f));
292 				auto isNaN = IsNan(abs);
293 				auto isInfOrNan = isInf | isNaN;
294 				SIMD::Int v = src.Int(i) & SIMD::Int(0xFFFFE000);
295 				v &= ~isZero | SIMD::Int(0x80000000);
296 				v = sign | (isInfOrNan & SIMD::Int(0x7F800000)) | (~isInfOrNan & v);
297 				v |= isNaN & SIMD::Int(0x400000);
298 				dst.move(i, v);
299 			}
300 			break;
301 		default:
302 			UNREACHABLE("%s", OpcodeName(insn.opcode()));
303 		}
304 	}
305 
306 	return EmitResult::Continue;
307 }
308 
EmitBinaryOp(InsnIterator insn,EmitState * state) const309 SpirvShader::EmitResult SpirvShader::EmitBinaryOp(InsnIterator insn, EmitState *state) const
310 {
311 	auto &type = getType(insn.resultTypeId());
312 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
313 	auto &lhsType = getObjectType(insn.word(3));
314 	auto lhs = Operand(this, state, insn.word(3));
315 	auto rhs = Operand(this, state, insn.word(4));
316 
317 	for(auto i = 0u; i < lhsType.componentCount; i++)
318 	{
319 		switch(insn.opcode())
320 		{
321 		case spv::OpIAdd:
322 			dst.move(i, lhs.Int(i) + rhs.Int(i));
323 			break;
324 		case spv::OpISub:
325 			dst.move(i, lhs.Int(i) - rhs.Int(i));
326 			break;
327 		case spv::OpIMul:
328 			dst.move(i, lhs.Int(i) * rhs.Int(i));
329 			break;
330 		case spv::OpSDiv:
331 			{
332 				SIMD::Int a = lhs.Int(i);
333 				SIMD::Int b = rhs.Int(i);
334 				b = b | CmpEQ(b, SIMD::Int(0));                                       // prevent divide-by-zero
335 				a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1)));  // prevent integer overflow
336 				dst.move(i, a / b);
337 			}
338 			break;
339 		case spv::OpUDiv:
340 			{
341 				auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
342 				dst.move(i, lhs.UInt(i) / (rhs.UInt(i) | zeroMask));
343 			}
344 			break;
345 		case spv::OpSRem:
346 			{
347 				SIMD::Int a = lhs.Int(i);
348 				SIMD::Int b = rhs.Int(i);
349 				b = b | CmpEQ(b, SIMD::Int(0));                                       // prevent divide-by-zero
350 				a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1)));  // prevent integer overflow
351 				dst.move(i, a % b);
352 			}
353 			break;
354 		case spv::OpSMod:
355 			{
356 				SIMD::Int a = lhs.Int(i);
357 				SIMD::Int b = rhs.Int(i);
358 				b = b | CmpEQ(b, SIMD::Int(0));                                       // prevent divide-by-zero
359 				a = a | (CmpEQ(a, SIMD::Int(0x80000000)) & CmpEQ(b, SIMD::Int(-1)));  // prevent integer overflow
360 				auto mod = a % b;
361 				// If a and b have opposite signs, the remainder operation takes
362 				// the sign from a but OpSMod is supposed to take the sign of b.
363 				// Adding b will ensure that the result has the correct sign and
364 				// that it is still congruent to a modulo b.
365 				//
366 				// See also http://mathforum.org/library/drmath/view/52343.html
367 				auto signDiff = CmpNEQ(CmpGE(a, SIMD::Int(0)), CmpGE(b, SIMD::Int(0)));
368 				auto fixedMod = mod + (b & CmpNEQ(mod, SIMD::Int(0)) & signDiff);
369 				dst.move(i, As<SIMD::Float>(fixedMod));
370 			}
371 			break;
372 		case spv::OpUMod:
373 			{
374 				auto zeroMask = As<SIMD::UInt>(CmpEQ(rhs.Int(i), SIMD::Int(0)));
375 				dst.move(i, lhs.UInt(i) % (rhs.UInt(i) | zeroMask));
376 			}
377 			break;
378 		case spv::OpIEqual:
379 		case spv::OpLogicalEqual:
380 			dst.move(i, CmpEQ(lhs.Int(i), rhs.Int(i)));
381 			break;
382 		case spv::OpINotEqual:
383 		case spv::OpLogicalNotEqual:
384 			dst.move(i, CmpNEQ(lhs.Int(i), rhs.Int(i)));
385 			break;
386 		case spv::OpUGreaterThan:
387 			dst.move(i, CmpGT(lhs.UInt(i), rhs.UInt(i)));
388 			break;
389 		case spv::OpSGreaterThan:
390 			dst.move(i, CmpGT(lhs.Int(i), rhs.Int(i)));
391 			break;
392 		case spv::OpUGreaterThanEqual:
393 			dst.move(i, CmpGE(lhs.UInt(i), rhs.UInt(i)));
394 			break;
395 		case spv::OpSGreaterThanEqual:
396 			dst.move(i, CmpGE(lhs.Int(i), rhs.Int(i)));
397 			break;
398 		case spv::OpULessThan:
399 			dst.move(i, CmpLT(lhs.UInt(i), rhs.UInt(i)));
400 			break;
401 		case spv::OpSLessThan:
402 			dst.move(i, CmpLT(lhs.Int(i), rhs.Int(i)));
403 			break;
404 		case spv::OpULessThanEqual:
405 			dst.move(i, CmpLE(lhs.UInt(i), rhs.UInt(i)));
406 			break;
407 		case spv::OpSLessThanEqual:
408 			dst.move(i, CmpLE(lhs.Int(i), rhs.Int(i)));
409 			break;
410 		case spv::OpFAdd:
411 			dst.move(i, lhs.Float(i) + rhs.Float(i));
412 			break;
413 		case spv::OpFSub:
414 			dst.move(i, lhs.Float(i) - rhs.Float(i));
415 			break;
416 		case spv::OpFMul:
417 			dst.move(i, lhs.Float(i) * rhs.Float(i));
418 			break;
419 		case spv::OpFDiv:
420 			dst.move(i, lhs.Float(i) / rhs.Float(i));
421 			break;
422 		case spv::OpFMod:
423 			// TODO(b/126873455): inaccurate for values greater than 2^24
424 			dst.move(i, lhs.Float(i) - rhs.Float(i) * Floor(lhs.Float(i) / rhs.Float(i)));
425 			break;
426 		case spv::OpFRem:
427 			dst.move(i, lhs.Float(i) % rhs.Float(i));
428 			break;
429 		case spv::OpFOrdEqual:
430 			dst.move(i, CmpEQ(lhs.Float(i), rhs.Float(i)));
431 			break;
432 		case spv::OpFUnordEqual:
433 			dst.move(i, CmpUEQ(lhs.Float(i), rhs.Float(i)));
434 			break;
435 		case spv::OpFOrdNotEqual:
436 			dst.move(i, CmpNEQ(lhs.Float(i), rhs.Float(i)));
437 			break;
438 		case spv::OpFUnordNotEqual:
439 			dst.move(i, CmpUNEQ(lhs.Float(i), rhs.Float(i)));
440 			break;
441 		case spv::OpFOrdLessThan:
442 			dst.move(i, CmpLT(lhs.Float(i), rhs.Float(i)));
443 			break;
444 		case spv::OpFUnordLessThan:
445 			dst.move(i, CmpULT(lhs.Float(i), rhs.Float(i)));
446 			break;
447 		case spv::OpFOrdGreaterThan:
448 			dst.move(i, CmpGT(lhs.Float(i), rhs.Float(i)));
449 			break;
450 		case spv::OpFUnordGreaterThan:
451 			dst.move(i, CmpUGT(lhs.Float(i), rhs.Float(i)));
452 			break;
453 		case spv::OpFOrdLessThanEqual:
454 			dst.move(i, CmpLE(lhs.Float(i), rhs.Float(i)));
455 			break;
456 		case spv::OpFUnordLessThanEqual:
457 			dst.move(i, CmpULE(lhs.Float(i), rhs.Float(i)));
458 			break;
459 		case spv::OpFOrdGreaterThanEqual:
460 			dst.move(i, CmpGE(lhs.Float(i), rhs.Float(i)));
461 			break;
462 		case spv::OpFUnordGreaterThanEqual:
463 			dst.move(i, CmpUGE(lhs.Float(i), rhs.Float(i)));
464 			break;
465 		case spv::OpShiftRightLogical:
466 			dst.move(i, lhs.UInt(i) >> rhs.UInt(i));
467 			break;
468 		case spv::OpShiftRightArithmetic:
469 			dst.move(i, lhs.Int(i) >> rhs.Int(i));
470 			break;
471 		case spv::OpShiftLeftLogical:
472 			dst.move(i, lhs.UInt(i) << rhs.UInt(i));
473 			break;
474 		case spv::OpBitwiseOr:
475 		case spv::OpLogicalOr:
476 			dst.move(i, lhs.UInt(i) | rhs.UInt(i));
477 			break;
478 		case spv::OpBitwiseXor:
479 			dst.move(i, lhs.UInt(i) ^ rhs.UInt(i));
480 			break;
481 		case spv::OpBitwiseAnd:
482 		case spv::OpLogicalAnd:
483 			dst.move(i, lhs.UInt(i) & rhs.UInt(i));
484 			break;
485 		case spv::OpSMulExtended:
486 			// Extended ops: result is a structure containing two members of the same type as lhs & rhs.
487 			// In our flat view then, component i is the i'th component of the first member;
488 			// component i + N is the i'th component of the second member.
489 			dst.move(i, lhs.Int(i) * rhs.Int(i));
490 			dst.move(i + lhsType.componentCount, MulHigh(lhs.Int(i), rhs.Int(i)));
491 			break;
492 		case spv::OpUMulExtended:
493 			dst.move(i, lhs.UInt(i) * rhs.UInt(i));
494 			dst.move(i + lhsType.componentCount, MulHigh(lhs.UInt(i), rhs.UInt(i)));
495 			break;
496 		case spv::OpIAddCarry:
497 			dst.move(i, lhs.UInt(i) + rhs.UInt(i));
498 			dst.move(i + lhsType.componentCount, CmpLT(dst.UInt(i), lhs.UInt(i)) >> 31);
499 			break;
500 		case spv::OpISubBorrow:
501 			dst.move(i, lhs.UInt(i) - rhs.UInt(i));
502 			dst.move(i + lhsType.componentCount, CmpLT(lhs.UInt(i), rhs.UInt(i)) >> 31);
503 			break;
504 		default:
505 			UNREACHABLE("%s", OpcodeName(insn.opcode()));
506 		}
507 	}
508 
509 	SPIRV_SHADER_DBG("{0}: {1}", insn.word(2), dst);
510 	SPIRV_SHADER_DBG("{0}: {1}", insn.word(3), lhs);
511 	SPIRV_SHADER_DBG("{0}: {1}", insn.word(4), rhs);
512 
513 	return EmitResult::Continue;
514 }
515 
EmitDot(InsnIterator insn,EmitState * state) const516 SpirvShader::EmitResult SpirvShader::EmitDot(InsnIterator insn, EmitState *state) const
517 {
518 	auto &type = getType(insn.resultTypeId());
519 	ASSERT(type.componentCount == 1);
520 	auto &dst = state->createIntermediate(insn.resultId(), type.componentCount);
521 	auto &lhsType = getObjectType(insn.word(3));
522 	auto lhs = Operand(this, state, insn.word(3));
523 	auto rhs = Operand(this, state, insn.word(4));
524 
525 	dst.move(0, Dot(lhsType.componentCount, lhs, rhs));
526 
527 	SPIRV_SHADER_DBG("{0}: {1}", insn.resultId(), dst);
528 	SPIRV_SHADER_DBG("{0}: {1}", insn.word(3), lhs);
529 	SPIRV_SHADER_DBG("{0}: {1}", insn.word(4), rhs);
530 
531 	return EmitResult::Continue;
532 }
533 
Dot(unsigned numComponents,Operand const & x,Operand const & y) const534 SIMD::Float SpirvShader::Dot(unsigned numComponents, Operand const &x, Operand const &y) const
535 {
536 	SIMD::Float d = x.Float(0) * y.Float(0);
537 
538 	for(auto i = 1u; i < numComponents; i++)
539 	{
540 		d += x.Float(i) * y.Float(i);
541 	}
542 
543 	return d;
544 }
545 
Frexp(RValue<SIMD::Float> val) const546 std::pair<SIMD::Float, SIMD::Int> SpirvShader::Frexp(RValue<SIMD::Float> val) const
547 {
548 	// Assumes IEEE 754
549 	auto v = As<SIMD::UInt>(val);
550 	auto isNotZero = CmpNEQ(v & SIMD::UInt(0x7FFFFFFF), SIMD::UInt(0));
551 	auto zeroSign = v & SIMD::UInt(0x80000000) & ~isNotZero;
552 	auto significand = As<SIMD::Float>((((v & SIMD::UInt(0x807FFFFF)) | SIMD::UInt(0x3F000000)) & isNotZero) | zeroSign);
553 	auto exponent = Exponent(val) & SIMD::Int(isNotZero);
554 	return std::make_pair(significand, exponent);
555 }
556 
557 }  // namespace sw