// Copyright 2019 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "SpirvShader.hpp" #include namespace sw { struct SpirvShader::Impl::Group { // Template function to perform a binary operation. // |TYPE| should be the type of the binary operation (as a SIMD::). // |I| should be a type suitable to initialize the identity value. // |APPLY| should be a callable object that takes two RValue parameters // and returns a new RValue corresponding to the operation's result. template static void BinaryOperation( const SpirvShader *shader, const SpirvShader::InsnIterator &insn, const SpirvShader::EmitState *state, Intermediate &dst, const I identityValue, APPLY &&apply) { SpirvShader::Operand value(shader, state, insn.word(5)); auto &type = shader->getType(SpirvShader::Type::ID(insn.word(1))); for(auto i = 0u; i < type.componentCount; i++) { auto mask = As(state->activeLaneMask()); // Considers helper invocations active. See b/151137030 auto identity = TYPE(identityValue); SIMD::UInt v_uint = (value.UInt(i) & mask) | (As(identity) & ~mask); TYPE v = As(v_uint); switch(spv::GroupOperation(insn.word(4))) { case spv::GroupOperationReduce: { // NOTE: floating-point add and multiply are not really commutative so // ensure that all values in the final lanes are identical TYPE v2 = apply(v.xxzz, v.yyww); // [xy] [xy] [zw] [zw] TYPE v3 = apply(v2.xxxx, v2.zzzz); // [xyzw] [xyzw] [xyzw] [xyzw] dst.move(i, v3); break; } case spv::GroupOperationInclusiveScan: { TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] dst.move(i, v3); break; } case spv::GroupOperationExclusiveScan: { TYPE v2 = apply(v, Shuffle(v, identity, 0x4012) /* [id, v.y, v.z, v.w] */); // [x] [xy] [yz] [zw] TYPE v3 = apply(v2, Shuffle(v2, identity, 0x4401) /* [id, id, v2.x, v2.y] */); // [x] [xy] [xyz] [xyzw] auto v4 = Shuffle(v3, identity, 0x4012 /* [id, v3.x, v3.y, v3.z] */); // [i] [x] [xy] [xyz] dst.move(i, v4); break; } default: UNSUPPORTED("EmitGroupNonUniform op: %s Group operation: %d", SpirvShader::OpcodeName(type.opcode()), insn.word(4)); } } } }; SpirvShader::EmitResult SpirvShader::EmitGroupNonUniform(InsnIterator insn, EmitState *state) const { static_assert(SIMD::Width == 4, "EmitGroupNonUniform makes many assumptions that the SIMD vector width is 4"); auto &type = getType(Type::ID(insn.word(1))); Object::ID resultId = insn.word(2); auto scope = spv::Scope(GetConstScalarInt(insn.word(3))); ASSERT_MSG(scope == spv::ScopeSubgroup, "Scope for Non Uniform Group Operations must be Subgroup for Vulkan 1.1"); auto &dst = state->createIntermediate(resultId, type.componentCount); switch(insn.opcode()) { case spv::OpGroupNonUniformElect: { // Result is true only in the active invocation with the lowest id // in the group, otherwise result is false. SIMD::Int active = state->activeLaneMask(); // Considers helper invocations active. See b/151137030 // TODO: Would be nice if we could write this as: // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx) auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx)); dst.move(0, elect); break; } case spv::OpGroupNonUniformAll: { Operand predicate(this, state, insn.word(4)); dst.move(0, AndAll(predicate.UInt(0) | ~As(state->activeLaneMask()))); // Considers helper invocations active. See b/151137030 break; } case spv::OpGroupNonUniformAny: { Operand predicate(this, state, insn.word(4)); dst.move(0, OrAll(predicate.UInt(0) & As(state->activeLaneMask()))); // Considers helper invocations active. See b/151137030 break; } case spv::OpGroupNonUniformAllEqual: { Operand value(this, state, insn.word(4)); auto res = SIMD::UInt(0xffffffff); SIMD::UInt active = As(state->activeLaneMask()); // Considers helper invocations active. See b/151137030 SIMD::UInt inactive = ~active; for(auto i = 0u; i < type.componentCount; i++) { SIMD::UInt v = value.UInt(i) & active; SIMD::UInt filled = v; for(int j = 0; j < SIMD::Width - 1; j++) { filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value } res &= AndAll(CmpEQ(filled.xyzw, filled.yzwx)); } dst.move(0, res); break; } case spv::OpGroupNonUniformBroadcast: { auto valueId = Object::ID(insn.word(4)); auto idId = Object::ID(insn.word(5)); Operand value(this, state, valueId); // Decide between the fast path for constants and the slow path for // intermediates. if(getObject(idId).kind == SpirvShader::Object::Kind::Constant) { auto id = SIMD::Int(GetConstScalarInt(insn.word(5))); auto mask = CmpEQ(id, SIMD::Int(0, 1, 2, 3)); for(auto i = 0u; i < type.componentCount; i++) { dst.move(i, OrAll(value.Int(i) & mask)); } } else { Operand id(this, state, idId); SIMD::UInt active = As(state->activeLaneMask()); // Considers helper invocations active. See b/151137030 SIMD::UInt inactive = ~active; SIMD::UInt filled = id.UInt(0) & active; for(int j = 0; j < SIMD::Width - 1; j++) { filled |= filled.yzwx & inactive; // Populate inactive 'holes' with a live value } auto mask = CmpEQ(filled, SIMD::UInt(0, 1, 2, 3)); for(uint32_t i = 0u; i < type.componentCount; i++) { dst.move(i, OrAll(value.UInt(i) & mask)); } } break; } case spv::OpGroupNonUniformBroadcastFirst: { auto valueId = Object::ID(insn.word(4)); Operand value(this, state, valueId); // Result is true only in the active invocation with the lowest id // in the group, otherwise result is false. SIMD::Int active = state->activeLaneMask(); // Considers helper invocations active. See b/151137030 // TODO: Would be nice if we could write this as: // elect = active & ~(active.Oxyz | active.OOxy | active.OOOx) auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); auto elect = active & ~(v0111 & (active.xxyz | active.xxxy | active.xxxx)); for(auto i = 0u; i < type.componentCount; i++) { dst.move(i, OrAll(value.Int(i) & elect)); } break; } case spv::OpGroupNonUniformBallot: { ASSERT(type.componentCount == 4); Operand predicate(this, state, insn.word(4)); dst.move(0, SIMD::Int(SignMask(state->activeLaneMask() & predicate.Int(0)))); // Considers helper invocations active. See b/151137030 dst.move(1, SIMD::Int(0)); dst.move(2, SIMD::Int(0)); dst.move(3, SIMD::Int(0)); break; } case spv::OpGroupNonUniformInverseBallot: { auto valueId = Object::ID(insn.word(4)); ASSERT(type.componentCount == 1); ASSERT(getType(getObject(valueId)).componentCount == 4); Operand value(this, state, valueId); auto bit = (value.Int(0) >> SIMD::Int(0, 1, 2, 3)) & SIMD::Int(1); dst.move(0, -bit); break; } case spv::OpGroupNonUniformBallotBitExtract: { auto valueId = Object::ID(insn.word(4)); auto indexId = Object::ID(insn.word(5)); ASSERT(type.componentCount == 1); ASSERT(getType(getObject(valueId)).componentCount == 4); ASSERT(getType(getObject(indexId)).componentCount == 1); Operand value(this, state, valueId); Operand index(this, state, indexId); auto vecIdx = index.Int(0) / SIMD::Int(32); auto bitIdx = index.Int(0) & SIMD::Int(31); auto bits = (value.Int(0) & CmpEQ(vecIdx, SIMD::Int(0))) | (value.Int(1) & CmpEQ(vecIdx, SIMD::Int(1))) | (value.Int(2) & CmpEQ(vecIdx, SIMD::Int(2))) | (value.Int(3) & CmpEQ(vecIdx, SIMD::Int(3))); dst.move(0, -((bits >> bitIdx) & SIMD::Int(1))); break; } case spv::OpGroupNonUniformBallotBitCount: { auto operation = spv::GroupOperation(insn.word(4)); auto valueId = Object::ID(insn.word(5)); ASSERT(type.componentCount == 1); ASSERT(getType(getObject(valueId)).componentCount == 4); Operand value(this, state, valueId); switch(operation) { case spv::GroupOperationReduce: dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(15))); break; case spv::GroupOperationInclusiveScan: dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(1, 3, 7, 15))); break; case spv::GroupOperationExclusiveScan: dst.move(0, CountBits(value.UInt(0) & SIMD::UInt(0, 1, 3, 7))); break; default: UNSUPPORTED("GroupOperation %d", int(operation)); } break; } case spv::OpGroupNonUniformBallotFindLSB: { auto valueId = Object::ID(insn.word(4)); ASSERT(type.componentCount == 1); ASSERT(getType(getObject(valueId)).componentCount == 4); Operand value(this, state, valueId); dst.move(0, Cttz(value.UInt(0) & SIMD::UInt(15), true)); break; } case spv::OpGroupNonUniformBallotFindMSB: { auto valueId = Object::ID(insn.word(4)); ASSERT(type.componentCount == 1); ASSERT(getType(getObject(valueId)).componentCount == 4); Operand value(this, state, valueId); dst.move(0, SIMD::UInt(31) - Ctlz(value.UInt(0) & SIMD::UInt(15), false)); break; } case spv::OpGroupNonUniformShuffle: { Operand value(this, state, insn.word(4)); Operand id(this, state, insn.word(5)); auto x = CmpEQ(SIMD::Int(0), id.Int(0)); auto y = CmpEQ(SIMD::Int(1), id.Int(0)); auto z = CmpEQ(SIMD::Int(2), id.Int(0)); auto w = CmpEQ(SIMD::Int(3), id.Int(0)); for(auto i = 0u; i < type.componentCount; i++) { SIMD::Int v = value.Int(i); dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww)); } break; } case spv::OpGroupNonUniformShuffleXor: { Operand value(this, state, insn.word(4)); Operand mask(this, state, insn.word(5)); auto x = CmpEQ(SIMD::Int(0), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); auto y = CmpEQ(SIMD::Int(1), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); auto z = CmpEQ(SIMD::Int(2), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); auto w = CmpEQ(SIMD::Int(3), SIMD::Int(0, 1, 2, 3) ^ mask.Int(0)); for(auto i = 0u; i < type.componentCount; i++) { SIMD::Int v = value.Int(i); dst.move(i, (x & v.xxxx) | (y & v.yyyy) | (z & v.zzzz) | (w & v.wwww)); } break; } case spv::OpGroupNonUniformShuffleUp: { Operand value(this, state, insn.word(4)); Operand delta(this, state, insn.word(5)); auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0)); auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0)); auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0)); auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0)); for(auto i = 0u; i < type.componentCount; i++) { SIMD::Int v = value.Int(i); dst.move(i, (d0 & v.xyzw) | (d1 & v.xxyz) | (d2 & v.xxxy) | (d3 & v.xxxx)); } break; } case spv::OpGroupNonUniformShuffleDown: { Operand value(this, state, insn.word(4)); Operand delta(this, state, insn.word(5)); auto d0 = CmpEQ(SIMD::Int(0), delta.Int(0)); auto d1 = CmpEQ(SIMD::Int(1), delta.Int(0)); auto d2 = CmpEQ(SIMD::Int(2), delta.Int(0)); auto d3 = CmpEQ(SIMD::Int(3), delta.Int(0)); for(auto i = 0u; i < type.componentCount; i++) { SIMD::Int v = value.Int(i); dst.move(i, (d0 & v.xyzw) | (d1 & v.yzww) | (d2 & v.zwww) | (d3 & v.wwww)); } break; } case spv::OpGroupNonUniformIAdd: Impl::Group::BinaryOperation( this, insn, state, dst, 0, [](auto a, auto b) { return a + b; }); break; case spv::OpGroupNonUniformFAdd: Impl::Group::BinaryOperation( this, insn, state, dst, 0.0f, [](auto a, auto b) { return a + b; }); break; case spv::OpGroupNonUniformIMul: Impl::Group::BinaryOperation( this, insn, state, dst, 1, [](auto a, auto b) { return a * b; }); break; case spv::OpGroupNonUniformFMul: Impl::Group::BinaryOperation( this, insn, state, dst, 1.0f, [](auto a, auto b) { return a * b; }); break; case spv::OpGroupNonUniformBitwiseAnd: Impl::Group::BinaryOperation( this, insn, state, dst, ~0u, [](auto a, auto b) { return a & b; }); break; case spv::OpGroupNonUniformBitwiseOr: Impl::Group::BinaryOperation( this, insn, state, dst, 0, [](auto a, auto b) { return a | b; }); break; case spv::OpGroupNonUniformBitwiseXor: Impl::Group::BinaryOperation( this, insn, state, dst, 0, [](auto a, auto b) { return a ^ b; }); break; case spv::OpGroupNonUniformSMin: Impl::Group::BinaryOperation( this, insn, state, dst, INT32_MAX, [](auto a, auto b) { return Min(a, b); }); break; case spv::OpGroupNonUniformUMin: Impl::Group::BinaryOperation( this, insn, state, dst, ~0u, [](auto a, auto b) { return Min(a, b); }); break; case spv::OpGroupNonUniformFMin: Impl::Group::BinaryOperation( this, insn, state, dst, SIMD::Float::infinity(), [](auto a, auto b) { return NMin(a, b); }); break; case spv::OpGroupNonUniformSMax: Impl::Group::BinaryOperation( this, insn, state, dst, INT32_MIN, [](auto a, auto b) { return Max(a, b); }); break; case spv::OpGroupNonUniformUMax: Impl::Group::BinaryOperation( this, insn, state, dst, 0, [](auto a, auto b) { return Max(a, b); }); break; case spv::OpGroupNonUniformFMax: Impl::Group::BinaryOperation( this, insn, state, dst, -SIMD::Float::infinity(), [](auto a, auto b) { return NMax(a, b); }); break; case spv::OpGroupNonUniformLogicalAnd: Impl::Group::BinaryOperation( this, insn, state, dst, ~0u, [](auto a, auto b) { SIMD::UInt zero = SIMD::UInt(0); return CmpNEQ(a, zero) & CmpNEQ(b, zero); }); break; case spv::OpGroupNonUniformLogicalOr: Impl::Group::BinaryOperation( this, insn, state, dst, 0, [](auto a, auto b) { SIMD::UInt zero = SIMD::UInt(0); return CmpNEQ(a, zero) | CmpNEQ(b, zero); }); break; case spv::OpGroupNonUniformLogicalXor: Impl::Group::BinaryOperation( this, insn, state, dst, 0, [](auto a, auto b) { SIMD::UInt zero = SIMD::UInt(0); return CmpNEQ(a, zero) ^ CmpNEQ(b, zero); }); break; default: UNSUPPORTED("EmitGroupNonUniform op: %s", OpcodeName(type.opcode())); } return EmitResult::Continue; } } // namespace sw